yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import copy
   9 import datetime
  10 import errno
  11 import fileinput
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import sys
  23 import tempfile
  24 import time
  25 import tokenize
  26 import traceback
  27 import random
  28
  29 from string import ascii_letters
  30 from zipimport import zipimporter
  31
  32 from .compat import (
  33     compat_basestring,
  34     compat_get_terminal_size,
  35     compat_kwargs,
  36     compat_numeric_types,
  37     compat_os_name,
  38     compat_str,
  39     compat_tokenize_tokenize,
  40     compat_urllib_error,
  41     compat_urllib_request,
  42     compat_urllib_request_DataHandler,
  43 )
  44 from .cookies import load_cookies
  45 from .utils import (
  46     age_restricted,
  47     args_to_str,
  48     ContentTooShortError,
  49     date_from_str,
  50     DateRange,
  51     DEFAULT_OUTTMPL,
  52     determine_ext,
  53     determine_protocol,
  54     DOT_DESKTOP_LINK_TEMPLATE,
  55     DOT_URL_LINK_TEMPLATE,
  56     DOT_WEBLOC_LINK_TEMPLATE,
  57     DownloadError,
  58     encode_compat_str,
  59     encodeFilename,
  60     EntryNotInPlaylist,
  61     error_to_compat_str,
  62     ExistingVideoReached,
  63     expand_path,
  64     ExtractorError,
  65     float_or_none,
  66     format_bytes,
  67     format_field,
  68     STR_FORMAT_RE,
  69     formatSeconds,
  70     GeoRestrictedError,
  71     HEADRequest,
  72     int_or_none,
  73     iri_to_uri,
  74     ISO3166Utils,
  75     LazyList,
  76     locked_file,
  77     make_dir,
  78     make_HTTPS_handler,
  79     MaxDownloadsReached,
  80     network_exceptions,
  81     orderedSet,
  82     OUTTMPL_TYPES,
  83     PagedList,
  84     parse_filesize,
  85     PerRequestProxyHandler,
  86     platform_name,
  87     PostProcessingError,
  88     preferredencoding,
  89     prepend_extension,
  90     process_communicate_or_kill,
  91     register_socks_protocols,
  92     RejectedVideoReached,
  93     render_table,
  94     replace_extension,
  95     SameFileError,
  96     sanitize_filename,
  97     sanitize_path,
  98     sanitize_url,
  99     sanitized_Request,
 100     std_headers,
 101     str_or_none,
 102     strftime_or_none,
 103     subtitles_filename,
 104     ThrottledDownload,
 105     to_high_limit_path,
 106     traverse_obj,
 107     try_get,
 108     UnavailableVideoError,
 109     url_basename,
 110     version_tuple,
 111     write_json_file,
 112     write_string,
 113     YoutubeDLCookieProcessor,
 114     YoutubeDLHandler,
 115     YoutubeDLRedirectHandler,
 116 )
 117 from .cache import Cache
 118 from .extractor import (
 119     gen_extractor_classes,
 120     get_info_extractor,
 121     _LAZY_LOADER,
 122     _PLUGIN_CLASSES
 123 )
 124 from .extractor.openload import PhantomJSwrapper
 125 from .downloader import (
 126     get_suitable_downloader,
 127     shorten_protocol_name
 128 )
 129 from .downloader.rtmp import rtmpdump_version
 130 from .postprocessor import (
 131     get_postprocessor,
 132     FFmpegFixupDurationPP,
 133     FFmpegFixupM3u8PP,
 134     FFmpegFixupM4aPP,
 135     FFmpegFixupStretchedPP,
 136     FFmpegFixupTimestampPP,
 137     FFmpegMergerPP,
 138     FFmpegPostProcessor,
 139     MoveFilesAfterDownloadPP,
 140 )
 141 from .version import __version__
 142
 143 if compat_os_name == 'nt':
 144     import ctypes
 145
 146
 147 class YoutubeDL(object):
 148     """YoutubeDL class.
 149
 150     YoutubeDL objects are the ones responsible of downloading the
 151     actual video file and writing it to disk if the user has requested
 152     it, among some other tasks. In most cases there should be one per
 153     program. As, given a video URL, the downloader doesn't know how to
 154     extract all the needed information, task that InfoExtractors do, it
 155     has to pass the URL to one of them.
 156
 157     For this, YoutubeDL objects have a method that allows
 158     InfoExtractors to be registered in a given order. When it is passed
 159     a URL, the YoutubeDL object handles it to the first InfoExtractor it
 160     finds that reports being able to handle it. The InfoExtractor extracts
 161     all the information about the video or videos the URL refers to, and
 162     YoutubeDL process the extracted information, possibly using a File
 163     Downloader to download the video.
 164
 165     YoutubeDL objects accept a lot of parameters. In order not to saturate
 166     the object constructor with arguments, it receives a dictionary of
 167     options instead. These options are available through the params
 168     attribute for the InfoExtractors to use. The YoutubeDL also
 169     registers itself as the downloader in charge for the InfoExtractors
 170     that are added to it, so this is a "mutual registration".
 171
 172     Available options:
 173
 174     username:          Username for authentication purposes.
 175     password:          Password for authentication purposes.
 176     videopassword:     Password for accessing a video.
 177     ap_mso:            Adobe Pass multiple-system operator identifier.
 178     ap_username:       Multiple-system operator account username.
 179     ap_password:       Multiple-system operator account password.
 180     usenetrc:          Use netrc for authentication instead.
 181     verbose:           Print additional info to stdout.
 182     quiet:             Do not print messages to stdout.
 183     no_warnings:       Do not print out anything for warnings.
 184     forceprint:        A list of templates to force print
 185     forceurl:          Force printing final URL. (Deprecated)
 186     forcetitle:        Force printing title. (Deprecated)
 187     forceid:           Force printing ID. (Deprecated)
 188     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 189     forcedescription:  Force printing description. (Deprecated)
 190     forcefilename:     Force printing final filename. (Deprecated)
 191     forceduration:     Force printing duration. (Deprecated)
 192     forcejson:         Force printing info_dict as JSON.
 193     dump_single_json:  Force printing the info_dict of the whole playlist
 194                        (or video) as a single JSON line.
 195     force_write_download_archive: Force writing download archive regardless
 196                        of 'skip_download' or 'simulate'.
 197     simulate:          Do not download the video files.
 198     format:            Video format code. see "FORMAT SELECTION" for more details.
 199     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 200     ignore_no_formats_error: Ignore "No video formats" error. Usefull for
 201                        extracting metadata even if the video is not actually
 202                        available for download (experimental)
 203     format_sort:       How to sort the video formats. see "Sorting Formats"
 204                        for more details.
 205     format_sort_force: Force the given format_sort. see "Sorting Formats"
 206                        for more details.
 207     allow_multiple_video_streams:   Allow multiple video streams to be merged
 208                        into a single file
 209     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 210                        into a single file
 211     check_formats      Whether to test if the formats are downloadable.
 212                        Can be True (check all), False (check none)
 213                        or None (check only if requested by extractor)
 214     paths:             Dictionary of output paths. The allowed keys are 'home'
 215                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 216     outtmpl:           Dictionary of templates for output names. Allowed keys
 217                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 218                        A string a also accepted for backward compatibility
 219     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 220     restrictfilenames: Do not allow "&" and spaces in file names
 221     trim_file_name:    Limit length of filename (extension excluded)
 222     windowsfilenames:  Force the filenames to be windows compatible
 223     ignoreerrors:      Do not stop on download errors
 224                        (Default True when running yt-dlp,
 225                        but False when directly accessing YoutubeDL class)
 226     skip_playlist_after_errors: Number of allowed failures until the rest of
 227                        the playlist is skipped
 228     force_generic_extractor: Force downloader to use the generic extractor
 229     overwrites:        Overwrite all video and metadata files if True,
 230                        overwrite only non-video files if None
 231                        and don't overwrite any file if False
 232     playliststart:     Playlist item to start at.
 233     playlistend:       Playlist item to end at.
 234     playlist_items:    Specific indices of playlist to download.
 235     playlistreverse:   Download playlist items in reverse order.
 236     playlistrandom:    Download playlist items in random order.
 237     matchtitle:        Download only matching titles.
 238     rejecttitle:       Reject downloads for matching titles.
 239     logger:            Log messages to a logging.Logger instance.
 240     logtostderr:       Log messages to stderr instead of stdout.
 241     writedescription:  Write the video description to a .description file
 242     writeinfojson:     Write the video description to a .info.json file
 243     clean_infojson:    Remove private fields from the infojson
 244     writecomments:     Extract video comments. This will not be written to disk
 245                        unless writeinfojson is also given
 246     writeannotations:  Write the video annotations to a .annotations.xml file
 247     writethumbnail:    Write the thumbnail image to a file
 248     allow_playlist_files: Whether to write playlists' description, infojson etc
 249                        also to disk when using the 'write*' options
 250     write_all_thumbnails:  Write all thumbnail formats to files
 251     writelink:         Write an internet shortcut file, depending on the
 252                        current platform (.url/.webloc/.desktop)
 253     writeurllink:      Write a Windows internet shortcut file (.url)
 254     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 255     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 256     writesubtitles:    Write the video subtitles to a file
 257     writeautomaticsub: Write the automatically generated subtitles to a file
 258     allsubtitles:      Deprecated - Use subtitleslangs = ['all']
 259                        Downloads all the subtitles of the video
 260                        (requires writesubtitles or writeautomaticsub)
 261     listsubtitles:     Lists all available subtitles for the video
 262     subtitlesformat:   The format code for subtitles
 263     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 264                        The list may contain "all" to refer to all the available
 265                        subtitles. The language can be prefixed with a "-" to
 266                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 267     keepvideo:         Keep the video file after post-processing
 268     daterange:         A DateRange object, download only if the upload_date is in the range.
 269     skip_download:     Skip the actual download of the video file
 270     cachedir:          Location of the cache files in the filesystem.
 271                        False to disable filesystem cache.
 272     noplaylist:        Download single video instead of a playlist if in doubt.
 273     age_limit:         An integer representing the user's age in years.
 274                        Unsuitable videos for the given age are skipped.
 275     min_views:         An integer representing the minimum view count the video
 276                        must have in order to not be skipped.
 277                        Videos without view count information are always
 278                        downloaded. None for no limit.
 279     max_views:         An integer representing the maximum view count.
 280                        Videos that are more popular than that are not
 281                        downloaded.
 282                        Videos without view count information are always
 283                        downloaded. None for no limit.
 284     download_archive:  File name of a file where all downloads are recorded.
 285                        Videos already present in the file are not downloaded
 286                        again.
 287     break_on_existing: Stop the download process after attempting to download a
 288                        file that is in the archive.
 289     break_on_reject:   Stop the download process when encountering a video that
 290                        has been filtered out.
 291     cookiefile:        File name where cookies should be read from and dumped to
 292     cookiesfrombrowser: A tuple containing the name of the browser and the profile
 293                        name/path from where cookies are loaded.
 294                        Eg: ('chrome', ) or (vivaldi, 'default')
 295     nocheckcertificate:Do not verify SSL certificates
 296     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 297                        At the moment, this is only supported by YouTube.
 298     proxy:             URL of the proxy server to use
 299     geo_verification_proxy:  URL of the proxy to use for IP address verification
 300                        on geo-restricted sites.
 301     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 302     bidi_workaround:   Work around buggy terminals without bidirectional text
 303                        support, using fridibi
 304     debug_printtraffic:Print out sent and received HTTP traffic
 305     include_ads:       Download ads as well
 306     default_search:    Prepend this string if an input url is not valid.
 307                        'auto' for elaborate guessing
 308     encoding:          Use this encoding instead of the system-specified.
 309     extract_flat:      Do not resolve URLs, return the immediate result.
 310                        Pass in 'in_playlist' to only show this behavior for
 311                        playlist items.
 312     postprocessors:    A list of dictionaries, each with an entry
 313                        * key:  The name of the postprocessor. See
 314                                yt_dlp/postprocessor/__init__.py for a list.
 315                        * when: When to run the postprocessor. Can be one of
 316                                pre_process|before_dl|post_process|after_move.
 317                                Assumed to be 'post_process' if not given
 318     post_hooks:        A list of functions that get called as the final step
 319                        for each video file, after all postprocessors have been
 320                        called. The filename will be passed as the only argument.
 321     progress_hooks:    A list of functions that get called on download
 322                        progress, with a dictionary with the entries
 323                        * status: One of "downloading", "error", or "finished".
 324                                  Check this first and ignore unknown values.
 325                        * info_dict: The extracted info_dict
 326
 327                        If status is one of "downloading", or "finished", the
 328                        following properties may also be present:
 329                        * filename: The final filename (always present)
 330                        * tmpfilename: The filename we're currently writing to
 331                        * downloaded_bytes: Bytes on disk
 332                        * total_bytes: Size of the whole file, None if unknown
 333                        * total_bytes_estimate: Guess of the eventual file size,
 334                                                None if unavailable.
 335                        * elapsed: The number of seconds since download started.
 336                        * eta: The estimated time in seconds, None if unknown
 337                        * speed: The download speed in bytes/second, None if
 338                                 unknown
 339                        * fragment_index: The counter of the currently
 340                                          downloaded video fragment.
 341                        * fragment_count: The number of fragments (= individual
 342                                          files that will be merged)
 343
 344                        Progress hooks are guaranteed to be called at least once
 345                        (with status "finished") if the download is successful.
 346     merge_output_format: Extension to use when merging formats.
 347     final_ext:         Expected final extension; used to detect when the file was
 348                        already downloaded and converted. "merge_output_format" is
 349                        replaced by this extension when given
 350     fixup:             Automatically correct known faults of the file.
 351                        One of:
 352                        - "never": do nothing
 353                        - "warn": only emit a warning
 354                        - "detect_or_warn": check whether we can do anything
 355                                            about it, warn otherwise (default)
 356     source_address:    Client-side IP address to bind to.
 357     call_home:         Boolean, true iff we are allowed to contact the
 358                        yt-dlp servers for debugging. (BROKEN)
 359     sleep_interval_requests: Number of seconds to sleep between requests
 360                        during extraction
 361     sleep_interval:    Number of seconds to sleep before each download when
 362                        used alone or a lower bound of a range for randomized
 363                        sleep before each download (minimum possible number
 364                        of seconds to sleep) when used along with
 365                        max_sleep_interval.
 366     max_sleep_interval:Upper bound of a range for randomized sleep before each
 367                        download (maximum possible number of seconds to sleep).
 368                        Must only be used along with sleep_interval.
 369                        Actual sleep time will be a random float from range
 370                        [sleep_interval; max_sleep_interval].
 371     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 372     listformats:       Print an overview of available video formats and exit.
 373     list_thumbnails:   Print a table of all thumbnails and exit.
 374     match_filter:      A function that gets called with the info_dict of
 375                        every video.
 376                        If it returns a message, the video is ignored.
 377                        If it returns None, the video is downloaded.
 378                        match_filter_func in utils.py is one example for this.
 379     no_color:          Do not emit color codes in output.
 380     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 381                        HTTP header
 382     geo_bypass_country:
 383                        Two-letter ISO 3166-2 country code that will be used for
 384                        explicit geographic restriction bypassing via faking
 385                        X-Forwarded-For HTTP header
 386     geo_bypass_ip_block:
 387                        IP range in CIDR notation that will be used similarly to
 388                        geo_bypass_country
 389
 390     The following options determine which downloader is picked:
 391     external_downloader: A dictionary of protocol keys and the executable of the
 392                        external downloader to use for it. The allowed protocols
 393                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 394                        Set the value to 'native' to use the native downloader
 395     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 396                        or {'m3u8': 'ffmpeg'} instead.
 397                        Use the native HLS downloader instead of ffmpeg/avconv
 398                        if True, otherwise use ffmpeg/avconv if False, otherwise
 399                        use downloader suggested by extractor if None.
 400     compat_opts:       Compatibility options. See "Differences in default behavior".
 401                        The following options do not work when used through the API:
 402                        filename, abort-on-error, multistreams, no-live-chat,
 403                        no-playlist-metafiles. Refer __init__.py for their implementation
 404
 405     The following parameters are not used by YoutubeDL itself, they are used by
 406     the downloader (see yt_dlp/downloader/common.py):
 407     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 408     max_filesize, test, noresizebuffer, retries, continuedl, noprogress, consoletitle,
 409     xattr_set_filesize, external_downloader_args, hls_use_mpegts, http_chunk_size.
 410
 411     The following options are used by the post processors:
 412     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 413                        otherwise prefer ffmpeg. (avconv support is deprecated)
 414     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 415                        to the binary or its containing directory.
 416     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 417                         and a list of additional command-line arguments for the
 418                         postprocessor/executable. The dict can also have "PP+EXE" keys
 419                         which are used when the given exe is used by the given PP.
 420                         Use 'default' as the name for arguments to passed to all PP
 421
 422     The following options are used by the extractors:
 423     extractor_retries: Number of times to retry for known errors
 424     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 425     hls_split_discontinuity: Split HLS playlists to different formats at
 426                        discontinuities such as ad breaks (default: False)
 427     extractor_args:    A dictionary of arguments to be passed to the extractors.
 428                        See "EXTRACTOR ARGUMENTS" for details.
 429                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
 430     youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
 431                        If True (default), DASH manifests and related
 432                        data will be downloaded and processed by extractor.
 433                        You can reduce network I/O by disabling it if you don't
 434                        care about DASH. (only for youtube)
 435     youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
 436                        If True (default), HLS manifests and related
 437                        data will be downloaded and processed by extractor.
 438                        You can reduce network I/O by disabling it if you don't
 439                        care about HLS. (only for youtube)
 440     """
 441
 442     _NUMERIC_FIELDS = set((
 443         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 444         'timestamp', 'upload_year', 'upload_month', 'upload_day',
 445         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 446         'average_rating', 'comment_count', 'age_limit',
 447         'start_time', 'end_time',
 448         'chapter_number', 'season_number', 'episode_number',
 449         'track_number', 'disc_number', 'release_year',
 450         'playlist_index',
 451     ))
 452
 453     params = None
 454     _ies = []
 455     _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 456     _printed_messages = set()
 457     _first_webpage_request = True
 458     _download_retcode = None
 459     _num_downloads = None
 460     _playlist_level = 0
 461     _playlist_urls = set()
 462     _screen_file = None
 463
 464     def __init__(self, params=None, auto_init=True):
 465         """Create a FileDownloader object with the given options."""
 466         if params is None:
 467             params = {}
 468         self._ies = []
 469         self._ies_instances = {}
 470         self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 471         self._printed_messages = set()
 472         self._first_webpage_request = True
 473         self._post_hooks = []
 474         self._progress_hooks = []
 475         self._download_retcode = 0
 476         self._num_downloads = 0
 477         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 478         self._err_file = sys.stderr
 479         self.params = {
 480             # Default parameters
 481             'nocheckcertificate': False,
 482         }
 483         self.params.update(params)
 484         self.cache = Cache(self)
 485
 486         if sys.version_info < (3, 6):
 487             self.report_warning(
 488                 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
 489
 490         def check_deprecated(param, option, suggestion):
 491             if self.params.get(param) is not None:
 492                 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
 493                 return True
 494             return False
 495
 496         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 497             if self.params.get('geo_verification_proxy') is None:
 498                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 499
 500         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 501         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 502         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 503
 504         for msg in self.params.get('warnings', []):
 505             self.report_warning(msg)
 506
 507         if self.params.get('final_ext'):
 508             if self.params.get('merge_output_format'):
 509                 self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
 510             self.params['merge_output_format'] = self.params['final_ext']
 511
 512         if 'overwrites' in self.params and self.params['overwrites'] is None:
 513             del self.params['overwrites']
 514
 515         if params.get('bidi_workaround', False):
 516             try:
 517                 import pty
 518                 master, slave = pty.openpty()
 519                 width = compat_get_terminal_size().columns
 520                 if width is None:
 521                     width_args = []
 522                 else:
 523                     width_args = ['-w', str(width)]
 524                 sp_kwargs = dict(
 525                     stdin=subprocess.PIPE,
 526                     stdout=slave,
 527                     stderr=self._err_file)
 528                 try:
 529                     self._output_process = subprocess.Popen(
 530                         ['bidiv'] + width_args, **sp_kwargs
 531                     )
 532                 except OSError:
 533                     self._output_process = subprocess.Popen(
 534                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 535                 self._output_channel = os.fdopen(master, 'rb')
 536             except OSError as ose:
 537                 if ose.errno == errno.ENOENT:
 538                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 539                 else:
 540                     raise
 541
 542         if (sys.platform != 'win32'
 543                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 544                 and not params.get('restrictfilenames', False)):
 545             # Unicode filesystem API will throw errors (#1474, #13027)
 546             self.report_warning(
 547                 'Assuming --restrict-filenames since file system encoding '
 548                 'cannot encode all characters. '
 549                 'Set the LC_ALL environment variable to fix this.')
 550             self.params['restrictfilenames'] = True
 551
 552         self.outtmpl_dict = self.parse_outtmpl()
 553
 554         # Creating format selector here allows us to catch syntax errors before the extraction
 555         self.format_selector = (
 556             None if self.params.get('format') is None
 557             else self.build_format_selector(self.params['format']))
 558
 559         self._setup_opener()
 560
 561         """Preload the archive, if any is specified"""
 562         def preload_download_archive(fn):
 563             if fn is None:
 564                 return False
 565             self.write_debug('Loading archive file %r\n' % fn)
 566             try:
 567                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 568                     for line in archive_file:
 569                         self.archive.add(line.strip())
 570             except IOError as ioe:
 571                 if ioe.errno != errno.ENOENT:
 572                     raise
 573                 return False
 574             return True
 575
 576         self.archive = set()
 577         preload_download_archive(self.params.get('download_archive'))
 578
 579         if auto_init:
 580             self.print_debug_header()
 581             self.add_default_info_extractors()
 582
 583         for pp_def_raw in self.params.get('postprocessors', []):
 584             pp_def = dict(pp_def_raw)
 585             when = pp_def.pop('when', 'post_process')
 586             pp_class = get_postprocessor(pp_def.pop('key'))
 587             pp = pp_class(self, **compat_kwargs(pp_def))
 588             self.add_post_processor(pp, when=when)
 589
 590         for ph in self.params.get('post_hooks', []):
 591             self.add_post_hook(ph)
 592
 593         for ph in self.params.get('progress_hooks', []):
 594             self.add_progress_hook(ph)
 595
 596         register_socks_protocols()
 597
 598     def warn_if_short_id(self, argv):
 599         # short YouTube ID starting with dash?
 600         idxs = [
 601             i for i, a in enumerate(argv)
 602             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 603         if idxs:
 604             correct_argv = (
 605                 ['yt-dlp']
 606                 + [a for i, a in enumerate(argv) if i not in idxs]
 607                 + ['--'] + [argv[i] for i in idxs]
 608             )
 609             self.report_warning(
 610                 'Long argument string detected. '
 611                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 612                 args_to_str(correct_argv))
 613
 614     def add_info_extractor(self, ie):
 615         """Add an InfoExtractor object to the end of the list."""
 616         self._ies.append(ie)
 617         if not isinstance(ie, type):
 618             self._ies_instances[ie.ie_key()] = ie
 619             ie.set_downloader(self)
 620
 621     def get_info_extractor(self, ie_key):
 622         """
 623         Get an instance of an IE with name ie_key, it will try to get one from
 624         the _ies list, if there's no instance it will create a new one and add
 625         it to the extractor list.
 626         """
 627         ie = self._ies_instances.get(ie_key)
 628         if ie is None:
 629             ie = get_info_extractor(ie_key)()
 630             self.add_info_extractor(ie)
 631         return ie
 632
 633     def add_default_info_extractors(self):
 634         """
 635         Add the InfoExtractors returned by gen_extractors to the end of the list
 636         """
 637         for ie in gen_extractor_classes():
 638             self.add_info_extractor(ie)
 639
 640     def add_post_processor(self, pp, when='post_process'):
 641         """Add a PostProcessor object to the end of the chain."""
 642         self._pps[when].append(pp)
 643         pp.set_downloader(self)
 644
 645     def add_post_hook(self, ph):
 646         """Add the post hook"""
 647         self._post_hooks.append(ph)
 648
 649     def add_progress_hook(self, ph):
 650         """Add the progress hook (currently only for the file downloader)"""
 651         self._progress_hooks.append(ph)
 652
 653     def _bidi_workaround(self, message):
 654         if not hasattr(self, '_output_channel'):
 655             return message
 656
 657         assert hasattr(self, '_output_process')
 658         assert isinstance(message, compat_str)
 659         line_count = message.count('\n') + 1
 660         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 661         self._output_process.stdin.flush()
 662         res = ''.join(self._output_channel.readline().decode('utf-8')
 663                       for _ in range(line_count))
 664         return res[:-len('\n')]
 665
 666     def _write_string(self, message, out=None, only_once=False):
 667         if only_once:
 668             if message in self._printed_messages:
 669                 return
 670             self._printed_messages.add(message)
 671         write_string(message, out=out, encoding=self.params.get('encoding'))
 672
 673     def to_stdout(self, message, skip_eol=False, quiet=False):
 674         """Print message to stdout"""
 675         if self.params.get('logger'):
 676             self.params['logger'].debug(message)
 677         elif not quiet or self.params.get('verbose'):
 678             self._write_string(
 679                 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 680                 self._err_file if quiet else self._screen_file)
 681
 682     def to_stderr(self, message, only_once=False):
 683         """Print message to stderr"""
 684         assert isinstance(message, compat_str)
 685         if self.params.get('logger'):
 686             self.params['logger'].error(message)
 687         else:
 688             self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
 689
 690     def to_console_title(self, message):
 691         if not self.params.get('consoletitle', False):
 692             return
 693         if compat_os_name == 'nt':
 694             if ctypes.windll.kernel32.GetConsoleWindow():
 695                 # c_wchar_p() might not be necessary if `message` is
 696                 # already of type unicode()
 697                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 698         elif 'TERM' in os.environ:
 699             self._write_string('\033]0;%s\007' % message, self._screen_file)
 700
 701     def save_console_title(self):
 702         if not self.params.get('consoletitle', False):
 703             return
 704         if self.params.get('simulate', False):
 705             return
 706         if compat_os_name != 'nt' and 'TERM' in os.environ:
 707             # Save the title on stack
 708             self._write_string('\033[22;0t', self._screen_file)
 709
 710     def restore_console_title(self):
 711         if not self.params.get('consoletitle', False):
 712             return
 713         if self.params.get('simulate', False):
 714             return
 715         if compat_os_name != 'nt' and 'TERM' in os.environ:
 716             # Restore the title from stack
 717             self._write_string('\033[23;0t', self._screen_file)
 718
 719     def __enter__(self):
 720         self.save_console_title()
 721         return self
 722
 723     def __exit__(self, *args):
 724         self.restore_console_title()
 725
 726         if self.params.get('cookiefile') is not None:
 727             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 728
 729     def trouble(self, message=None, tb=None):
 730         """Determine action to take when a download problem appears.
 731
 732         Depending on if the downloader has been configured to ignore
 733         download errors or not, this method may throw an exception or
 734         not when errors are found, after printing the message.
 735
 736         tb, if given, is additional traceback information.
 737         """
 738         if message is not None:
 739             self.to_stderr(message)
 740         if self.params.get('verbose'):
 741             if tb is None:
 742                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 743                     tb = ''
 744                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 745                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 746                     tb += encode_compat_str(traceback.format_exc())
 747                 else:
 748                     tb_data = traceback.format_list(traceback.extract_stack())
 749                     tb = ''.join(tb_data)
 750             if tb:
 751                 self.to_stderr(tb)
 752         if not self.params.get('ignoreerrors', False):
 753             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 754                 exc_info = sys.exc_info()[1].exc_info
 755             else:
 756                 exc_info = sys.exc_info()
 757             raise DownloadError(message, exc_info)
 758         self._download_retcode = 1
 759
 760     def to_screen(self, message, skip_eol=False):
 761         """Print message to stdout if not in quiet mode"""
 762         self.to_stdout(
 763             message, skip_eol, quiet=self.params.get('quiet', False))
 764
 765     def report_warning(self, message, only_once=False):
 766         '''
 767         Print the message to stderr, it will be prefixed with 'WARNING:'
 768         If stderr is a tty file the 'WARNING:' will be colored
 769         '''
 770         if self.params.get('logger') is not None:
 771             self.params['logger'].warning(message)
 772         else:
 773             if self.params.get('no_warnings'):
 774                 return
 775             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 776                 _msg_header = '\033[0;33mWARNING:\033[0m'
 777             else:
 778                 _msg_header = 'WARNING:'
 779             warning_message = '%s %s' % (_msg_header, message)
 780             self.to_stderr(warning_message, only_once)
 781
 782     def report_error(self, message, tb=None):
 783         '''
 784         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 785         in red if stderr is a tty file.
 786         '''
 787         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 788             _msg_header = '\033[0;31mERROR:\033[0m'
 789         else:
 790             _msg_header = 'ERROR:'
 791         error_message = '%s %s' % (_msg_header, message)
 792         self.trouble(error_message, tb)
 793
 794     def write_debug(self, message, only_once=False):
 795         '''Log debug message or Print message to stderr'''
 796         if not self.params.get('verbose', False):
 797             return
 798         message = '[debug] %s' % message
 799         if self.params.get('logger'):
 800             self.params['logger'].debug(message)
 801         else:
 802             self.to_stderr(message, only_once)
 803
 804     def report_file_already_downloaded(self, file_name):
 805         """Report file has already been fully downloaded."""
 806         try:
 807             self.to_screen('[download] %s has already been downloaded' % file_name)
 808         except UnicodeEncodeError:
 809             self.to_screen('[download] The file has already been downloaded')
 810
 811     def report_file_delete(self, file_name):
 812         """Report that existing file will be deleted."""
 813         try:
 814             self.to_screen('Deleting existing file %s' % file_name)
 815         except UnicodeEncodeError:
 816             self.to_screen('Deleting existing file')
 817
 818     def parse_outtmpl(self):
 819         outtmpl_dict = self.params.get('outtmpl', {})
 820         if not isinstance(outtmpl_dict, dict):
 821             outtmpl_dict = {'default': outtmpl_dict}
 822         outtmpl_dict.update({
 823             k: v for k, v in DEFAULT_OUTTMPL.items()
 824             if not outtmpl_dict.get(k)})
 825         for key, val in outtmpl_dict.items():
 826             if isinstance(val, bytes):
 827                 self.report_warning(
 828                     'Parameter outtmpl is bytes, but should be a unicode string. '
 829                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 830         return outtmpl_dict
 831
 832     def get_output_path(self, dir_type='', filename=None):
 833         paths = self.params.get('paths', {})
 834         assert isinstance(paths, dict)
 835         path = os.path.join(
 836             expand_path(paths.get('home', '').strip()),
 837             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
 838             filename or '')
 839
 840         # Temporary fix for #4787
 841         # 'Treat' all problem characters by passing filename through preferredencoding
 842         # to workaround encoding issues with subprocess on python2 @ Windows
 843         if sys.version_info < (3, 0) and sys.platform == 'win32':
 844             path = encodeFilename(path, True).decode(preferredencoding())
 845         return sanitize_path(path, force=self.params.get('windowsfilenames'))
 846
 847     @staticmethod
 848     def validate_outtmpl(tmpl):
 849         ''' @return None or Exception object '''
 850         try:
 851             re.sub(
 852                 STR_FORMAT_RE.format(''),
 853                 lambda mobj: ('%' if not mobj.group('has_key') else '') + mobj.group(0),
 854                 tmpl
 855             ) % collections.defaultdict(int)
 856             return None
 857         except ValueError as err:
 858             return err
 859
 860     def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
 861         """ Make the template and info_dict suitable for substitution (outtmpl % info_dict)"""
 862         info_dict = dict(info_dict)
 863         na = self.params.get('outtmpl_na_placeholder', 'NA')
 864
 865         info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
 866             formatSeconds(info_dict['duration'], '-' if sanitize else ':')
 867             if info_dict.get('duration', None) is not None
 868             else None)
 869         info_dict['epoch'] = int(time.time())
 870         info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
 871         if info_dict.get('resolution') is None:
 872             info_dict['resolution'] = self.format_resolution(info_dict, default=None)
 873
 874         # For fields playlist_index and autonumber convert all occurrences
 875         # of %(field)s to %(field)0Nd for backward compatibility
 876         field_size_compat_map = {
 877             'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
 878             'autonumber': self.params.get('autonumber_size') or 5,
 879         }
 880
 881         TMPL_DICT = {}
 882         EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE.format('[^)]*'))
 883         MATH_FUNCTIONS = {
 884             '+': float.__add__,
 885             '-': float.__sub__,
 886         }
 887         # Field is of the form key1.key2...
 888         # where keys (except first) can be string, int or slice
 889         FIELD_RE = r'\w+(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
 890         MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
 891         MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
 892         INTERNAL_FORMAT_RE = re.compile(r'''(?x)
 893             (?P<negate>-)?
 894             (?P<fields>{field})
 895             (?P<maths>(?:{math_op}{math_field})*)
 896             (?:>(?P<strf_format>.+?))?
 897             (?:\|(?P<default>.*?))?
 898             $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
 899
 900         get_key = lambda k: traverse_obj(
 901             info_dict, k.split('.'), is_user_input=True, traverse_string=True)
 902
 903         def get_value(mdict):
 904             # Object traversal
 905             value = get_key(mdict['fields'])
 906             # Negative
 907             if mdict['negate']:
 908                 value = float_or_none(value)
 909                 if value is not None:
 910                     value *= -1
 911             # Do maths
 912             offset_key = mdict['maths']
 913             if offset_key:
 914                 value = float_or_none(value)
 915                 operator = None
 916                 while offset_key:
 917                     item = re.match(
 918                         MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
 919                         offset_key).group(0)
 920                     offset_key = offset_key[len(item):]
 921                     if operator is None:
 922                         operator = MATH_FUNCTIONS[item]
 923                         continue
 924                     item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
 925                     offset = float_or_none(item)
 926                     if offset is None:
 927                         offset = float_or_none(get_key(item))
 928                     try:
 929                         value = operator(value, multiplier * offset)
 930                     except (TypeError, ZeroDivisionError):
 931                         return None
 932                     operator = None
 933             # Datetime formatting
 934             if mdict['strf_format']:
 935                 value = strftime_or_none(value, mdict['strf_format'])
 936
 937             return value
 938
 939         def create_key(outer_mobj):
 940             if not outer_mobj.group('has_key'):
 941                 return '%{}'.format(outer_mobj.group(0))
 942
 943             key = outer_mobj.group('key')
 944             fmt = outer_mobj.group('format')
 945             mobj = re.match(INTERNAL_FORMAT_RE, key)
 946             if mobj is None:
 947                 value, default, mobj = None, na, {'fields': ''}
 948             else:
 949                 mobj = mobj.groupdict()
 950                 default = mobj['default'] if mobj['default'] is not None else na
 951                 value = get_value(mobj)
 952
 953             if fmt == 's' and value is not None and key in field_size_compat_map.keys():
 954                 fmt = '0{:d}d'.format(field_size_compat_map[key])
 955
 956             value = default if value is None else value
 957
 958             if fmt == 'c':
 959                 value = compat_str(value)
 960                 if value is None:
 961                     value, fmt = default, 's'
 962                 else:
 963                     value = value[0]
 964             elif fmt[-1] not in 'rs':  # numeric
 965                 value = float_or_none(value)
 966                 if value is None:
 967                     value, fmt = default, 's'
 968             if sanitize:
 969                 if fmt[-1] == 'r':
 970                     # If value is an object, sanitize might convert it to a string
 971                     # So we convert it to repr first
 972                     value, fmt = repr(value), '%ss' % fmt[:-1]
 973                 if fmt[-1] in 'csr':
 974                     value = sanitize(mobj['fields'].split('.')[-1], value)
 975             key += '\0%s' % fmt
 976             TMPL_DICT[key] = value
 977             return '%({key}){fmt}'.format(key=key, fmt=fmt)
 978
 979         return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
 980
 981     def _prepare_filename(self, info_dict, tmpl_type='default'):
 982         try:
 983             sanitize = lambda k, v: sanitize_filename(
 984                 compat_str(v),
 985                 restricted=self.params.get('restrictfilenames'),
 986                 is_id=(k == 'id' or k.endswith('_id')))
 987             outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
 988             outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)
 989
 990             # expand_path translates '%%' into '%' and '$$' into '$'
 991             # correspondingly that is not what we want since we need to keep
 992             # '%%' intact for template dict substitution step. Working around
 993             # with boundary-alike separator hack.
 994             sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 995             outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 996
 997             # outtmpl should be expand_path'ed before template dict substitution
 998             # because meta fields may contain env variables we don't want to
 999             # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
1000             # title "Hello $PATH", we don't want `$PATH` to be expanded.
1001             filename = expand_path(outtmpl).replace(sep, '') % template_dict
1002
1003             force_ext = OUTTMPL_TYPES.get(tmpl_type)
1004             if force_ext is not None:
1005                 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1006
1007             # https://github.com/blackjack4494/youtube-dlc/issues/85
1008             trim_file_name = self.params.get('trim_file_name', False)
1009             if trim_file_name:
1010                 fn_groups = filename.rsplit('.')
1011                 ext = fn_groups[-1]
1012                 sub_ext = ''
1013                 if len(fn_groups) > 2:
1014                     sub_ext = fn_groups[-2]
1015                 filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
1016
1017             return filename
1018         except ValueError as err:
1019             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1020             return None
1021
1022     def prepare_filename(self, info_dict, dir_type='', warn=False):
1023         """Generate the output filename."""
1024
1025         filename = self._prepare_filename(info_dict, dir_type or 'default')
1026
1027         if warn:
1028             if not self.params.get('paths'):
1029                 pass
1030             elif filename == '-':
1031                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1032             elif os.path.isabs(filename):
1033                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1034             self.__prepare_filename_warned = True
1035         if filename == '-' or not filename:
1036             return filename
1037
1038         return self.get_output_path(dir_type, filename)
1039
1040     def _match_entry(self, info_dict, incomplete=False, silent=False):
1041         """ Returns None if the file should be downloaded """
1042
1043         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1044
1045         def check_filter():
1046             if 'title' in info_dict:
1047                 # This can happen when we're just evaluating the playlist
1048                 title = info_dict['title']
1049                 matchtitle = self.params.get('matchtitle', False)
1050                 if matchtitle:
1051                     if not re.search(matchtitle, title, re.IGNORECASE):
1052                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1053                 rejecttitle = self.params.get('rejecttitle', False)
1054                 if rejecttitle:
1055                     if re.search(rejecttitle, title, re.IGNORECASE):
1056                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1057             date = info_dict.get('upload_date')
1058             if date is not None:
1059                 dateRange = self.params.get('daterange', DateRange())
1060                 if date not in dateRange:
1061                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1062             view_count = info_dict.get('view_count')
1063             if view_count is not None:
1064                 min_views = self.params.get('min_views')
1065                 if min_views is not None and view_count < min_views:
1066                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1067                 max_views = self.params.get('max_views')
1068                 if max_views is not None and view_count > max_views:
1069                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1070             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1071                 return 'Skipping "%s" because it is age restricted' % video_title
1072
1073             if not incomplete:
1074                 match_filter = self.params.get('match_filter')
1075                 if match_filter is not None:
1076                     ret = match_filter(info_dict)
1077                     if ret is not None:
1078                         return ret
1079             return None
1080
1081         if self.in_download_archive(info_dict):
1082             reason = '%s has already been recorded in the archive' % video_title
1083             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1084         else:
1085             reason = check_filter()
1086             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1087         if reason is not None:
1088             if not silent:
1089                 self.to_screen('[download] ' + reason)
1090             if self.params.get(break_opt, False):
1091                 raise break_err()
1092         return reason
1093
1094     @staticmethod
1095     def add_extra_info(info_dict, extra_info):
1096         '''Set the keys from extra_info in info dict if they are missing'''
1097         for key, value in extra_info.items():
1098             info_dict.setdefault(key, value)
1099
1100     def extract_info(self, url, download=True, ie_key=None, extra_info={},
1101                      process=True, force_generic_extractor=False):
1102         """
1103         Return a list with a dictionary for each video extracted.
1104
1105         Arguments:
1106         url -- URL to extract
1107
1108         Keyword arguments:
1109         download -- whether to download videos during extraction
1110         ie_key -- extractor key hint
1111         extra_info -- dictionary containing the extra values to add to each result
1112         process -- whether to resolve all unresolved references (URLs, playlist items),
1113             must be True for download to work.
1114         force_generic_extractor -- force using the generic extractor
1115         """
1116
1117         if not ie_key and force_generic_extractor:
1118             ie_key = 'Generic'
1119
1120         if ie_key:
1121             ies = [self.get_info_extractor(ie_key)]
1122         else:
1123             ies = self._ies
1124
1125         for ie in ies:
1126             if not ie.suitable(url):
1127                 continue
1128
1129             ie_key = ie.ie_key()
1130             ie = self.get_info_extractor(ie_key)
1131             if not ie.working():
1132                 self.report_warning('The program functionality for this site has been marked as broken, '
1133                                     'and will probably not work.')
1134
1135             try:
1136                 temp_id = str_or_none(
1137                     ie.extract_id(url) if callable(getattr(ie, 'extract_id', None))
1138                     else ie._match_id(url))
1139             except (AssertionError, IndexError, AttributeError):
1140                 temp_id = None
1141             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1142                 self.to_screen("[%s] %s: has already been recorded in archive" % (
1143                                ie_key, temp_id))
1144                 break
1145             return self.__extract_info(url, ie, download, extra_info, process)
1146         else:
1147             self.report_error('no suitable InfoExtractor for URL %s' % url)
1148
1149     def __handle_extraction_exceptions(func, handle_all_errors=True):
1150         def wrapper(self, *args, **kwargs):
1151             try:
1152                 return func(self, *args, **kwargs)
1153             except GeoRestrictedError as e:
1154                 msg = e.msg
1155                 if e.countries:
1156                     msg += '\nThis video is available in %s.' % ', '.join(
1157                         map(ISO3166Utils.short2full, e.countries))
1158                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1159                 self.report_error(msg)
1160             except ExtractorError as e:  # An error we somewhat expected
1161                 self.report_error(compat_str(e), e.format_traceback())
1162             except ThrottledDownload:
1163                 self.to_stderr('\r')
1164                 self.report_warning('The download speed is below throttle limit. Re-extracting data')
1165                 return wrapper(self, *args, **kwargs)
1166             except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
1167                 raise
1168             except Exception as e:
1169                 if handle_all_errors and self.params.get('ignoreerrors', False):
1170                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
1171                 else:
1172                     raise
1173         return wrapper
1174
1175     @__handle_extraction_exceptions
1176     def __extract_info(self, url, ie, download, extra_info, process):
1177         ie_result = ie.extract(url)
1178         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1179             return
1180         if isinstance(ie_result, list):
1181             # Backwards compatibility: old IE result format
1182             ie_result = {
1183                 '_type': 'compat_list',
1184                 'entries': ie_result,
1185             }
1186         if extra_info.get('original_url'):
1187             ie_result.setdefault('original_url', extra_info['original_url'])
1188         self.add_default_extra_info(ie_result, ie, url)
1189         if process:
1190             return self.process_ie_result(ie_result, download, extra_info)
1191         else:
1192             return ie_result
1193
1194     def add_default_extra_info(self, ie_result, ie, url):
1195         if url is not None:
1196             self.add_extra_info(ie_result, {
1197                 'webpage_url': url,
1198                 'original_url': url,
1199                 'webpage_url_basename': url_basename(url),
1200             })
1201         if ie is not None:
1202             self.add_extra_info(ie_result, {
1203                 'extractor': ie.IE_NAME,
1204                 'extractor_key': ie.ie_key(),
1205             })
1206
1207     def process_ie_result(self, ie_result, download=True, extra_info={}):
1208         """
1209         Take the result of the ie(may be modified) and resolve all unresolved
1210         references (URLs, playlist items).
1211
1212         It will also download the videos if 'download'.
1213         Returns the resolved ie_result.
1214         """
1215         result_type = ie_result.get('_type', 'video')
1216
1217         if result_type in ('url', 'url_transparent'):
1218             ie_result['url'] = sanitize_url(ie_result['url'])
1219             if ie_result.get('original_url'):
1220                 extra_info.setdefault('original_url', ie_result['original_url'])
1221
1222             extract_flat = self.params.get('extract_flat', False)
1223             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1224                     or extract_flat is True):
1225                 info_copy = ie_result.copy()
1226                 self.add_extra_info(info_copy, extra_info)
1227                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1228                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1229                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1230                 return ie_result
1231
1232         if result_type == 'video':
1233             self.add_extra_info(ie_result, extra_info)
1234             ie_result = self.process_video_result(ie_result, download=download)
1235             additional_urls = (ie_result or {}).get('additional_urls')
1236             if additional_urls:
1237                 # TODO: Improve MetadataFromFieldPP to allow setting a list
1238                 if isinstance(additional_urls, compat_str):
1239                     additional_urls = [additional_urls]
1240                 self.to_screen(
1241                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1242                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1243                 ie_result['additional_entries'] = [
1244                     self.extract_info(
1245                         url, download, extra_info,
1246                         force_generic_extractor=self.params.get('force_generic_extractor'))
1247                     for url in additional_urls
1248                 ]
1249             return ie_result
1250         elif result_type == 'url':
1251             # We have to add extra_info to the results because it may be
1252             # contained in a playlist
1253             return self.extract_info(
1254                 ie_result['url'], download,
1255                 ie_key=ie_result.get('ie_key'),
1256                 extra_info=extra_info)
1257         elif result_type == 'url_transparent':
1258             # Use the information from the embedding page
1259             info = self.extract_info(
1260                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1261                 extra_info=extra_info, download=False, process=False)
1262
1263             # extract_info may return None when ignoreerrors is enabled and
1264             # extraction failed with an error, don't crash and return early
1265             # in this case
1266             if not info:
1267                 return info
1268
1269             force_properties = dict(
1270                 (k, v) for k, v in ie_result.items() if v is not None)
1271             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1272                 if f in force_properties:
1273                     del force_properties[f]
1274             new_result = info.copy()
1275             new_result.update(force_properties)
1276
1277             # Extracted info may not be a video result (i.e.
1278             # info.get('_type', 'video') != video) but rather an url or
1279             # url_transparent. In such cases outer metadata (from ie_result)
1280             # should be propagated to inner one (info). For this to happen
1281             # _type of info should be overridden with url_transparent. This
1282             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1283             if new_result.get('_type') == 'url':
1284                 new_result['_type'] = 'url_transparent'
1285
1286             return self.process_ie_result(
1287                 new_result, download=download, extra_info=extra_info)
1288         elif result_type in ('playlist', 'multi_video'):
1289             # Protect from infinite recursion due to recursively nested playlists
1290             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1291             webpage_url = ie_result['webpage_url']
1292             if webpage_url in self._playlist_urls:
1293                 self.to_screen(
1294                     '[download] Skipping already downloaded playlist: %s'
1295                     % ie_result.get('title') or ie_result.get('id'))
1296                 return
1297
1298             self._playlist_level += 1
1299             self._playlist_urls.add(webpage_url)
1300             self._sanitize_thumbnails(ie_result)
1301             try:
1302                 return self.__process_playlist(ie_result, download)
1303             finally:
1304                 self._playlist_level -= 1
1305                 if not self._playlist_level:
1306                     self._playlist_urls.clear()
1307         elif result_type == 'compat_list':
1308             self.report_warning(
1309                 'Extractor %s returned a compat_list result. '
1310                 'It needs to be updated.' % ie_result.get('extractor'))
1311
1312             def _fixup(r):
1313                 self.add_extra_info(
1314                     r,
1315                     {
1316                         'extractor': ie_result['extractor'],
1317                         'webpage_url': ie_result['webpage_url'],
1318                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
1319                         'extractor_key': ie_result['extractor_key'],
1320                     }
1321                 )
1322                 return r
1323             ie_result['entries'] = [
1324                 self.process_ie_result(_fixup(r), download, extra_info)
1325                 for r in ie_result['entries']
1326             ]
1327             return ie_result
1328         else:
1329             raise Exception('Invalid result type: %s' % result_type)
1330
1331     def _ensure_dir_exists(self, path):
1332         return make_dir(path, self.report_error)
1333
1334     def __process_playlist(self, ie_result, download):
1335         # We process each entry in the playlist
1336         playlist = ie_result.get('title') or ie_result.get('id')
1337         self.to_screen('[download] Downloading playlist: %s' % playlist)
1338
1339         if 'entries' not in ie_result:
1340             raise EntryNotInPlaylist()
1341         incomplete_entries = bool(ie_result.get('requested_entries'))
1342         if incomplete_entries:
1343             def fill_missing_entries(entries, indexes):
1344                 ret = [None] * max(*indexes)
1345                 for i, entry in zip(indexes, entries):
1346                     ret[i - 1] = entry
1347                 return ret
1348             ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1349
1350         playlist_results = []
1351
1352         playliststart = self.params.get('playliststart', 1)
1353         playlistend = self.params.get('playlistend')
1354         # For backwards compatibility, interpret -1 as whole list
1355         if playlistend == -1:
1356             playlistend = None
1357
1358         playlistitems_str = self.params.get('playlist_items')
1359         playlistitems = None
1360         if playlistitems_str is not None:
1361             def iter_playlistitems(format):
1362                 for string_segment in format.split(','):
1363                     if '-' in string_segment:
1364                         start, end = string_segment.split('-')
1365                         for item in range(int(start), int(end) + 1):
1366                             yield int(item)
1367                     else:
1368                         yield int(string_segment)
1369             playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1370
1371         ie_entries = ie_result['entries']
1372         msg = (
1373             'Downloading %d videos' if not isinstance(ie_entries, list)
1374             else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
1375         if not isinstance(ie_entries, (list, PagedList)):
1376             ie_entries = LazyList(ie_entries)
1377
1378         def get_entry(i):
1379             return YoutubeDL.__handle_extraction_exceptions(
1380                 lambda self, i: ie_entries[i - 1],
1381                 False
1382             )(self, i)
1383
1384         entries = []
1385         for i in playlistitems or itertools.count(playliststart):
1386             if playlistitems is None and playlistend is not None and playlistend < i:
1387                 break
1388             entry = None
1389             try:
1390                 entry = get_entry(i)
1391                 if entry is None:
1392                     raise EntryNotInPlaylist()
1393             except (IndexError, EntryNotInPlaylist):
1394                 if incomplete_entries:
1395                     raise EntryNotInPlaylist()
1396                 elif not playlistitems:
1397                     break
1398             entries.append(entry)
1399             try:
1400                 if entry is not None:
1401                     self._match_entry(entry, incomplete=True, silent=True)
1402             except (ExistingVideoReached, RejectedVideoReached):
1403                 break
1404         ie_result['entries'] = entries
1405
1406         # Save playlist_index before re-ordering
1407         entries = [
1408             ((playlistitems[i - 1] if playlistitems else i), entry)
1409             for i, entry in enumerate(entries, 1)
1410             if entry is not None]
1411         n_entries = len(entries)
1412
1413         if not playlistitems and (playliststart or playlistend):
1414             playlistitems = list(range(playliststart, playliststart + n_entries))
1415         ie_result['requested_entries'] = playlistitems
1416
1417         if self.params.get('allow_playlist_files', True):
1418             ie_copy = {
1419                 'playlist': playlist,
1420                 'playlist_id': ie_result.get('id'),
1421                 'playlist_title': ie_result.get('title'),
1422                 'playlist_uploader': ie_result.get('uploader'),
1423                 'playlist_uploader_id': ie_result.get('uploader_id'),
1424                 'playlist_index': 0,
1425             }
1426             ie_copy.update(dict(ie_result))
1427
1428             if self.params.get('writeinfojson', False):
1429                 infofn = self.prepare_filename(ie_copy, 'pl_infojson')
1430                 if not self._ensure_dir_exists(encodeFilename(infofn)):
1431                     return
1432                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
1433                     self.to_screen('[info] Playlist metadata is already present')
1434                 else:
1435                     self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
1436                     try:
1437                         write_json_file(self.filter_requested_info(ie_result, self.params.get('clean_infojson', True)), infofn)
1438                     except (OSError, IOError):
1439                         self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
1440
1441             # TODO: This should be passed to ThumbnailsConvertor if necessary
1442             self._write_thumbnails(ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1443
1444             if self.params.get('writedescription', False):
1445                 descfn = self.prepare_filename(ie_copy, 'pl_description')
1446                 if not self._ensure_dir_exists(encodeFilename(descfn)):
1447                     return
1448                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
1449                     self.to_screen('[info] Playlist description is already present')
1450                 elif ie_result.get('description') is None:
1451                     self.report_warning('There\'s no playlist description to write.')
1452                 else:
1453                     try:
1454                         self.to_screen('[info] Writing playlist description to: ' + descfn)
1455                         with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1456                             descfile.write(ie_result['description'])
1457                     except (OSError, IOError):
1458                         self.report_error('Cannot write playlist description file ' + descfn)
1459                         return
1460
1461         if self.params.get('playlistreverse', False):
1462             entries = entries[::-1]
1463         if self.params.get('playlistrandom', False):
1464             random.shuffle(entries)
1465
1466         x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1467
1468         self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
1469         failures = 0
1470         max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1471         for i, entry_tuple in enumerate(entries, 1):
1472             playlist_index, entry = entry_tuple
1473             if 'playlist_index' in self.params.get('compat_options', []):
1474                 playlist_index = playlistitems[i - 1] if playlistitems else i
1475             self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1476             # This __x_forwarded_for_ip thing is a bit ugly but requires
1477             # minimal changes
1478             if x_forwarded_for:
1479                 entry['__x_forwarded_for_ip'] = x_forwarded_for
1480             extra = {
1481                 'n_entries': n_entries,
1482                 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
1483                 'playlist_index': playlist_index,
1484                 'playlist_autonumber': i,
1485                 'playlist': playlist,
1486                 'playlist_id': ie_result.get('id'),
1487                 'playlist_title': ie_result.get('title'),
1488                 'playlist_uploader': ie_result.get('uploader'),
1489                 'playlist_uploader_id': ie_result.get('uploader_id'),
1490                 'extractor': ie_result['extractor'],
1491                 'webpage_url': ie_result['webpage_url'],
1492                 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1493                 'extractor_key': ie_result['extractor_key'],
1494             }
1495
1496             if self._match_entry(entry, incomplete=True) is not None:
1497                 continue
1498
1499             entry_result = self.__process_iterable_entry(entry, download, extra)
1500             if not entry_result:
1501                 failures += 1
1502             if failures >= max_failures:
1503                 self.report_error(
1504                     'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1505                 break
1506             # TODO: skip failed (empty) entries?
1507             playlist_results.append(entry_result)
1508         ie_result['entries'] = playlist_results
1509         self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1510         return ie_result
1511
1512     @__handle_extraction_exceptions
1513     def __process_iterable_entry(self, entry, download, extra_info):
1514         return self.process_ie_result(
1515             entry, download=download, extra_info=extra_info)
1516
1517     def _build_format_filter(self, filter_spec):
1518         " Returns a function to filter the formats according to the filter_spec "
1519
1520         OPERATORS = {
1521             '<': operator.lt,
1522             '<=': operator.le,
1523             '>': operator.gt,
1524             '>=': operator.ge,
1525             '=': operator.eq,
1526             '!=': operator.ne,
1527         }
1528         operator_rex = re.compile(r'''(?x)\s*
1529             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1530             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1531             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1532             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1533         m = operator_rex.fullmatch(filter_spec)
1534         if m:
1535             try:
1536                 comparison_value = int(m.group('value'))
1537             except ValueError:
1538                 comparison_value = parse_filesize(m.group('value'))
1539                 if comparison_value is None:
1540                     comparison_value = parse_filesize(m.group('value') + 'B')
1541                 if comparison_value is None:
1542                     raise ValueError(
1543                         'Invalid value %r in format specification %r' % (
1544                             m.group('value'), filter_spec))
1545             op = OPERATORS[m.group('op')]
1546
1547         if not m:
1548             STR_OPERATORS = {
1549                 '=': operator.eq,
1550                 '^=': lambda attr, value: attr.startswith(value),
1551                 '$=': lambda attr, value: attr.endswith(value),
1552                 '*=': lambda attr, value: value in attr,
1553             }
1554             str_operator_rex = re.compile(r'''(?x)\s*
1555                 (?P<key>[a-zA-Z0-9._-]+)\s*
1556                 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1557                 (?P<value>[a-zA-Z0-9._-]+)\s*
1558                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1559             m = str_operator_rex.fullmatch(filter_spec)
1560             if m:
1561                 comparison_value = m.group('value')
1562                 str_op = STR_OPERATORS[m.group('op')]
1563                 if m.group('negation'):
1564                     op = lambda attr, value: not str_op(attr, value)
1565                 else:
1566                     op = str_op
1567
1568         if not m:
1569             raise SyntaxError('Invalid filter specification %r' % filter_spec)
1570
1571         def _filter(f):
1572             actual_value = f.get(m.group('key'))
1573             if actual_value is None:
1574                 return m.group('none_inclusive')
1575             return op(actual_value, comparison_value)
1576         return _filter
1577
1578     def _default_format_spec(self, info_dict, download=True):
1579
1580         def can_merge():
1581             merger = FFmpegMergerPP(self)
1582             return merger.available and merger.can_merge()
1583
1584         prefer_best = (
1585             not self.params.get('simulate', False)
1586             and download
1587             and (
1588                 not can_merge()
1589                 or info_dict.get('is_live', False)
1590                 or self.outtmpl_dict['default'] == '-'))
1591         compat = (
1592             prefer_best
1593             or self.params.get('allow_multiple_audio_streams', False)
1594             or 'format-spec' in self.params.get('compat_opts', []))
1595
1596         return (
1597             'best/bestvideo+bestaudio' if prefer_best
1598             else 'bestvideo*+bestaudio/best' if not compat
1599             else 'bestvideo+bestaudio/best')
1600
1601     def build_format_selector(self, format_spec):
1602         def syntax_error(note, start):
1603             message = (
1604                 'Invalid format specification: '
1605                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1606             return SyntaxError(message)
1607
1608         PICKFIRST = 'PICKFIRST'
1609         MERGE = 'MERGE'
1610         SINGLE = 'SINGLE'
1611         GROUP = 'GROUP'
1612         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1613
1614         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1615                                   'video': self.params.get('allow_multiple_video_streams', False)}
1616
1617         check_formats = self.params.get('check_formats')
1618
1619         def _parse_filter(tokens):
1620             filter_parts = []
1621             for type, string, start, _, _ in tokens:
1622                 if type == tokenize.OP and string == ']':
1623                     return ''.join(filter_parts)
1624                 else:
1625                     filter_parts.append(string)
1626
1627         def _remove_unused_ops(tokens):
1628             # Remove operators that we don't use and join them with the surrounding strings
1629             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1630             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1631             last_string, last_start, last_end, last_line = None, None, None, None
1632             for type, string, start, end, line in tokens:
1633                 if type == tokenize.OP and string == '[':
1634                     if last_string:
1635                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1636                         last_string = None
1637                     yield type, string, start, end, line
1638                     # everything inside brackets will be handled by _parse_filter
1639                     for type, string, start, end, line in tokens:
1640                         yield type, string, start, end, line
1641                         if type == tokenize.OP and string == ']':
1642                             break
1643                 elif type == tokenize.OP and string in ALLOWED_OPS:
1644                     if last_string:
1645                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1646                         last_string = None
1647                     yield type, string, start, end, line
1648                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1649                     if not last_string:
1650                         last_string = string
1651                         last_start = start
1652                         last_end = end
1653                     else:
1654                         last_string += string
1655             if last_string:
1656                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1657
1658         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1659             selectors = []
1660             current_selector = None
1661             for type, string, start, _, _ in tokens:
1662                 # ENCODING is only defined in python 3.x
1663                 if type == getattr(tokenize, 'ENCODING', None):
1664                     continue
1665                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1666                     current_selector = FormatSelector(SINGLE, string, [])
1667                 elif type == tokenize.OP:
1668                     if string == ')':
1669                         if not inside_group:
1670                             # ')' will be handled by the parentheses group
1671                             tokens.restore_last_token()
1672                         break
1673                     elif inside_merge and string in ['/', ',']:
1674                         tokens.restore_last_token()
1675                         break
1676                     elif inside_choice and string == ',':
1677                         tokens.restore_last_token()
1678                         break
1679                     elif string == ',':
1680                         if not current_selector:
1681                             raise syntax_error('"," must follow a format selector', start)
1682                         selectors.append(current_selector)
1683                         current_selector = None
1684                     elif string == '/':
1685                         if not current_selector:
1686                             raise syntax_error('"/" must follow a format selector', start)
1687                         first_choice = current_selector
1688                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1689                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1690                     elif string == '[':
1691                         if not current_selector:
1692                             current_selector = FormatSelector(SINGLE, 'best', [])
1693                         format_filter = _parse_filter(tokens)
1694                         current_selector.filters.append(format_filter)
1695                     elif string == '(':
1696                         if current_selector:
1697                             raise syntax_error('Unexpected "("', start)
1698                         group = _parse_format_selection(tokens, inside_group=True)
1699                         current_selector = FormatSelector(GROUP, group, [])
1700                     elif string == '+':
1701                         if not current_selector:
1702                             raise syntax_error('Unexpected "+"', start)
1703                         selector_1 = current_selector
1704                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
1705                         if not selector_2:
1706                             raise syntax_error('Expected a selector', start)
1707                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1708                     else:
1709                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1710                 elif type == tokenize.ENDMARKER:
1711                     break
1712             if current_selector:
1713                 selectors.append(current_selector)
1714             return selectors
1715
1716         def _merge(formats_pair):
1717             format_1, format_2 = formats_pair
1718
1719             formats_info = []
1720             formats_info.extend(format_1.get('requested_formats', (format_1,)))
1721             formats_info.extend(format_2.get('requested_formats', (format_2,)))
1722
1723             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1724                 get_no_more = {'video': False, 'audio': False}
1725                 for (i, fmt_info) in enumerate(formats_info):
1726                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
1727                         formats_info.pop(i)
1728                         continue
1729                     for aud_vid in ['audio', 'video']:
1730                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1731                             if get_no_more[aud_vid]:
1732                                 formats_info.pop(i)
1733                             get_no_more[aud_vid] = True
1734
1735             if len(formats_info) == 1:
1736                 return formats_info[0]
1737
1738             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1739             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1740
1741             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1742             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1743
1744             output_ext = self.params.get('merge_output_format')
1745             if not output_ext:
1746                 if the_only_video:
1747                     output_ext = the_only_video['ext']
1748                 elif the_only_audio and not video_fmts:
1749                     output_ext = the_only_audio['ext']
1750                 else:
1751                     output_ext = 'mkv'
1752
1753             new_dict = {
1754                 'requested_formats': formats_info,
1755                 'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
1756                 'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
1757                 'ext': output_ext,
1758             }
1759
1760             if the_only_video:
1761                 new_dict.update({
1762                     'width': the_only_video.get('width'),
1763                     'height': the_only_video.get('height'),
1764                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
1765                     'fps': the_only_video.get('fps'),
1766                     'vcodec': the_only_video.get('vcodec'),
1767                     'vbr': the_only_video.get('vbr'),
1768                     'stretched_ratio': the_only_video.get('stretched_ratio'),
1769                 })
1770
1771             if the_only_audio:
1772                 new_dict.update({
1773                     'acodec': the_only_audio.get('acodec'),
1774                     'abr': the_only_audio.get('abr'),
1775                 })
1776
1777             return new_dict
1778
1779         def _check_formats(formats):
1780             if not check_formats:
1781                 yield from formats
1782                 return
1783             for f in formats:
1784                 self.to_screen('[info] Testing format %s' % f['format_id'])
1785                 temp_file = tempfile.NamedTemporaryFile(
1786                     suffix='.tmp', delete=False,
1787                     dir=self.get_output_path('temp') or None)
1788                 temp_file.close()
1789                 try:
1790                     success, _ = self.dl(temp_file.name, f, test=True)
1791                 except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1792                     success = False
1793                 finally:
1794                     if os.path.exists(temp_file.name):
1795                         try:
1796                             os.remove(temp_file.name)
1797                         except OSError:
1798                             self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1799                 if success:
1800                     yield f
1801                 else:
1802                     self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1803
1804         def _build_selector_function(selector):
1805             if isinstance(selector, list):  # ,
1806                 fs = [_build_selector_function(s) for s in selector]
1807
1808                 def selector_function(ctx):
1809                     for f in fs:
1810                         yield from f(ctx)
1811                 return selector_function
1812
1813             elif selector.type == GROUP:  # ()
1814                 selector_function = _build_selector_function(selector.selector)
1815
1816             elif selector.type == PICKFIRST:  # /
1817                 fs = [_build_selector_function(s) for s in selector.selector]
1818
1819                 def selector_function(ctx):
1820                     for f in fs:
1821                         picked_formats = list(f(ctx))
1822                         if picked_formats:
1823                             return picked_formats
1824                     return []
1825
1826             elif selector.type == MERGE:  # +
1827                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
1828
1829                 def selector_function(ctx):
1830                     for pair in itertools.product(
1831                             selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
1832                         yield _merge(pair)
1833
1834             elif selector.type == SINGLE:  # atom
1835                 format_spec = selector.selector or 'best'
1836
1837                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
1838                 if format_spec == 'all':
1839                     def selector_function(ctx):
1840                         yield from _check_formats(ctx['formats'])
1841                 elif format_spec == 'mergeall':
1842                     def selector_function(ctx):
1843                         formats = list(_check_formats(ctx['formats']))
1844                         if not formats:
1845                             return
1846                         merged_format = formats[-1]
1847                         for f in formats[-2::-1]:
1848                             merged_format = _merge((merged_format, f))
1849                         yield merged_format
1850
1851                 else:
1852                     format_fallback, format_reverse, format_idx = False, True, 1
1853                     mobj = re.match(
1854                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
1855                         format_spec)
1856                     if mobj is not None:
1857                         format_idx = int_or_none(mobj.group('n'), default=1)
1858                         format_reverse = mobj.group('bw')[0] == 'b'
1859                         format_type = (mobj.group('type') or [None])[0]
1860                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
1861                         format_modified = mobj.group('mod') is not None
1862
1863                         format_fallback = not format_type and not format_modified  # for b, w
1864                         _filter_f = (
1865                             (lambda f: f.get('%scodec' % format_type) != 'none')
1866                             if format_type and format_modified  # bv*, ba*, wv*, wa*
1867                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
1868                             if format_type  # bv, ba, wv, wa
1869                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
1870                             if not format_modified  # b, w
1871                             else lambda f: True)  # b*, w*
1872                         filter_f = lambda f: _filter_f(f) and (
1873                             f.get('vcodec') != 'none' or f.get('acodec') != 'none')
1874                     else:
1875                         filter_f = ((lambda f: f.get('ext') == format_spec)
1876                                     if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
1877                                     else (lambda f: f.get('format_id') == format_spec))  # id
1878
1879                     def selector_function(ctx):
1880                         formats = list(ctx['formats'])
1881                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
1882                         if format_fallback and ctx['incomplete_formats'] and not matches:
1883                             # for extractors with incomplete formats (audio only (soundcloud)
1884                             # or video only (imgur)) best/worst will fallback to
1885                             # best/worst {video,audio}-only format
1886                             matches = formats
1887                         matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
1888                         try:
1889                             yield matches[format_idx - 1]
1890                         except IndexError:
1891                             return
1892
1893             filters = [self._build_format_filter(f) for f in selector.filters]
1894
1895             def final_selector(ctx):
1896                 ctx_copy = copy.deepcopy(ctx)
1897                 for _filter in filters:
1898                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1899                 return selector_function(ctx_copy)
1900             return final_selector
1901
1902         stream = io.BytesIO(format_spec.encode('utf-8'))
1903         try:
1904             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1905         except tokenize.TokenError:
1906             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1907
1908         class TokenIterator(object):
1909             def __init__(self, tokens):
1910                 self.tokens = tokens
1911                 self.counter = 0
1912
1913             def __iter__(self):
1914                 return self
1915
1916             def __next__(self):
1917                 if self.counter >= len(self.tokens):
1918                     raise StopIteration()
1919                 value = self.tokens[self.counter]
1920                 self.counter += 1
1921                 return value
1922
1923             next = __next__
1924
1925             def restore_last_token(self):
1926                 self.counter -= 1
1927
1928         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1929         return _build_selector_function(parsed_selector)
1930
1931     def _calc_headers(self, info_dict):
1932         res = std_headers.copy()
1933
1934         add_headers = info_dict.get('http_headers')
1935         if add_headers:
1936             res.update(add_headers)
1937
1938         cookies = self._calc_cookies(info_dict)
1939         if cookies:
1940             res['Cookie'] = cookies
1941
1942         if 'X-Forwarded-For' not in res:
1943             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1944             if x_forwarded_for_ip:
1945                 res['X-Forwarded-For'] = x_forwarded_for_ip
1946
1947         return res
1948
1949     def _calc_cookies(self, info_dict):
1950         pr = sanitized_Request(info_dict['url'])
1951         self.cookiejar.add_cookie_header(pr)
1952         return pr.get_header('Cookie')
1953
1954     def _sanitize_thumbnails(self, info_dict):
1955         thumbnails = info_dict.get('thumbnails')
1956         if thumbnails is None:
1957             thumbnail = info_dict.get('thumbnail')
1958             if thumbnail:
1959                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1960         if thumbnails:
1961             thumbnails.sort(key=lambda t: (
1962                 t.get('preference') if t.get('preference') is not None else -1,
1963                 t.get('width') if t.get('width') is not None else -1,
1964                 t.get('height') if t.get('height') is not None else -1,
1965                 t.get('id') if t.get('id') is not None else '',
1966                 t.get('url')))
1967
1968             def thumbnail_tester():
1969                 if self.params.get('check_formats'):
1970                     test_all = True
1971                     to_screen = lambda msg: self.to_screen(f'[info] {msg}')
1972                 else:
1973                     test_all = False
1974                     to_screen = self.write_debug
1975
1976                 def test_thumbnail(t):
1977                     if not test_all and not t.get('_test_url'):
1978                         return True
1979                     to_screen('Testing thumbnail %s' % t['id'])
1980                     try:
1981                         self.urlopen(HEADRequest(t['url']))
1982                     except network_exceptions as err:
1983                         to_screen('Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % (
1984                             t['id'], t['url'], error_to_compat_str(err)))
1985                         return False
1986                     return True
1987
1988                 return test_thumbnail
1989
1990             for i, t in enumerate(thumbnails):
1991                 if t.get('id') is None:
1992                     t['id'] = '%d' % i
1993                 if t.get('width') and t.get('height'):
1994                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1995                 t['url'] = sanitize_url(t['url'])
1996
1997             if self.params.get('check_formats') is not False:
1998                 info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse()
1999             else:
2000                 info_dict['thumbnails'] = thumbnails
2001
2002     def process_video_result(self, info_dict, download=True):
2003         assert info_dict.get('_type', 'video') == 'video'
2004
2005         if 'id' not in info_dict:
2006             raise ExtractorError('Missing "id" field in extractor result')
2007         if 'title' not in info_dict:
2008             raise ExtractorError('Missing "title" field in extractor result')
2009
2010         def report_force_conversion(field, field_not, conversion):
2011             self.report_warning(
2012                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2013                 % (field, field_not, conversion))
2014
2015         def sanitize_string_field(info, string_field):
2016             field = info.get(string_field)
2017             if field is None or isinstance(field, compat_str):
2018                 return
2019             report_force_conversion(string_field, 'a string', 'string')
2020             info[string_field] = compat_str(field)
2021
2022         def sanitize_numeric_fields(info):
2023             for numeric_field in self._NUMERIC_FIELDS:
2024                 field = info.get(numeric_field)
2025                 if field is None or isinstance(field, compat_numeric_types):
2026                     continue
2027                 report_force_conversion(numeric_field, 'numeric', 'int')
2028                 info[numeric_field] = int_or_none(field)
2029
2030         sanitize_string_field(info_dict, 'id')
2031         sanitize_numeric_fields(info_dict)
2032
2033         if 'playlist' not in info_dict:
2034             # It isn't part of a playlist
2035             info_dict['playlist'] = None
2036             info_dict['playlist_index'] = None
2037
2038         self._sanitize_thumbnails(info_dict)
2039
2040         thumbnail = info_dict.get('thumbnail')
2041         thumbnails = info_dict.get('thumbnails')
2042         if thumbnail:
2043             info_dict['thumbnail'] = sanitize_url(thumbnail)
2044         elif thumbnails:
2045             info_dict['thumbnail'] = thumbnails[-1]['url']
2046
2047         if info_dict.get('display_id') is None and 'id' in info_dict:
2048             info_dict['display_id'] = info_dict['id']
2049
2050         for ts_key, date_key in (
2051                 ('timestamp', 'upload_date'),
2052                 ('release_timestamp', 'release_date'),
2053         ):
2054             if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2055                 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2056                 # see http://bugs.python.org/issue1646728)
2057                 try:
2058                     upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2059                     info_dict[date_key] = upload_date.strftime('%Y%m%d')
2060                 except (ValueError, OverflowError, OSError):
2061                     pass
2062
2063         live_keys = ('is_live', 'was_live')
2064         live_status = info_dict.get('live_status')
2065         if live_status is None:
2066             for key in live_keys:
2067                 if info_dict.get(key) is False:
2068                     continue
2069                 if info_dict.get(key):
2070                     live_status = key
2071                 break
2072             if all(info_dict.get(key) is False for key in live_keys):
2073                 live_status = 'not_live'
2074         if live_status:
2075             info_dict['live_status'] = live_status
2076             for key in live_keys:
2077                 if info_dict.get(key) is None:
2078                     info_dict[key] = (live_status == key)
2079
2080         # Auto generate title fields corresponding to the *_number fields when missing
2081         # in order to always have clean titles. This is very common for TV series.
2082         for field in ('chapter', 'season', 'episode'):
2083             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2084                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2085
2086         for cc_kind in ('subtitles', 'automatic_captions'):
2087             cc = info_dict.get(cc_kind)
2088             if cc:
2089                 for _, subtitle in cc.items():
2090                     for subtitle_format in subtitle:
2091                         if subtitle_format.get('url'):
2092                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2093                         if subtitle_format.get('ext') is None:
2094                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2095
2096         automatic_captions = info_dict.get('automatic_captions')
2097         subtitles = info_dict.get('subtitles')
2098
2099         info_dict['requested_subtitles'] = self.process_subtitles(
2100             info_dict['id'], subtitles, automatic_captions)
2101
2102         # We now pick which formats have to be downloaded
2103         if info_dict.get('formats') is None:
2104             # There's only one format available
2105             formats = [info_dict]
2106         else:
2107             formats = info_dict['formats']
2108
2109         if not formats:
2110             if not self.params.get('ignore_no_formats_error'):
2111                 raise ExtractorError('No video formats found!')
2112             else:
2113                 self.report_warning('No video formats found!')
2114
2115         def is_wellformed(f):
2116             url = f.get('url')
2117             if not url:
2118                 self.report_warning(
2119                     '"url" field is missing or empty - skipping format, '
2120                     'there is an error in extractor')
2121                 return False
2122             if isinstance(url, bytes):
2123                 sanitize_string_field(f, 'url')
2124             return True
2125
2126         # Filter out malformed formats for better extraction robustness
2127         formats = list(filter(is_wellformed, formats))
2128
2129         formats_dict = {}
2130
2131         # We check that all the formats have the format and format_id fields
2132         for i, format in enumerate(formats):
2133             sanitize_string_field(format, 'format_id')
2134             sanitize_numeric_fields(format)
2135             format['url'] = sanitize_url(format['url'])
2136             if not format.get('format_id'):
2137                 format['format_id'] = compat_str(i)
2138             else:
2139                 # Sanitize format_id from characters used in format selector expression
2140                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2141             format_id = format['format_id']
2142             if format_id not in formats_dict:
2143                 formats_dict[format_id] = []
2144             formats_dict[format_id].append(format)
2145
2146         # Make sure all formats have unique format_id
2147         for format_id, ambiguous_formats in formats_dict.items():
2148             if len(ambiguous_formats) > 1:
2149                 for i, format in enumerate(ambiguous_formats):
2150                     format['format_id'] = '%s-%d' % (format_id, i)
2151
2152         for i, format in enumerate(formats):
2153             if format.get('format') is None:
2154                 format['format'] = '{id} - {res}{note}'.format(
2155                     id=format['format_id'],
2156                     res=self.format_resolution(format),
2157                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
2158                 )
2159             # Automatically determine file extension if missing
2160             if format.get('ext') is None:
2161                 format['ext'] = determine_ext(format['url']).lower()
2162             # Automatically determine protocol if missing (useful for format
2163             # selection purposes)
2164             if format.get('protocol') is None:
2165                 format['protocol'] = determine_protocol(format)
2166             # Add HTTP headers, so that external programs can use them from the
2167             # json output
2168             full_format_info = info_dict.copy()
2169             full_format_info.update(format)
2170             format['http_headers'] = self._calc_headers(full_format_info)
2171         # Remove private housekeeping stuff
2172         if '__x_forwarded_for_ip' in info_dict:
2173             del info_dict['__x_forwarded_for_ip']
2174
2175         # TODO Central sorting goes here
2176
2177         if formats and formats[0] is not info_dict:
2178             # only set the 'formats' fields if the original info_dict list them
2179             # otherwise we end up with a circular reference, the first (and unique)
2180             # element in the 'formats' field in info_dict is info_dict itself,
2181             # which can't be exported to json
2182             info_dict['formats'] = formats
2183
2184         info_dict, _ = self.pre_process(info_dict)
2185
2186         list_only = self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles')
2187         if list_only:
2188             self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2189             if self.params.get('list_thumbnails'):
2190                 self.list_thumbnails(info_dict)
2191             if self.params.get('listformats'):
2192                 if not info_dict.get('formats'):
2193                     raise ExtractorError('No video formats found', expected=True)
2194                 self.list_formats(info_dict)
2195             if self.params.get('listsubtitles'):
2196                 if 'automatic_captions' in info_dict:
2197                     self.list_subtitles(
2198                         info_dict['id'], automatic_captions, 'automatic captions')
2199                 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2200             return
2201
2202         format_selector = self.format_selector
2203         if format_selector is None:
2204             req_format = self._default_format_spec(info_dict, download=download)
2205             self.write_debug('Default format spec: %s' % req_format)
2206             format_selector = self.build_format_selector(req_format)
2207
2208         # While in format selection we may need to have an access to the original
2209         # format set in order to calculate some metrics or do some processing.
2210         # For now we need to be able to guess whether original formats provided
2211         # by extractor are incomplete or not (i.e. whether extractor provides only
2212         # video-only or audio-only formats) for proper formats selection for
2213         # extractors with such incomplete formats (see
2214         # https://github.com/ytdl-org/youtube-dl/pull/5556).
2215         # Since formats may be filtered during format selection and may not match
2216         # the original formats the results may be incorrect. Thus original formats
2217         # or pre-calculated metrics should be passed to format selection routines
2218         # as well.
2219         # We will pass a context object containing all necessary additional data
2220         # instead of just formats.
2221         # This fixes incorrect format selection issue (see
2222         # https://github.com/ytdl-org/youtube-dl/issues/10083).
2223         incomplete_formats = (
2224             # All formats are video-only or
2225             all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2226             # all formats are audio-only
2227             or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
2228
2229         ctx = {
2230             'formats': formats,
2231             'incomplete_formats': incomplete_formats,
2232         }
2233
2234         formats_to_download = list(format_selector(ctx))
2235         if not formats_to_download:
2236             if not self.params.get('ignore_no_formats_error'):
2237                 raise ExtractorError('Requested format is not available', expected=True)
2238             else:
2239                 self.report_warning('Requested format is not available')
2240                 # Process what we can, even without any available formats.
2241                 self.process_info(dict(info_dict))
2242         elif download:
2243             self.to_screen(
2244                 '[info] %s: Downloading %d format(s): %s' % (
2245                     info_dict['id'], len(formats_to_download),
2246                     ", ".join([f['format_id'] for f in formats_to_download])))
2247             for fmt in formats_to_download:
2248                 new_info = dict(info_dict)
2249                 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2250                 new_info['__original_infodict'] = info_dict
2251                 new_info.update(fmt)
2252                 self.process_info(new_info)
2253         # We update the info dict with the best quality format (backwards compatibility)
2254         if formats_to_download:
2255             info_dict.update(formats_to_download[-1])
2256         return info_dict
2257
2258     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2259         """Select the requested subtitles and their format"""
2260         available_subs = {}
2261         if normal_subtitles and self.params.get('writesubtitles'):
2262             available_subs.update(normal_subtitles)
2263         if automatic_captions and self.params.get('writeautomaticsub'):
2264             for lang, cap_info in automatic_captions.items():
2265                 if lang not in available_subs:
2266                     available_subs[lang] = cap_info
2267
2268         if (not self.params.get('writesubtitles') and not
2269                 self.params.get('writeautomaticsub') or not
2270                 available_subs):
2271             return None
2272
2273         all_sub_langs = available_subs.keys()
2274         if self.params.get('allsubtitles', False):
2275             requested_langs = all_sub_langs
2276         elif self.params.get('subtitleslangs', False):
2277             requested_langs = set()
2278             for lang in self.params.get('subtitleslangs'):
2279                 if lang == 'all':
2280                     requested_langs.update(all_sub_langs)
2281                     continue
2282                 discard = lang[0] == '-'
2283                 if discard:
2284                     lang = lang[1:]
2285                 current_langs = filter(re.compile(lang + '$').match, all_sub_langs)
2286                 if discard:
2287                     for lang in current_langs:
2288                         requested_langs.discard(lang)
2289                 else:
2290                     requested_langs.update(current_langs)
2291         elif 'en' in available_subs:
2292             requested_langs = ['en']
2293         else:
2294             requested_langs = [list(all_sub_langs)[0]]
2295         self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2296
2297         formats_query = self.params.get('subtitlesformat', 'best')
2298         formats_preference = formats_query.split('/') if formats_query else []
2299         subs = {}
2300         for lang in requested_langs:
2301             formats = available_subs.get(lang)
2302             if formats is None:
2303                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2304                 continue
2305             for ext in formats_preference:
2306                 if ext == 'best':
2307                     f = formats[-1]
2308                     break
2309                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2310                 if matches:
2311                     f = matches[-1]
2312                     break
2313             else:
2314                 f = formats[-1]
2315                 self.report_warning(
2316                     'No subtitle format found matching "%s" for language %s, '
2317                     'using %s' % (formats_query, lang, f['ext']))
2318             subs[lang] = f
2319         return subs
2320
2321     def __forced_printings(self, info_dict, filename, incomplete):
2322         def print_mandatory(field, actual_field=None):
2323             if actual_field is None:
2324                 actual_field = field
2325             if (self.params.get('force%s' % field, False)
2326                     and (not incomplete or info_dict.get(actual_field) is not None)):
2327                 self.to_stdout(info_dict[actual_field])
2328
2329         def print_optional(field):
2330             if (self.params.get('force%s' % field, False)
2331                     and info_dict.get(field) is not None):
2332                 self.to_stdout(info_dict[field])
2333
2334         info_dict = info_dict.copy()
2335         if filename is not None:
2336             info_dict['filename'] = filename
2337         if info_dict.get('requested_formats') is not None:
2338             # For RTMP URLs, also include the playpath
2339             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2340         elif 'url' in info_dict:
2341             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2342
2343         for tmpl in self.params.get('forceprint', []):
2344             if re.match(r'\w+$', tmpl):
2345                 tmpl = '%({})s'.format(tmpl)
2346             tmpl, info_copy = self.prepare_outtmpl(tmpl, info_dict)
2347             self.to_stdout(tmpl % info_copy)
2348
2349         print_mandatory('title')
2350         print_mandatory('id')
2351         print_mandatory('url', 'urls')
2352         print_optional('thumbnail')
2353         print_optional('description')
2354         print_optional('filename')
2355         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
2356             self.to_stdout(formatSeconds(info_dict['duration']))
2357         print_mandatory('format')
2358
2359         if self.params.get('forcejson', False):
2360             self.post_extract(info_dict)
2361             self.to_stdout(json.dumps(info_dict, default=repr))
2362
2363     def dl(self, name, info, subtitle=False, test=False):
2364
2365         if test:
2366             verbose = self.params.get('verbose')
2367             params = {
2368                 'test': True,
2369                 'quiet': not verbose,
2370                 'verbose': verbose,
2371                 'noprogress': not verbose,
2372                 'nopart': True,
2373                 'skip_unavailable_fragments': False,
2374                 'keep_fragments': False,
2375                 'overwrites': True,
2376                 '_no_ytdl_file': True,
2377             }
2378         else:
2379             params = self.params
2380         fd = get_suitable_downloader(info, params)(self, params)
2381         if not test:
2382             for ph in self._progress_hooks:
2383                 fd.add_progress_hook(ph)
2384             urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2385             self.write_debug('Invoking downloader on "%s"' % urls)
2386         new_info = dict(info)
2387         if new_info.get('http_headers') is None:
2388             new_info['http_headers'] = self._calc_headers(new_info)
2389         return fd.download(name, new_info, subtitle)
2390
2391     def process_info(self, info_dict):
2392         """Process a single resolved IE result."""
2393
2394         assert info_dict.get('_type', 'video') == 'video'
2395
2396         info_dict.setdefault('__postprocessors', [])
2397
2398         max_downloads = self.params.get('max_downloads')
2399         if max_downloads is not None:
2400             if self._num_downloads >= int(max_downloads):
2401                 raise MaxDownloadsReached()
2402
2403         # TODO: backward compatibility, to be removed
2404         info_dict['fulltitle'] = info_dict['title']
2405
2406         if 'format' not in info_dict and 'ext' in info_dict:
2407             info_dict['format'] = info_dict['ext']
2408
2409         if self._match_entry(info_dict) is not None:
2410             return
2411
2412         self.post_extract(info_dict)
2413         self._num_downloads += 1
2414
2415         # info_dict['_filename'] needs to be set for backward compatibility
2416         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2417         temp_filename = self.prepare_filename(info_dict, 'temp')
2418         files_to_move = {}
2419
2420         # Forced printings
2421         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2422
2423         if self.params.get('simulate', False):
2424             if self.params.get('force_write_download_archive', False):
2425                 self.record_download_archive(info_dict)
2426
2427             # Do nothing else if in simulate mode
2428             return
2429
2430         if full_filename is None:
2431             return
2432
2433         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2434             return
2435         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2436             return
2437
2438         if self.params.get('writedescription', False):
2439             descfn = self.prepare_filename(info_dict, 'description')
2440             if not self._ensure_dir_exists(encodeFilename(descfn)):
2441                 return
2442             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
2443                 self.to_screen('[info] Video description is already present')
2444             elif info_dict.get('description') is None:
2445                 self.report_warning('There\'s no description to write.')
2446             else:
2447                 try:
2448                     self.to_screen('[info] Writing video description to: ' + descfn)
2449                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
2450                         descfile.write(info_dict['description'])
2451                 except (OSError, IOError):
2452                     self.report_error('Cannot write description file ' + descfn)
2453                     return
2454
2455         if self.params.get('writeannotations', False):
2456             annofn = self.prepare_filename(info_dict, 'annotation')
2457             if not self._ensure_dir_exists(encodeFilename(annofn)):
2458                 return
2459             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2460                 self.to_screen('[info] Video annotations are already present')
2461             elif not info_dict.get('annotations'):
2462                 self.report_warning('There are no annotations to write.')
2463             else:
2464                 try:
2465                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2466                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2467                         annofile.write(info_dict['annotations'])
2468                 except (KeyError, TypeError):
2469                     self.report_warning('There are no annotations to write.')
2470                 except (OSError, IOError):
2471                     self.report_error('Cannot write annotations file: ' + annofn)
2472                     return
2473
2474         subtitles_are_requested = any([self.params.get('writesubtitles', False),
2475                                        self.params.get('writeautomaticsub')])
2476
2477         if subtitles_are_requested and info_dict.get('requested_subtitles'):
2478             # subtitles download errors are already managed as troubles in relevant IE
2479             # that way it will silently go on when used with unsupporting IE
2480             subtitles = info_dict['requested_subtitles']
2481             # ie = self.get_info_extractor(info_dict['extractor_key'])
2482             for sub_lang, sub_info in subtitles.items():
2483                 sub_format = sub_info['ext']
2484                 sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
2485                 sub_filename_final = subtitles_filename(
2486                     self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))
2487                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
2488                     self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
2489                     sub_info['filepath'] = sub_filename
2490                     files_to_move[sub_filename] = sub_filename_final
2491                 else:
2492                     self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
2493                     if sub_info.get('data') is not None:
2494                         try:
2495                             # Use newline='' to prevent conversion of newline characters
2496                             # See https://github.com/ytdl-org/youtube-dl/issues/10268
2497                             with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
2498                                 subfile.write(sub_info['data'])
2499                             sub_info['filepath'] = sub_filename
2500                             files_to_move[sub_filename] = sub_filename_final
2501                         except (OSError, IOError):
2502                             self.report_error('Cannot write subtitles file ' + sub_filename)
2503                             return
2504                     else:
2505                         try:
2506                             self.dl(sub_filename, sub_info.copy(), subtitle=True)
2507                             sub_info['filepath'] = sub_filename
2508                             files_to_move[sub_filename] = sub_filename_final
2509                         except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
2510                             self.report_warning('Unable to download subtitle for "%s": %s' %
2511                                                 (sub_lang, error_to_compat_str(err)))
2512                             continue
2513
2514         if self.params.get('writeinfojson', False):
2515             infofn = self.prepare_filename(info_dict, 'infojson')
2516             if not self._ensure_dir_exists(encodeFilename(infofn)):
2517                 return
2518             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
2519                 self.to_screen('[info] Video metadata is already present')
2520             else:
2521                 self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
2522                 try:
2523                     write_json_file(self.filter_requested_info(info_dict, self.params.get('clean_infojson', True)), infofn)
2524                 except (OSError, IOError):
2525                     self.report_error('Cannot write video metadata to JSON file ' + infofn)
2526                     return
2527             info_dict['__infojson_filename'] = infofn
2528
2529         for thumb_ext in self._write_thumbnails(info_dict, temp_filename):
2530             thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))
2531             thumb_filename = replace_extension(
2532                 self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))
2533             files_to_move[thumb_filename_temp] = thumb_filename
2534
2535         # Write internet shortcut files
2536         url_link = webloc_link = desktop_link = False
2537         if self.params.get('writelink', False):
2538             if sys.platform == "darwin":  # macOS.
2539                 webloc_link = True
2540             elif sys.platform.startswith("linux"):
2541                 desktop_link = True
2542             else:  # if sys.platform in ['win32', 'cygwin']:
2543                 url_link = True
2544         if self.params.get('writeurllink', False):
2545             url_link = True
2546         if self.params.get('writewebloclink', False):
2547             webloc_link = True
2548         if self.params.get('writedesktoplink', False):
2549             desktop_link = True
2550
2551         if url_link or webloc_link or desktop_link:
2552             if 'webpage_url' not in info_dict:
2553                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2554                 return
2555             ascii_url = iri_to_uri(info_dict['webpage_url'])
2556
2557         def _write_link_file(extension, template, newline, embed_filename):
2558             linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
2559             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2560                 self.to_screen('[info] Internet shortcut is already present')
2561             else:
2562                 try:
2563                     self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2564                     with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2565                         template_vars = {'url': ascii_url}
2566                         if embed_filename:
2567                             template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2568                         linkfile.write(template % template_vars)
2569                 except (OSError, IOError):
2570                     self.report_error('Cannot write internet shortcut ' + linkfn)
2571                     return False
2572             return True
2573
2574         if url_link:
2575             if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2576                 return
2577         if webloc_link:
2578             if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2579                 return
2580         if desktop_link:
2581             if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2582                 return
2583
2584         try:
2585             info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2586         except PostProcessingError as err:
2587             self.report_error('Preprocessing: %s' % str(err))
2588             return
2589
2590         must_record_download_archive = False
2591         if self.params.get('skip_download', False):
2592             info_dict['filepath'] = temp_filename
2593             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2594             info_dict['__files_to_move'] = files_to_move
2595             info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2596         else:
2597             # Download
2598             try:
2599
2600                 def existing_file(*filepaths):
2601                     ext = info_dict.get('ext')
2602                     final_ext = self.params.get('final_ext', ext)
2603                     existing_files = []
2604                     for file in orderedSet(filepaths):
2605                         if final_ext != ext:
2606                             converted = replace_extension(file, final_ext, ext)
2607                             if os.path.exists(encodeFilename(converted)):
2608                                 existing_files.append(converted)
2609                         if os.path.exists(encodeFilename(file)):
2610                             existing_files.append(file)
2611
2612                     if not existing_files or self.params.get('overwrites', False):
2613                         for file in orderedSet(existing_files):
2614                             self.report_file_delete(file)
2615                             os.remove(encodeFilename(file))
2616                         return None
2617
2618                     self.report_file_already_downloaded(existing_files[0])
2619                     info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2620                     return existing_files[0]
2621
2622                 success = True
2623                 if info_dict.get('requested_formats') is not None:
2624
2625                     def compatible_formats(formats):
2626                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2627                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2628                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2629                         if len(video_formats) > 2 or len(audio_formats) > 2:
2630                             return False
2631
2632                         # Check extension
2633                         exts = set(format.get('ext') for format in formats)
2634                         COMPATIBLE_EXTS = (
2635                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2636                             set(('webm',)),
2637                         )
2638                         for ext_sets in COMPATIBLE_EXTS:
2639                             if ext_sets.issuperset(exts):
2640                                 return True
2641                         # TODO: Check acodec/vcodec
2642                         return False
2643
2644                     requested_formats = info_dict['requested_formats']
2645                     old_ext = info_dict['ext']
2646                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
2647                         info_dict['ext'] = 'mkv'
2648                         self.report_warning(
2649                             'Requested formats are incompatible for merge and will be merged into mkv.')
2650
2651                     def correct_ext(filename):
2652                         filename_real_ext = os.path.splitext(filename)[1][1:]
2653                         filename_wo_ext = (
2654                             os.path.splitext(filename)[0]
2655                             if filename_real_ext == old_ext
2656                             else filename)
2657                         return '%s.%s' % (filename_wo_ext, info_dict['ext'])
2658
2659                     # Ensure filename always has a correct extension for successful merge
2660                     full_filename = correct_ext(full_filename)
2661                     temp_filename = correct_ext(temp_filename)
2662                     dl_filename = existing_file(full_filename, temp_filename)
2663                     info_dict['__real_download'] = False
2664
2665                     _protocols = set(determine_protocol(f) for f in requested_formats)
2666                     if len(_protocols) == 1:
2667                         info_dict['protocol'] = _protocols.pop()
2668                     directly_mergable = (
2669                         'no-direct-merge' not in self.params.get('compat_opts', [])
2670                         and info_dict.get('protocol') is not None  # All requested formats have same protocol
2671                         and not self.params.get('allow_unplayable_formats')
2672                         and get_suitable_downloader(info_dict, self.params).__name__ == 'FFmpegFD')
2673                     if directly_mergable:
2674                         info_dict['url'] = requested_formats[0]['url']
2675                         # Treat it as a single download
2676                         dl_filename = existing_file(full_filename, temp_filename)
2677                         if dl_filename is None:
2678                             success, real_download = self.dl(temp_filename, info_dict)
2679                             info_dict['__real_download'] = real_download
2680                     else:
2681                         downloaded = []
2682                         merger = FFmpegMergerPP(self)
2683                         if self.params.get('allow_unplayable_formats'):
2684                             self.report_warning(
2685                                 'You have requested merging of multiple formats '
2686                                 'while also allowing unplayable formats to be downloaded. '
2687                                 'The formats won\'t be merged to prevent data corruption.')
2688                         elif not merger.available:
2689                             self.report_warning(
2690                                 'You have requested merging of multiple formats but ffmpeg is not installed. '
2691                                 'The formats won\'t be merged.')
2692
2693                         if dl_filename is None:
2694                             for f in requested_formats:
2695                                 new_info = dict(info_dict)
2696                                 del new_info['requested_formats']
2697                                 new_info.update(f)
2698                                 fname = prepend_extension(
2699                                     self.prepare_filename(new_info, 'temp'),
2700                                     'f%s' % f['format_id'], new_info['ext'])
2701                                 if not self._ensure_dir_exists(fname):
2702                                     return
2703                                 downloaded.append(fname)
2704                                 partial_success, real_download = self.dl(fname, new_info)
2705                                 info_dict['__real_download'] = info_dict['__real_download'] or real_download
2706                                 success = success and partial_success
2707                             if merger.available and not self.params.get('allow_unplayable_formats'):
2708                                 info_dict['__postprocessors'].append(merger)
2709                                 info_dict['__files_to_merge'] = downloaded
2710                                 # Even if there were no downloads, it is being merged only now
2711                                 info_dict['__real_download'] = True
2712                             else:
2713                                 for file in downloaded:
2714                                     files_to_move[file] = None
2715                 else:
2716                     # Just a single file
2717                     dl_filename = existing_file(full_filename, temp_filename)
2718                     if dl_filename is None:
2719                         success, real_download = self.dl(temp_filename, info_dict)
2720                         info_dict['__real_download'] = real_download
2721
2722                 dl_filename = dl_filename or temp_filename
2723                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2724
2725             except network_exceptions as err:
2726                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2727                 return
2728             except (OSError, IOError) as err:
2729                 raise UnavailableVideoError(err)
2730             except (ContentTooShortError, ) as err:
2731                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2732                 return
2733
2734             if success and full_filename != '-':
2735
2736                 def fixup():
2737                     do_fixup = True
2738                     fixup_policy = self.params.get('fixup')
2739                     vid = info_dict['id']
2740
2741                     if fixup_policy in ('ignore', 'never'):
2742                         return
2743                     elif fixup_policy == 'warn':
2744                         do_fixup = False
2745                     elif fixup_policy != 'force':
2746                         assert fixup_policy in ('detect_or_warn', None)
2747                         if not info_dict.get('__real_download'):
2748                             do_fixup = False
2749
2750                     def ffmpeg_fixup(cndn, msg, cls):
2751                         if not cndn:
2752                             return
2753                         if not do_fixup:
2754                             self.report_warning(f'{vid}: {msg}')
2755                             return
2756                         pp = cls(self)
2757                         if pp.available:
2758                             info_dict['__postprocessors'].append(pp)
2759                         else:
2760                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
2761
2762                     stretched_ratio = info_dict.get('stretched_ratio')
2763                     ffmpeg_fixup(
2764                         stretched_ratio not in (1, None),
2765                         f'Non-uniform pixel ratio {stretched_ratio}',
2766                         FFmpegFixupStretchedPP)
2767
2768                     ffmpeg_fixup(
2769                         (info_dict.get('requested_formats') is None
2770                          and info_dict.get('container') == 'm4a_dash'
2771                          and info_dict.get('ext') == 'm4a'),
2772                         'writing DASH m4a. Only some players support this container',
2773                         FFmpegFixupM4aPP)
2774
2775                     downloader = (get_suitable_downloader(info_dict, self.params).__name__
2776                                   if 'protocol' in info_dict else None)
2777                     ffmpeg_fixup(downloader == 'HlsFD', 'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
2778                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
2779                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)
2780
2781                 fixup()
2782                 try:
2783                     info_dict = self.post_process(dl_filename, info_dict, files_to_move)
2784                 except PostProcessingError as err:
2785                     self.report_error('Postprocessing: %s' % str(err))
2786                     return
2787                 try:
2788                     for ph in self._post_hooks:
2789                         ph(info_dict['filepath'])
2790                 except Exception as err:
2791                     self.report_error('post hooks: %s' % str(err))
2792                     return
2793                 must_record_download_archive = True
2794
2795         if must_record_download_archive or self.params.get('force_write_download_archive', False):
2796             self.record_download_archive(info_dict)
2797         max_downloads = self.params.get('max_downloads')
2798         if max_downloads is not None and self._num_downloads >= int(max_downloads):
2799             raise MaxDownloadsReached()
2800
2801     def download(self, url_list):
2802         """Download a given list of URLs."""
2803         outtmpl = self.outtmpl_dict['default']
2804         if (len(url_list) > 1
2805                 and outtmpl != '-'
2806                 and '%' not in outtmpl
2807                 and self.params.get('max_downloads') != 1):
2808             raise SameFileError(outtmpl)
2809
2810         for url in url_list:
2811             try:
2812                 # It also downloads the videos
2813                 res = self.extract_info(
2814                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2815             except UnavailableVideoError:
2816                 self.report_error('unable to download video')
2817             except MaxDownloadsReached:
2818                 self.to_screen('[info] Maximum number of downloaded files reached')
2819                 raise
2820             except ExistingVideoReached:
2821                 self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
2822                 raise
2823             except RejectedVideoReached:
2824                 self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
2825                 raise
2826             else:
2827                 if self.params.get('dump_single_json', False):
2828                     self.post_extract(res)
2829                     self.to_stdout(json.dumps(res, default=repr))
2830
2831         return self._download_retcode
2832
2833     def download_with_info_file(self, info_filename):
2834         with contextlib.closing(fileinput.FileInput(
2835                 [info_filename], mode='r',
2836                 openhook=fileinput.hook_encoded('utf-8'))) as f:
2837             # FileInput doesn't have a read method, we can't call json.load
2838             info = self.filter_requested_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
2839         try:
2840             self.process_ie_result(info, download=True)
2841         except (DownloadError, EntryNotInPlaylist, ThrottledDownload):
2842             webpage_url = info.get('webpage_url')
2843             if webpage_url is not None:
2844                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2845                 return self.download([webpage_url])
2846             else:
2847                 raise
2848         return self._download_retcode
2849
2850     @staticmethod
2851     def filter_requested_info(info_dict, actually_filter=True):
2852         remove_keys = ['__original_infodict']  # Always remove this since this may contain a copy of the entire dict
2853         keep_keys = ['_type'],  # Always keep this to facilitate load-info-json
2854         if actually_filter:
2855             remove_keys += ('requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries', 'original_url')
2856             empty_values = (None, {}, [], set(), tuple())
2857             reject = lambda k, v: k not in keep_keys and (
2858                 k.startswith('_') or k in remove_keys or v in empty_values)
2859         else:
2860             info_dict['epoch'] = int(time.time())
2861             reject = lambda k, v: k in remove_keys
2862         filter_fn = lambda obj: (
2863             list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
2864             else obj if not isinstance(obj, dict)
2865             else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
2866         return filter_fn(info_dict)
2867
2868     def run_pp(self, pp, infodict):
2869         files_to_delete = []
2870         if '__files_to_move' not in infodict:
2871             infodict['__files_to_move'] = {}
2872         files_to_delete, infodict = pp.run(infodict)
2873         if not files_to_delete:
2874             return infodict
2875
2876         if self.params.get('keepvideo', False):
2877             for f in files_to_delete:
2878                 infodict['__files_to_move'].setdefault(f, '')
2879         else:
2880             for old_filename in set(files_to_delete):
2881                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2882                 try:
2883                     os.remove(encodeFilename(old_filename))
2884                 except (IOError, OSError):
2885                     self.report_warning('Unable to remove downloaded original file')
2886                 if old_filename in infodict['__files_to_move']:
2887                     del infodict['__files_to_move'][old_filename]
2888         return infodict
2889
2890     @staticmethod
2891     def post_extract(info_dict):
2892         def actual_post_extract(info_dict):
2893             if info_dict.get('_type') in ('playlist', 'multi_video'):
2894                 for video_dict in info_dict.get('entries', {}):
2895                     actual_post_extract(video_dict or {})
2896                 return
2897
2898             post_extractor = info_dict.get('__post_extractor') or (lambda: {})
2899             extra = post_extractor().items()
2900             info_dict.update(extra)
2901             info_dict.pop('__post_extractor', None)
2902
2903             original_infodict = info_dict.get('__original_infodict') or {}
2904             original_infodict.update(extra)
2905             original_infodict.pop('__post_extractor', None)
2906
2907         actual_post_extract(info_dict or {})
2908
2909     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
2910         info = dict(ie_info)
2911         info['__files_to_move'] = files_to_move or {}
2912         for pp in self._pps[key]:
2913             info = self.run_pp(pp, info)
2914         return info, info.pop('__files_to_move', None)
2915
2916     def post_process(self, filename, ie_info, files_to_move=None):
2917         """Run all the postprocessors on the given file."""
2918         info = dict(ie_info)
2919         info['filepath'] = filename
2920         info['__files_to_move'] = files_to_move or {}
2921
2922         for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
2923             info = self.run_pp(pp, info)
2924         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
2925         del info['__files_to_move']
2926         for pp in self._pps['after_move']:
2927             info = self.run_pp(pp, info)
2928         return info
2929
2930     def _make_archive_id(self, info_dict):
2931         video_id = info_dict.get('id')
2932         if not video_id:
2933             return
2934         # Future-proof against any change in case
2935         # and backwards compatibility with prior versions
2936         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
2937         if extractor is None:
2938             url = str_or_none(info_dict.get('url'))
2939             if not url:
2940                 return
2941             # Try to find matching extractor for the URL and take its ie_key
2942             for ie in self._ies:
2943                 if ie.suitable(url):
2944                     extractor = ie.ie_key()
2945                     break
2946             else:
2947                 return
2948         return '%s %s' % (extractor.lower(), video_id)
2949
2950     def in_download_archive(self, info_dict):
2951         fn = self.params.get('download_archive')
2952         if fn is None:
2953             return False
2954
2955         vid_id = self._make_archive_id(info_dict)
2956         if not vid_id:
2957             return False  # Incomplete video information
2958
2959         return vid_id in self.archive
2960
2961     def record_download_archive(self, info_dict):
2962         fn = self.params.get('download_archive')
2963         if fn is None:
2964             return
2965         vid_id = self._make_archive_id(info_dict)
2966         assert vid_id
2967         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2968             archive_file.write(vid_id + '\n')
2969         self.archive.add(vid_id)
2970
2971     @staticmethod
2972     def format_resolution(format, default='unknown'):
2973         if format.get('vcodec') == 'none':
2974             if format.get('acodec') == 'none':
2975                 return 'images'
2976             return 'audio only'
2977         if format.get('resolution') is not None:
2978             return format['resolution']
2979         if format.get('width') and format.get('height'):
2980             res = '%dx%d' % (format['width'], format['height'])
2981         elif format.get('height'):
2982             res = '%sp' % format['height']
2983         elif format.get('width'):
2984             res = '%dx?' % format['width']
2985         else:
2986             res = default
2987         return res
2988
2989     def _format_note(self, fdict):
2990         res = ''
2991         if fdict.get('ext') in ['f4f', 'f4m']:
2992             res += '(unsupported) '
2993         if fdict.get('language'):
2994             if res:
2995                 res += ' '
2996             res += '[%s] ' % fdict['language']
2997         if fdict.get('format_note') is not None:
2998             res += fdict['format_note'] + ' '
2999         if fdict.get('tbr') is not None:
3000             res += '%4dk ' % fdict['tbr']
3001         if fdict.get('container') is not None:
3002             if res:
3003                 res += ', '
3004             res += '%s container' % fdict['container']
3005         if (fdict.get('vcodec') is not None
3006                 and fdict.get('vcodec') != 'none'):
3007             if res:
3008                 res += ', '
3009             res += fdict['vcodec']
3010             if fdict.get('vbr') is not None:
3011                 res += '@'
3012         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3013             res += 'video@'
3014         if fdict.get('vbr') is not None:
3015             res += '%4dk' % fdict['vbr']
3016         if fdict.get('fps') is not None:
3017             if res:
3018                 res += ', '
3019             res += '%sfps' % fdict['fps']
3020         if fdict.get('acodec') is not None:
3021             if res:
3022                 res += ', '
3023             if fdict['acodec'] == 'none':
3024                 res += 'video only'
3025             else:
3026                 res += '%-5s' % fdict['acodec']
3027         elif fdict.get('abr') is not None:
3028             if res:
3029                 res += ', '
3030             res += 'audio'
3031         if fdict.get('abr') is not None:
3032             res += '@%3dk' % fdict['abr']
3033         if fdict.get('asr') is not None:
3034             res += ' (%5dHz)' % fdict['asr']
3035         if fdict.get('filesize') is not None:
3036             if res:
3037                 res += ', '
3038             res += format_bytes(fdict['filesize'])
3039         elif fdict.get('filesize_approx') is not None:
3040             if res:
3041                 res += ', '
3042             res += '~' + format_bytes(fdict['filesize_approx'])
3043         return res
3044
3045     def list_formats(self, info_dict):
3046         formats = info_dict.get('formats', [info_dict])
3047         new_format = (
3048             'list-formats' not in self.params.get('compat_opts', [])
3049             and self.params.get('listformats_table', True) is not False)
3050         if new_format:
3051             table = [
3052                 [
3053                     format_field(f, 'format_id'),
3054                     format_field(f, 'ext'),
3055                     self.format_resolution(f),
3056                     format_field(f, 'fps', '%d'),
3057                     '|',
3058                     format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
3059                     format_field(f, 'tbr', '%4dk'),
3060                     shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
3061                     '|',
3062                     format_field(f, 'vcodec', default='unknown').replace('none', ''),
3063                     format_field(f, 'vbr', '%4dk'),
3064                     format_field(f, 'acodec', default='unknown').replace('none', ''),
3065                     format_field(f, 'abr', '%3dk'),
3066                     format_field(f, 'asr', '%5dHz'),
3067                     ', '.join(filter(None, (
3068                         'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
3069                         format_field(f, 'language', '[%s]'),
3070                         format_field(f, 'format_note'),
3071                         format_field(f, 'container', ignore=(None, f.get('ext'))),
3072                         format_field(f, 'asr', '%5dHz')))),
3073                 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3074             header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', '  TBR', 'PROTO',
3075                            '|', 'VCODEC', '  VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO']
3076         else:
3077             table = [
3078                 [
3079                     format_field(f, 'format_id'),
3080                     format_field(f, 'ext'),
3081                     self.format_resolution(f),
3082                     self._format_note(f)]
3083                 for f in formats
3084                 if f.get('preference') is None or f['preference'] >= -1000]
3085             header_line = ['format code', 'extension', 'resolution', 'note']
3086
3087         self.to_screen(
3088             '[info] Available formats for %s:' % info_dict['id'])
3089         self.to_stdout(render_table(
3090             header_line, table, delim=new_format, extraGap=(0 if new_format else 1), hideEmpty=new_format))
3091
3092     def list_thumbnails(self, info_dict):
3093         thumbnails = list(info_dict.get('thumbnails'))
3094         if not thumbnails:
3095             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3096             return
3097
3098         self.to_screen(
3099             '[info] Thumbnails for %s:' % info_dict['id'])
3100         self.to_stdout(render_table(
3101             ['ID', 'width', 'height', 'URL'],
3102             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3103
3104     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3105         if not subtitles:
3106             self.to_screen('%s has no %s' % (video_id, name))
3107             return
3108         self.to_screen(
3109             'Available %s for %s:' % (name, video_id))
3110
3111         def _row(lang, formats):
3112             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3113             if len(set(names)) == 1:
3114                 names = [] if names[0] == 'unknown' else names[:1]
3115             return [lang, ', '.join(names), ', '.join(exts)]
3116
3117         self.to_stdout(render_table(
3118             ['Language', 'Name', 'Formats'],
3119             [_row(lang, formats) for lang, formats in subtitles.items()],
3120             hideEmpty=True))
3121
3122     def urlopen(self, req):
3123         """ Start an HTTP download """
3124         if isinstance(req, compat_basestring):
3125             req = sanitized_Request(req)
3126         return self._opener.open(req, timeout=self._socket_timeout)
3127
3128     def print_debug_header(self):
3129         if not self.params.get('verbose'):
3130             return
3131
3132         if type('') is not compat_str:
3133             # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
3134             self.report_warning(
3135                 'Your Python is broken! Update to a newer and supported version')
3136
3137         stdout_encoding = getattr(
3138             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
3139         encoding_str = (
3140             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
3141                 locale.getpreferredencoding(),
3142                 sys.getfilesystemencoding(),
3143                 stdout_encoding,
3144                 self.get_encoding()))
3145         write_string(encoding_str, encoding=None)
3146
3147         source = (
3148             '(exe)' if hasattr(sys, 'frozen')
3149             else '(zip)' if isinstance(globals().get('__loader__'), zipimporter)
3150             else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py'
3151             else '')
3152         self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))
3153         if _LAZY_LOADER:
3154             self._write_string('[debug] Lazy loading extractors enabled\n')
3155         if _PLUGIN_CLASSES:
3156             self._write_string(
3157                 '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
3158         if self.params.get('compat_opts'):
3159             self._write_string(
3160                 '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
3161         try:
3162             sp = subprocess.Popen(
3163                 ['git', 'rev-parse', '--short', 'HEAD'],
3164                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3165                 cwd=os.path.dirname(os.path.abspath(__file__)))
3166             out, err = process_communicate_or_kill(sp)
3167             out = out.decode().strip()
3168             if re.match('[0-9a-f]+', out):
3169                 self._write_string('[debug] Git HEAD: %s\n' % out)
3170         except Exception:
3171             try:
3172                 sys.exc_clear()
3173             except Exception:
3174                 pass
3175
3176         def python_implementation():
3177             impl_name = platform.python_implementation()
3178             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3179                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3180             return impl_name
3181
3182         self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
3183             platform.python_version(),
3184             python_implementation(),
3185             platform.architecture()[0],
3186             platform_name()))
3187
3188         exe_versions = FFmpegPostProcessor.get_versions(self)
3189         exe_versions['rtmpdump'] = rtmpdump_version()
3190         exe_versions['phantomjs'] = PhantomJSwrapper._version()
3191         exe_str = ', '.join(
3192             '%s %s' % (exe, v)
3193             for exe, v in sorted(exe_versions.items())
3194             if v
3195         )
3196         if not exe_str:
3197             exe_str = 'none'
3198         self._write_string('[debug] exe versions: %s\n' % exe_str)
3199
3200         proxy_map = {}
3201         for handler in self._opener.handlers:
3202             if hasattr(handler, 'proxies'):
3203                 proxy_map.update(handler.proxies)
3204         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
3205
3206         if self.params.get('call_home', False):
3207             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3208             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
3209             return
3210             latest_version = self.urlopen(
3211                 'https://yt-dl.org/latest/version').read().decode('utf-8')
3212             if version_tuple(latest_version) > version_tuple(__version__):
3213                 self.report_warning(
3214                     'You are using an outdated version (newest version: %s)! '
3215                     'See https://yt-dl.org/update if you need help updating.' %
3216                     latest_version)
3217
3218     def _setup_opener(self):
3219         timeout_val = self.params.get('socket_timeout')
3220         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
3221
3222         opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3223         opts_cookiefile = self.params.get('cookiefile')
3224         opts_proxy = self.params.get('proxy')
3225
3226         self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3227
3228         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3229         if opts_proxy is not None:
3230             if opts_proxy == '':
3231                 proxies = {}
3232             else:
3233                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3234         else:
3235             proxies = compat_urllib_request.getproxies()
3236             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3237             if 'http' in proxies and 'https' not in proxies:
3238                 proxies['https'] = proxies['http']
3239         proxy_handler = PerRequestProxyHandler(proxies)
3240
3241         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3242         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3243         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3244         redirect_handler = YoutubeDLRedirectHandler()
3245         data_handler = compat_urllib_request_DataHandler()
3246
3247         # When passing our own FileHandler instance, build_opener won't add the
3248         # default FileHandler and allows us to disable the file protocol, which
3249         # can be used for malicious purposes (see
3250         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3251         file_handler = compat_urllib_request.FileHandler()
3252
3253         def file_open(*args, **kwargs):
3254             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3255         file_handler.file_open = file_open
3256
3257         opener = compat_urllib_request.build_opener(
3258             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3259
3260         # Delete the default user-agent header, which would otherwise apply in
3261         # cases where our custom HTTP handler doesn't come into play
3262         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3263         opener.addheaders = []
3264         self._opener = opener
3265
3266     def encode(self, s):
3267         if isinstance(s, bytes):
3268             return s  # Already encoded
3269
3270         try:
3271             return s.encode(self.get_encoding())
3272         except UnicodeEncodeError as err:
3273             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3274             raise
3275
3276     def get_encoding(self):
3277         encoding = self.params.get('encoding')
3278         if encoding is None:
3279             encoding = preferredencoding()
3280         return encoding
3281
3282     def _write_thumbnails(self, info_dict, filename):  # return the extensions
3283         write_all = self.params.get('write_all_thumbnails', False)
3284         thumbnails = []
3285         if write_all or self.params.get('writethumbnail', False):
3286             thumbnails = info_dict.get('thumbnails') or []
3287         multiple = write_all and len(thumbnails) > 1
3288
3289         ret = []
3290         for t in thumbnails[::-1]:
3291             thumb_ext = determine_ext(t['url'], 'jpg')
3292             suffix = '%s.' % t['id'] if multiple else ''
3293             thumb_display_id = '%s ' % t['id'] if multiple else ''
3294             thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))
3295
3296             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
3297                 ret.append(suffix + thumb_ext)
3298                 t['filepath'] = thumb_filename
3299                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
3300                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
3301             else:
3302                 self.to_screen('[%s] %s: Downloading thumbnail %s ...' %
3303                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
3304                 try:
3305                     uf = self.urlopen(t['url'])
3306                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3307                         shutil.copyfileobj(uf, thumbf)
3308                     ret.append(suffix + thumb_ext)
3309                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
3310                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
3311                     t['filepath'] = thumb_filename
3312                 except network_exceptions as err:
3313                     self.report_warning('Unable to download thumbnail "%s": %s' %
3314                                         (t['url'], error_to_compat_str(err)))
3315             if ret and not write_all:
3316                 break
3317         return ret