yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import copy
   9 import datetime
  10 import errno
  11 import fileinput
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import sys
  23 import time
  24 import tokenize
  25 import traceback
  26 import random
  27
  28 from string import ascii_letters
  29 from zipimport import zipimporter
  30
  31 from .compat import (
  32     compat_basestring,
  33     compat_cookiejar,
  34     compat_get_terminal_size,
  35     compat_kwargs,
  36     compat_numeric_types,
  37     compat_os_name,
  38     compat_str,
  39     compat_tokenize_tokenize,
  40     compat_urllib_error,
  41     compat_urllib_request,
  42     compat_urllib_request_DataHandler,
  43 )
  44 from .utils import (
  45     age_restricted,
  46     args_to_str,
  47     ContentTooShortError,
  48     date_from_str,
  49     DateRange,
  50     DEFAULT_OUTTMPL,
  51     determine_ext,
  52     determine_protocol,
  53     DOT_DESKTOP_LINK_TEMPLATE,
  54     DOT_URL_LINK_TEMPLATE,
  55     DOT_WEBLOC_LINK_TEMPLATE,
  56     DownloadError,
  57     encode_compat_str,
  58     encodeFilename,
  59     EntryNotInPlaylist,
  60     error_to_compat_str,
  61     ExistingVideoReached,
  62     expand_path,
  63     ExtractorError,
  64     float_or_none,
  65     format_bytes,
  66     format_field,
  67     STR_FORMAT_RE,
  68     formatSeconds,
  69     GeoRestrictedError,
  70     int_or_none,
  71     iri_to_uri,
  72     ISO3166Utils,
  73     LazyList,
  74     locked_file,
  75     make_dir,
  76     make_HTTPS_handler,
  77     MaxDownloadsReached,
  78     network_exceptions,
  79     orderedSet,
  80     OUTTMPL_TYPES,
  81     PagedList,
  82     parse_filesize,
  83     PerRequestProxyHandler,
  84     platform_name,
  85     PostProcessingError,
  86     preferredencoding,
  87     prepend_extension,
  88     process_communicate_or_kill,
  89     random_uuidv4,
  90     register_socks_protocols,
  91     RejectedVideoReached,
  92     render_table,
  93     replace_extension,
  94     SameFileError,
  95     sanitize_filename,
  96     sanitize_path,
  97     sanitize_url,
  98     sanitized_Request,
  99     std_headers,
 100     str_or_none,
 101     strftime_or_none,
 102     subtitles_filename,
 103     to_high_limit_path,
 104     traverse_obj,
 105     UnavailableVideoError,
 106     url_basename,
 107     version_tuple,
 108     write_json_file,
 109     write_string,
 110     YoutubeDLCookieJar,
 111     YoutubeDLCookieProcessor,
 112     YoutubeDLHandler,
 113     YoutubeDLRedirectHandler,
 114 )
 115 from .cache import Cache
 116 from .extractor import (
 117     gen_extractor_classes,
 118     get_info_extractor,
 119     _LAZY_LOADER,
 120     _PLUGIN_CLASSES
 121 )
 122 from .extractor.openload import PhantomJSwrapper
 123 from .downloader import (
 124     get_suitable_downloader,
 125     shorten_protocol_name
 126 )
 127 from .downloader.rtmp import rtmpdump_version
 128 from .postprocessor import (
 129     FFmpegFixupM3u8PP,
 130     FFmpegFixupM4aPP,
 131     FFmpegFixupStretchedPP,
 132     FFmpegMergerPP,
 133     FFmpegPostProcessor,
 134     # FFmpegSubtitlesConvertorPP,
 135     get_postprocessor,
 136     MoveFilesAfterDownloadPP,
 137 )
 138 from .version import __version__
 139
 140 if compat_os_name == 'nt':
 141     import ctypes
 142
 143
 144 class YoutubeDL(object):
 145     """YoutubeDL class.
 146
 147     YoutubeDL objects are the ones responsible of downloading the
 148     actual video file and writing it to disk if the user has requested
 149     it, among some other tasks. In most cases there should be one per
 150     program. As, given a video URL, the downloader doesn't know how to
 151     extract all the needed information, task that InfoExtractors do, it
 152     has to pass the URL to one of them.
 153
 154     For this, YoutubeDL objects have a method that allows
 155     InfoExtractors to be registered in a given order. When it is passed
 156     a URL, the YoutubeDL object handles it to the first InfoExtractor it
 157     finds that reports being able to handle it. The InfoExtractor extracts
 158     all the information about the video or videos the URL refers to, and
 159     YoutubeDL process the extracted information, possibly using a File
 160     Downloader to download the video.
 161
 162     YoutubeDL objects accept a lot of parameters. In order not to saturate
 163     the object constructor with arguments, it receives a dictionary of
 164     options instead. These options are available through the params
 165     attribute for the InfoExtractors to use. The YoutubeDL also
 166     registers itself as the downloader in charge for the InfoExtractors
 167     that are added to it, so this is a "mutual registration".
 168
 169     Available options:
 170
 171     username:          Username for authentication purposes.
 172     password:          Password for authentication purposes.
 173     videopassword:     Password for accessing a video.
 174     ap_mso:            Adobe Pass multiple-system operator identifier.
 175     ap_username:       Multiple-system operator account username.
 176     ap_password:       Multiple-system operator account password.
 177     usenetrc:          Use netrc for authentication instead.
 178     verbose:           Print additional info to stdout.
 179     quiet:             Do not print messages to stdout.
 180     no_warnings:       Do not print out anything for warnings.
 181     forceprint:        A list of templates to force print
 182     forceurl:          Force printing final URL. (Deprecated)
 183     forcetitle:        Force printing title. (Deprecated)
 184     forceid:           Force printing ID. (Deprecated)
 185     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 186     forcedescription:  Force printing description. (Deprecated)
 187     forcefilename:     Force printing final filename. (Deprecated)
 188     forceduration:     Force printing duration. (Deprecated)
 189     forcejson:         Force printing info_dict as JSON.
 190     dump_single_json:  Force printing the info_dict of the whole playlist
 191                        (or video) as a single JSON line.
 192     force_write_download_archive: Force writing download archive regardless
 193                        of 'skip_download' or 'simulate'.
 194     simulate:          Do not download the video files.
 195     format:            Video format code. see "FORMAT SELECTION" for more details.
 196     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 197     ignore_no_formats_error: Ignore "No video formats" error. Usefull for
 198                        extracting metadata even if the video is not actually
 199                        available for download (experimental)
 200     format_sort:       How to sort the video formats. see "Sorting Formats"
 201                        for more details.
 202     format_sort_force: Force the given format_sort. see "Sorting Formats"
 203                        for more details.
 204     allow_multiple_video_streams:   Allow multiple video streams to be merged
 205                        into a single file
 206     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 207                        into a single file
 208     paths:             Dictionary of output paths. The allowed keys are 'home'
 209                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 210     outtmpl:           Dictionary of templates for output names. Allowed keys
 211                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 212                        A string a also accepted for backward compatibility
 213     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 214     restrictfilenames: Do not allow "&" and spaces in file names
 215     trim_file_name:    Limit length of filename (extension excluded)
 216     windowsfilenames:  Force the filenames to be windows compatible
 217     ignoreerrors:      Do not stop on download errors
 218                        (Default True when running yt-dlp,
 219                        but False when directly accessing YoutubeDL class)
 220     skip_playlist_after_errors: Number of allowed failures until the rest of
 221                        the playlist is skipped
 222     force_generic_extractor: Force downloader to use the generic extractor
 223     overwrites:        Overwrite all video and metadata files if True,
 224                        overwrite only non-video files if None
 225                        and don't overwrite any file if False
 226     playliststart:     Playlist item to start at.
 227     playlistend:       Playlist item to end at.
 228     playlist_items:    Specific indices of playlist to download.
 229     playlistreverse:   Download playlist items in reverse order.
 230     playlistrandom:    Download playlist items in random order.
 231     matchtitle:        Download only matching titles.
 232     rejecttitle:       Reject downloads for matching titles.
 233     logger:            Log messages to a logging.Logger instance.
 234     logtostderr:       Log messages to stderr instead of stdout.
 235     writedescription:  Write the video description to a .description file
 236     writeinfojson:     Write the video description to a .info.json file
 237     clean_infojson:    Remove private fields from the infojson
 238     writecomments:     Extract video comments. This will not be written to disk
 239                        unless writeinfojson is also given
 240     writeannotations:  Write the video annotations to a .annotations.xml file
 241     writethumbnail:    Write the thumbnail image to a file
 242     allow_playlist_files: Whether to write playlists' description, infojson etc
 243                        also to disk when using the 'write*' options
 244     write_all_thumbnails:  Write all thumbnail formats to files
 245     writelink:         Write an internet shortcut file, depending on the
 246                        current platform (.url/.webloc/.desktop)
 247     writeurllink:      Write a Windows internet shortcut file (.url)
 248     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 249     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 250     writesubtitles:    Write the video subtitles to a file
 251     writeautomaticsub: Write the automatically generated subtitles to a file
 252     allsubtitles:      Deprecated - Use subtitlelangs = ['all']
 253                        Downloads all the subtitles of the video
 254                        (requires writesubtitles or writeautomaticsub)
 255     listsubtitles:     Lists all available subtitles for the video
 256     subtitlesformat:   The format code for subtitles
 257     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 258                        The list may contain "all" to refer to all the available
 259                        subtitles. The language can be prefixed with a "-" to
 260                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 261     keepvideo:         Keep the video file after post-processing
 262     daterange:         A DateRange object, download only if the upload_date is in the range.
 263     skip_download:     Skip the actual download of the video file
 264     cachedir:          Location of the cache files in the filesystem.
 265                        False to disable filesystem cache.
 266     noplaylist:        Download single video instead of a playlist if in doubt.
 267     age_limit:         An integer representing the user's age in years.
 268                        Unsuitable videos for the given age are skipped.
 269     min_views:         An integer representing the minimum view count the video
 270                        must have in order to not be skipped.
 271                        Videos without view count information are always
 272                        downloaded. None for no limit.
 273     max_views:         An integer representing the maximum view count.
 274                        Videos that are more popular than that are not
 275                        downloaded.
 276                        Videos without view count information are always
 277                        downloaded. None for no limit.
 278     download_archive:  File name of a file where all downloads are recorded.
 279                        Videos already present in the file are not downloaded
 280                        again.
 281     break_on_existing: Stop the download process after attempting to download a
 282                        file that is in the archive.
 283     break_on_reject:   Stop the download process when encountering a video that
 284                        has been filtered out.
 285     cookiefile:        File name where cookies should be read from and dumped to
 286     nocheckcertificate:Do not verify SSL certificates
 287     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 288                        At the moment, this is only supported by YouTube.
 289     proxy:             URL of the proxy server to use
 290     geo_verification_proxy:  URL of the proxy to use for IP address verification
 291                        on geo-restricted sites.
 292     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 293     bidi_workaround:   Work around buggy terminals without bidirectional text
 294                        support, using fridibi
 295     debug_printtraffic:Print out sent and received HTTP traffic
 296     include_ads:       Download ads as well
 297     default_search:    Prepend this string if an input url is not valid.
 298                        'auto' for elaborate guessing
 299     encoding:          Use this encoding instead of the system-specified.
 300     extract_flat:      Do not resolve URLs, return the immediate result.
 301                        Pass in 'in_playlist' to only show this behavior for
 302                        playlist items.
 303     postprocessors:    A list of dictionaries, each with an entry
 304                        * key:  The name of the postprocessor. See
 305                                yt_dlp/postprocessor/__init__.py for a list.
 306                        * when: When to run the postprocessor. Can be one of
 307                                pre_process|before_dl|post_process|after_move.
 308                                Assumed to be 'post_process' if not given
 309     post_hooks:        A list of functions that get called as the final step
 310                        for each video file, after all postprocessors have been
 311                        called. The filename will be passed as the only argument.
 312     progress_hooks:    A list of functions that get called on download
 313                        progress, with a dictionary with the entries
 314                        * status: One of "downloading", "error", or "finished".
 315                                  Check this first and ignore unknown values.
 316
 317                        If status is one of "downloading", or "finished", the
 318                        following properties may also be present:
 319                        * filename: The final filename (always present)
 320                        * tmpfilename: The filename we're currently writing to
 321                        * downloaded_bytes: Bytes on disk
 322                        * total_bytes: Size of the whole file, None if unknown
 323                        * total_bytes_estimate: Guess of the eventual file size,
 324                                                None if unavailable.
 325                        * elapsed: The number of seconds since download started.
 326                        * eta: The estimated time in seconds, None if unknown
 327                        * speed: The download speed in bytes/second, None if
 328                                 unknown
 329                        * fragment_index: The counter of the currently
 330                                          downloaded video fragment.
 331                        * fragment_count: The number of fragments (= individual
 332                                          files that will be merged)
 333
 334                        Progress hooks are guaranteed to be called at least once
 335                        (with status "finished") if the download is successful.
 336     merge_output_format: Extension to use when merging formats.
 337     final_ext:         Expected final extension; used to detect when the file was
 338                        already downloaded and converted. "merge_output_format" is
 339                        replaced by this extension when given
 340     fixup:             Automatically correct known faults of the file.
 341                        One of:
 342                        - "never": do nothing
 343                        - "warn": only emit a warning
 344                        - "detect_or_warn": check whether we can do anything
 345                                            about it, warn otherwise (default)
 346     source_address:    Client-side IP address to bind to.
 347     call_home:         Boolean, true iff we are allowed to contact the
 348                        yt-dlp servers for debugging. (BROKEN)
 349     sleep_interval_requests: Number of seconds to sleep between requests
 350                        during extraction
 351     sleep_interval:    Number of seconds to sleep before each download when
 352                        used alone or a lower bound of a range for randomized
 353                        sleep before each download (minimum possible number
 354                        of seconds to sleep) when used along with
 355                        max_sleep_interval.
 356     max_sleep_interval:Upper bound of a range for randomized sleep before each
 357                        download (maximum possible number of seconds to sleep).
 358                        Must only be used along with sleep_interval.
 359                        Actual sleep time will be a random float from range
 360                        [sleep_interval; max_sleep_interval].
 361     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 362     listformats:       Print an overview of available video formats and exit.
 363     list_thumbnails:   Print a table of all thumbnails and exit.
 364     match_filter:      A function that gets called with the info_dict of
 365                        every video.
 366                        If it returns a message, the video is ignored.
 367                        If it returns None, the video is downloaded.
 368                        match_filter_func in utils.py is one example for this.
 369     no_color:          Do not emit color codes in output.
 370     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 371                        HTTP header
 372     geo_bypass_country:
 373                        Two-letter ISO 3166-2 country code that will be used for
 374                        explicit geographic restriction bypassing via faking
 375                        X-Forwarded-For HTTP header
 376     geo_bypass_ip_block:
 377                        IP range in CIDR notation that will be used similarly to
 378                        geo_bypass_country
 379
 380     The following options determine which downloader is picked:
 381     external_downloader: A dictionary of protocol keys and the executable of the
 382                        external downloader to use for it. The allowed protocols
 383                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 384                        Set the value to 'native' to use the native downloader
 385     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 386                        or {'m3u8': 'ffmpeg'} instead.
 387                        Use the native HLS downloader instead of ffmpeg/avconv
 388                        if True, otherwise use ffmpeg/avconv if False, otherwise
 389                        use downloader suggested by extractor if None.
 390     compat_opts:       Compatibility options. See "Differences in default behavior".
 391                        Note that only format-sort, format-spec, no-live-chat,
 392                        no-attach-info-json, playlist-index, list-formats,
 393                        no-direct-merge, no-youtube-channel-redirect,
 394                        and no-youtube-unavailable-videos works when used via the API
 395
 396     The following parameters are not used by YoutubeDL itself, they are used by
 397     the downloader (see yt_dlp/downloader/common.py):
 398     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 399     noresizebuffer, retries, continuedl, noprogress, consoletitle,
 400     xattr_set_filesize, external_downloader_args, hls_use_mpegts,
 401     http_chunk_size.
 402
 403     The following options are used by the post processors:
 404     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 405                        otherwise prefer ffmpeg. (avconv support is deprecated)
 406     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 407                        to the binary or its containing directory.
 408     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 409                         and a list of additional command-line arguments for the
 410                         postprocessor/executable. The dict can also have "PP+EXE" keys
 411                         which are used when the given exe is used by the given PP.
 412                         Use 'default' as the name for arguments to passed to all PP
 413
 414     The following options are used by the extractors:
 415     extractor_retries: Number of times to retry for known errors
 416     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 417     hls_split_discontinuity: Split HLS playlists to different formats at
 418                        discontinuities such as ad breaks (default: False)
 419     youtube_include_dash_manifest: If True (default), DASH manifests and related
 420                        data will be downloaded and processed by extractor.
 421                        You can reduce network I/O by disabling it if you don't
 422                        care about DASH. (only for youtube)
 423     youtube_include_hls_manifest: If True (default), HLS manifests and related
 424                        data will be downloaded and processed by extractor.
 425                        You can reduce network I/O by disabling it if you don't
 426                        care about HLS. (only for youtube)
 427     """
 428
 429     _NUMERIC_FIELDS = set((
 430         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 431         'timestamp', 'upload_year', 'upload_month', 'upload_day',
 432         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 433         'average_rating', 'comment_count', 'age_limit',
 434         'start_time', 'end_time',
 435         'chapter_number', 'season_number', 'episode_number',
 436         'track_number', 'disc_number', 'release_year',
 437         'playlist_index',
 438     ))
 439
 440     params = None
 441     _ies = []
 442     _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 443     __prepare_filename_warned = False
 444     _first_webpage_request = True
 445     _download_retcode = None
 446     _num_downloads = None
 447     _playlist_level = 0
 448     _playlist_urls = set()
 449     _screen_file = None
 450
 451     def __init__(self, params=None, auto_init=True):
 452         """Create a FileDownloader object with the given options."""
 453         if params is None:
 454             params = {}
 455         self._ies = []
 456         self._ies_instances = {}
 457         self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 458         self.__prepare_filename_warned = False
 459         self._first_webpage_request = True
 460         self._post_hooks = []
 461         self._progress_hooks = []
 462         self._download_retcode = 0
 463         self._num_downloads = 0
 464         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 465         self._err_file = sys.stderr
 466         self.params = {
 467             # Default parameters
 468             'nocheckcertificate': False,
 469         }
 470         self.params.update(params)
 471         self.cache = Cache(self)
 472
 473         if sys.version_info < (3, 6):
 474             self.report_warning(
 475                 'Support for Python version %d.%d have been deprecated and will break in future versions of yt-dlp! '
 476                 'Update to Python 3.6 or above' % sys.version_info[:2])
 477
 478         def check_deprecated(param, option, suggestion):
 479             if self.params.get(param) is not None:
 480                 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
 481                 return True
 482             return False
 483
 484         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 485             if self.params.get('geo_verification_proxy') is None:
 486                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 487
 488         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 489         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 490         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 491
 492         for msg in self.params.get('warnings', []):
 493             self.report_warning(msg)
 494
 495         if self.params.get('final_ext'):
 496             if self.params.get('merge_output_format'):
 497                 self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
 498             self.params['merge_output_format'] = self.params['final_ext']
 499
 500         if 'overwrites' in self.params and self.params['overwrites'] is None:
 501             del self.params['overwrites']
 502
 503         if params.get('bidi_workaround', False):
 504             try:
 505                 import pty
 506                 master, slave = pty.openpty()
 507                 width = compat_get_terminal_size().columns
 508                 if width is None:
 509                     width_args = []
 510                 else:
 511                     width_args = ['-w', str(width)]
 512                 sp_kwargs = dict(
 513                     stdin=subprocess.PIPE,
 514                     stdout=slave,
 515                     stderr=self._err_file)
 516                 try:
 517                     self._output_process = subprocess.Popen(
 518                         ['bidiv'] + width_args, **sp_kwargs
 519                     )
 520                 except OSError:
 521                     self._output_process = subprocess.Popen(
 522                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 523                 self._output_channel = os.fdopen(master, 'rb')
 524             except OSError as ose:
 525                 if ose.errno == errno.ENOENT:
 526                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 527                 else:
 528                     raise
 529
 530         if (sys.platform != 'win32'
 531                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 532                 and not params.get('restrictfilenames', False)):
 533             # Unicode filesystem API will throw errors (#1474, #13027)
 534             self.report_warning(
 535                 'Assuming --restrict-filenames since file system encoding '
 536                 'cannot encode all characters. '
 537                 'Set the LC_ALL environment variable to fix this.')
 538             self.params['restrictfilenames'] = True
 539
 540         self.outtmpl_dict = self.parse_outtmpl()
 541
 542         self._setup_opener()
 543
 544         """Preload the archive, if any is specified"""
 545         def preload_download_archive(fn):
 546             if fn is None:
 547                 return False
 548             self.write_debug('Loading archive file %r\n' % fn)
 549             try:
 550                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 551                     for line in archive_file:
 552                         self.archive.add(line.strip())
 553             except IOError as ioe:
 554                 if ioe.errno != errno.ENOENT:
 555                     raise
 556                 return False
 557             return True
 558
 559         self.archive = set()
 560         preload_download_archive(self.params.get('download_archive'))
 561
 562         if auto_init:
 563             self.print_debug_header()
 564             self.add_default_info_extractors()
 565
 566         for pp_def_raw in self.params.get('postprocessors', []):
 567             pp_class = get_postprocessor(pp_def_raw['key'])
 568             pp_def = dict(pp_def_raw)
 569             del pp_def['key']
 570             if 'when' in pp_def:
 571                 when = pp_def['when']
 572                 del pp_def['when']
 573             else:
 574                 when = 'post_process'
 575             pp = pp_class(self, **compat_kwargs(pp_def))
 576             self.add_post_processor(pp, when=when)
 577
 578         for ph in self.params.get('post_hooks', []):
 579             self.add_post_hook(ph)
 580
 581         for ph in self.params.get('progress_hooks', []):
 582             self.add_progress_hook(ph)
 583
 584         register_socks_protocols()
 585
 586     def warn_if_short_id(self, argv):
 587         # short YouTube ID starting with dash?
 588         idxs = [
 589             i for i, a in enumerate(argv)
 590             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 591         if idxs:
 592             correct_argv = (
 593                 ['yt-dlp']
 594                 + [a for i, a in enumerate(argv) if i not in idxs]
 595                 + ['--'] + [argv[i] for i in idxs]
 596             )
 597             self.report_warning(
 598                 'Long argument string detected. '
 599                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 600                 args_to_str(correct_argv))
 601
 602     def add_info_extractor(self, ie):
 603         """Add an InfoExtractor object to the end of the list."""
 604         self._ies.append(ie)
 605         if not isinstance(ie, type):
 606             self._ies_instances[ie.ie_key()] = ie
 607             ie.set_downloader(self)
 608
 609     def get_info_extractor(self, ie_key):
 610         """
 611         Get an instance of an IE with name ie_key, it will try to get one from
 612         the _ies list, if there's no instance it will create a new one and add
 613         it to the extractor list.
 614         """
 615         ie = self._ies_instances.get(ie_key)
 616         if ie is None:
 617             ie = get_info_extractor(ie_key)()
 618             self.add_info_extractor(ie)
 619         return ie
 620
 621     def add_default_info_extractors(self):
 622         """
 623         Add the InfoExtractors returned by gen_extractors to the end of the list
 624         """
 625         for ie in gen_extractor_classes():
 626             self.add_info_extractor(ie)
 627
 628     def add_post_processor(self, pp, when='post_process'):
 629         """Add a PostProcessor object to the end of the chain."""
 630         self._pps[when].append(pp)
 631         pp.set_downloader(self)
 632
 633     def add_post_hook(self, ph):
 634         """Add the post hook"""
 635         self._post_hooks.append(ph)
 636
 637     def add_progress_hook(self, ph):
 638         """Add the progress hook (currently only for the file downloader)"""
 639         self._progress_hooks.append(ph)
 640
 641     def _bidi_workaround(self, message):
 642         if not hasattr(self, '_output_channel'):
 643             return message
 644
 645         assert hasattr(self, '_output_process')
 646         assert isinstance(message, compat_str)
 647         line_count = message.count('\n') + 1
 648         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 649         self._output_process.stdin.flush()
 650         res = ''.join(self._output_channel.readline().decode('utf-8')
 651                       for _ in range(line_count))
 652         return res[:-len('\n')]
 653
 654     def _write_string(self, s, out=None):
 655         write_string(s, out=out, encoding=self.params.get('encoding'))
 656
 657     def to_stdout(self, message, skip_eol=False, quiet=False):
 658         """Print message to stdout"""
 659         if self.params.get('logger'):
 660             self.params['logger'].debug(message)
 661         elif not quiet or self.params.get('verbose'):
 662             self._write_string(
 663                 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 664                 self._err_file if quiet else self._screen_file)
 665
 666     def to_stderr(self, message):
 667         """Print message to stderr"""
 668         assert isinstance(message, compat_str)
 669         if self.params.get('logger'):
 670             self.params['logger'].error(message)
 671         else:
 672             self._write_string('%s\n' % self._bidi_workaround(message), self._err_file)
 673
 674     def to_console_title(self, message):
 675         if not self.params.get('consoletitle', False):
 676             return
 677         if compat_os_name == 'nt':
 678             if ctypes.windll.kernel32.GetConsoleWindow():
 679                 # c_wchar_p() might not be necessary if `message` is
 680                 # already of type unicode()
 681                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 682         elif 'TERM' in os.environ:
 683             self._write_string('\033]0;%s\007' % message, self._screen_file)
 684
 685     def save_console_title(self):
 686         if not self.params.get('consoletitle', False):
 687             return
 688         if self.params.get('simulate', False):
 689             return
 690         if compat_os_name != 'nt' and 'TERM' in os.environ:
 691             # Save the title on stack
 692             self._write_string('\033[22;0t', self._screen_file)
 693
 694     def restore_console_title(self):
 695         if not self.params.get('consoletitle', False):
 696             return
 697         if self.params.get('simulate', False):
 698             return
 699         if compat_os_name != 'nt' and 'TERM' in os.environ:
 700             # Restore the title from stack
 701             self._write_string('\033[23;0t', self._screen_file)
 702
 703     def __enter__(self):
 704         self.save_console_title()
 705         return self
 706
 707     def __exit__(self, *args):
 708         self.restore_console_title()
 709
 710         if self.params.get('cookiefile') is not None:
 711             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 712
 713     def trouble(self, message=None, tb=None):
 714         """Determine action to take when a download problem appears.
 715
 716         Depending on if the downloader has been configured to ignore
 717         download errors or not, this method may throw an exception or
 718         not when errors are found, after printing the message.
 719
 720         tb, if given, is additional traceback information.
 721         """
 722         if message is not None:
 723             self.to_stderr(message)
 724         if self.params.get('verbose'):
 725             if tb is None:
 726                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 727                     tb = ''
 728                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 729                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 730                     tb += encode_compat_str(traceback.format_exc())
 731                 else:
 732                     tb_data = traceback.format_list(traceback.extract_stack())
 733                     tb = ''.join(tb_data)
 734             if tb:
 735                 self.to_stderr(tb)
 736         if not self.params.get('ignoreerrors', False):
 737             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 738                 exc_info = sys.exc_info()[1].exc_info
 739             else:
 740                 exc_info = sys.exc_info()
 741             raise DownloadError(message, exc_info)
 742         self._download_retcode = 1
 743
 744     def to_screen(self, message, skip_eol=False):
 745         """Print message to stdout if not in quiet mode"""
 746         self.to_stdout(
 747             message, skip_eol, quiet=self.params.get('quiet', False))
 748
 749     def report_warning(self, message):
 750         '''
 751         Print the message to stderr, it will be prefixed with 'WARNING:'
 752         If stderr is a tty file the 'WARNING:' will be colored
 753         '''
 754         if self.params.get('logger') is not None:
 755             self.params['logger'].warning(message)
 756         else:
 757             if self.params.get('no_warnings'):
 758                 return
 759             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 760                 _msg_header = '\033[0;33mWARNING:\033[0m'
 761             else:
 762                 _msg_header = 'WARNING:'
 763             warning_message = '%s %s' % (_msg_header, message)
 764             self.to_stderr(warning_message)
 765
 766     def report_error(self, message, tb=None):
 767         '''
 768         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 769         in red if stderr is a tty file.
 770         '''
 771         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 772             _msg_header = '\033[0;31mERROR:\033[0m'
 773         else:
 774             _msg_header = 'ERROR:'
 775         error_message = '%s %s' % (_msg_header, message)
 776         self.trouble(error_message, tb)
 777
 778     def write_debug(self, message):
 779         '''Log debug message or Print message to stderr'''
 780         if not self.params.get('verbose', False):
 781             return
 782         message = '[debug] %s' % message
 783         if self.params.get('logger'):
 784             self.params['logger'].debug(message)
 785         else:
 786             self._write_string('%s\n' % message)
 787
 788     def report_file_already_downloaded(self, file_name):
 789         """Report file has already been fully downloaded."""
 790         try:
 791             self.to_screen('[download] %s has already been downloaded' % file_name)
 792         except UnicodeEncodeError:
 793             self.to_screen('[download] The file has already been downloaded')
 794
 795     def report_file_delete(self, file_name):
 796         """Report that existing file will be deleted."""
 797         try:
 798             self.to_screen('Deleting existing file %s' % file_name)
 799         except UnicodeEncodeError:
 800             self.to_screen('Deleting existing file')
 801
 802     def parse_outtmpl(self):
 803         outtmpl_dict = self.params.get('outtmpl', {})
 804         if not isinstance(outtmpl_dict, dict):
 805             outtmpl_dict = {'default': outtmpl_dict}
 806         outtmpl_dict.update({
 807             k: v for k, v in DEFAULT_OUTTMPL.items()
 808             if not outtmpl_dict.get(k)})
 809         for key, val in outtmpl_dict.items():
 810             if isinstance(val, bytes):
 811                 self.report_warning(
 812                     'Parameter outtmpl is bytes, but should be a unicode string. '
 813                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 814         return outtmpl_dict
 815
 816     @staticmethod
 817     def validate_outtmpl(tmpl):
 818         ''' @return None or Exception object '''
 819         try:
 820             re.sub(
 821                 STR_FORMAT_RE.format(''),
 822                 lambda mobj: ('%' if not mobj.group('has_key') else '') + mobj.group(0),
 823                 tmpl
 824             ) % collections.defaultdict(int)
 825             return None
 826         except ValueError as err:
 827             return err
 828
 829     def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
 830         """ Make the template and info_dict suitable for substitution (outtmpl % info_dict)"""
 831         info_dict = dict(info_dict)
 832         na = self.params.get('outtmpl_na_placeholder', 'NA')
 833
 834         info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
 835             formatSeconds(info_dict['duration'], '-' if sanitize else ':')
 836             if info_dict.get('duration', None) is not None
 837             else None)
 838         info_dict['epoch'] = int(time.time())
 839         info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
 840         if info_dict.get('resolution') is None:
 841             info_dict['resolution'] = self.format_resolution(info_dict, default=None)
 842
 843         # For fields playlist_index and autonumber convert all occurrences
 844         # of %(field)s to %(field)0Nd for backward compatibility
 845         field_size_compat_map = {
 846             'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
 847             'autonumber': self.params.get('autonumber_size') or 5,
 848         }
 849
 850         TMPL_DICT = {}
 851         EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE.format('[^)]*'))
 852         MATH_FUNCTIONS = {
 853             '+': float.__add__,
 854             '-': float.__sub__,
 855         }
 856         # Field is of the form key1.key2...
 857         # where keys (except first) can be string, int or slice
 858         FIELD_RE = r'\w+(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
 859         MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
 860         MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
 861         INTERNAL_FORMAT_RE = re.compile(r'''(?x)
 862             (?P<negate>-)?
 863             (?P<fields>{field})
 864             (?P<maths>(?:{math_op}{math_field})*)
 865             (?:>(?P<strf_format>.+?))?
 866             (?:\|(?P<default>.*?))?
 867             $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
 868
 869         get_key = lambda k: traverse_obj(
 870             info_dict, k.split('.'), is_user_input=True, traverse_string=True)
 871
 872         def get_value(mdict):
 873             # Object traversal
 874             value = get_key(mdict['fields'])
 875             # Negative
 876             if mdict['negate']:
 877                 value = float_or_none(value)
 878                 if value is not None:
 879                     value *= -1
 880             # Do maths
 881             offset_key = mdict['maths']
 882             if offset_key:
 883                 value = float_or_none(value)
 884                 operator = None
 885                 while offset_key:
 886                     item = re.match(
 887                         MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
 888                         offset_key).group(0)
 889                     offset_key = offset_key[len(item):]
 890                     if operator is None:
 891                         operator = MATH_FUNCTIONS[item]
 892                         continue
 893                     item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
 894                     offset = float_or_none(item)
 895                     if offset is None:
 896                         offset = float_or_none(get_key(item))
 897                     try:
 898                         value = operator(value, multiplier * offset)
 899                     except (TypeError, ZeroDivisionError):
 900                         return None
 901                     operator = None
 902             # Datetime formatting
 903             if mdict['strf_format']:
 904                 value = strftime_or_none(value, mdict['strf_format'])
 905
 906             return value
 907
 908         def create_key(outer_mobj):
 909             if not outer_mobj.group('has_key'):
 910                 return '%{}'.format(outer_mobj.group(0))
 911
 912             key = outer_mobj.group('key')
 913             fmt = outer_mobj.group('format')
 914             mobj = re.match(INTERNAL_FORMAT_RE, key)
 915             if mobj is None:
 916                 value, default = None, na
 917             else:
 918                 mobj = mobj.groupdict()
 919                 default = mobj['default'] if mobj['default'] is not None else na
 920                 value = get_value(mobj)
 921
 922             if fmt == 's' and value is not None and key in field_size_compat_map.keys():
 923                 fmt = '0{:d}d'.format(field_size_compat_map[key])
 924
 925             value = default if value is None else value
 926             key += '\0%s' % fmt
 927
 928             if fmt == 'c':
 929                 value = compat_str(value)
 930                 if value is None:
 931                     value, fmt = default, 's'
 932                 else:
 933                     value = value[0]
 934             elif fmt[-1] not in 'rs':  # numeric
 935                 value = float_or_none(value)
 936                 if value is None:
 937                     value, fmt = default, 's'
 938             if sanitize:
 939                 if fmt[-1] == 'r':
 940                     # If value is an object, sanitize might convert it to a string
 941                     # So we convert it to repr first
 942                     value, fmt = repr(value), '%ss' % fmt[:-1]
 943                 if fmt[-1] in 'csr':
 944                     value = sanitize(key, value)
 945             TMPL_DICT[key] = value
 946             return '%({key}){fmt}'.format(key=key, fmt=fmt)
 947
 948         return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
 949
 950     def _prepare_filename(self, info_dict, tmpl_type='default'):
 951         try:
 952             sanitize = lambda k, v: sanitize_filename(
 953                 compat_str(v),
 954                 restricted=self.params.get('restrictfilenames'),
 955                 is_id=(k == 'id' or k.endswith('_id')))
 956             outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
 957             outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)
 958
 959             # expand_path translates '%%' into '%' and '$$' into '$'
 960             # correspondingly that is not what we want since we need to keep
 961             # '%%' intact for template dict substitution step. Working around
 962             # with boundary-alike separator hack.
 963             sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 964             outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 965
 966             # outtmpl should be expand_path'ed before template dict substitution
 967             # because meta fields may contain env variables we don't want to
 968             # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
 969             # title "Hello $PATH", we don't want `$PATH` to be expanded.
 970             filename = expand_path(outtmpl).replace(sep, '') % template_dict
 971
 972             force_ext = OUTTMPL_TYPES.get(tmpl_type)
 973             if force_ext is not None:
 974                 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
 975
 976             # https://github.com/blackjack4494/youtube-dlc/issues/85
 977             trim_file_name = self.params.get('trim_file_name', False)
 978             if trim_file_name:
 979                 fn_groups = filename.rsplit('.')
 980                 ext = fn_groups[-1]
 981                 sub_ext = ''
 982                 if len(fn_groups) > 2:
 983                     sub_ext = fn_groups[-2]
 984                 filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
 985
 986             return filename
 987         except ValueError as err:
 988             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 989             return None
 990
 991     def prepare_filename(self, info_dict, dir_type='', warn=False):
 992         """Generate the output filename."""
 993         paths = self.params.get('paths', {})
 994         assert isinstance(paths, dict)
 995         filename = self._prepare_filename(info_dict, dir_type or 'default')
 996
 997         if warn and not self.__prepare_filename_warned:
 998             if not paths:
 999                 pass
1000             elif filename == '-':
1001                 self.report_warning('--paths is ignored when an outputting to stdout')
1002             elif os.path.isabs(filename):
1003                 self.report_warning('--paths is ignored since an absolute path is given in output template')
1004             self.__prepare_filename_warned = True
1005         if filename == '-' or not filename:
1006             return filename
1007
1008         homepath = expand_path(paths.get('home', '').strip())
1009         assert isinstance(homepath, compat_str)
1010         subdir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
1011         assert isinstance(subdir, compat_str)
1012         path = os.path.join(homepath, subdir, filename)
1013
1014         # Temporary fix for #4787
1015         # 'Treat' all problem characters by passing filename through preferredencoding
1016         # to workaround encoding issues with subprocess on python2 @ Windows
1017         if sys.version_info < (3, 0) and sys.platform == 'win32':
1018             path = encodeFilename(path, True).decode(preferredencoding())
1019         return sanitize_path(path, force=self.params.get('windowsfilenames'))
1020
1021     def _match_entry(self, info_dict, incomplete=False, silent=False):
1022         """ Returns None if the file should be downloaded """
1023
1024         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1025
1026         def check_filter():
1027             if 'title' in info_dict:
1028                 # This can happen when we're just evaluating the playlist
1029                 title = info_dict['title']
1030                 matchtitle = self.params.get('matchtitle', False)
1031                 if matchtitle:
1032                     if not re.search(matchtitle, title, re.IGNORECASE):
1033                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1034                 rejecttitle = self.params.get('rejecttitle', False)
1035                 if rejecttitle:
1036                     if re.search(rejecttitle, title, re.IGNORECASE):
1037                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1038             date = info_dict.get('upload_date')
1039             if date is not None:
1040                 dateRange = self.params.get('daterange', DateRange())
1041                 if date not in dateRange:
1042                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1043             view_count = info_dict.get('view_count')
1044             if view_count is not None:
1045                 min_views = self.params.get('min_views')
1046                 if min_views is not None and view_count < min_views:
1047                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1048                 max_views = self.params.get('max_views')
1049                 if max_views is not None and view_count > max_views:
1050                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1051             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1052                 return 'Skipping "%s" because it is age restricted' % video_title
1053
1054             if not incomplete:
1055                 match_filter = self.params.get('match_filter')
1056                 if match_filter is not None:
1057                     ret = match_filter(info_dict)
1058                     if ret is not None:
1059                         return ret
1060             return None
1061
1062         if self.in_download_archive(info_dict):
1063             reason = '%s has already been recorded in the archive' % video_title
1064             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1065         else:
1066             reason = check_filter()
1067             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1068         if reason is not None:
1069             if not silent:
1070                 self.to_screen('[download] ' + reason)
1071             if self.params.get(break_opt, False):
1072                 raise break_err()
1073         return reason
1074
1075     @staticmethod
1076     def add_extra_info(info_dict, extra_info):
1077         '''Set the keys from extra_info in info dict if they are missing'''
1078         for key, value in extra_info.items():
1079             info_dict.setdefault(key, value)
1080
1081     def extract_info(self, url, download=True, ie_key=None, extra_info={},
1082                      process=True, force_generic_extractor=False):
1083         """
1084         Return a list with a dictionary for each video extracted.
1085
1086         Arguments:
1087         url -- URL to extract
1088
1089         Keyword arguments:
1090         download -- whether to download videos during extraction
1091         ie_key -- extractor key hint
1092         extra_info -- dictionary containing the extra values to add to each result
1093         process -- whether to resolve all unresolved references (URLs, playlist items),
1094             must be True for download to work.
1095         force_generic_extractor -- force using the generic extractor
1096         """
1097
1098         if not ie_key and force_generic_extractor:
1099             ie_key = 'Generic'
1100
1101         if ie_key:
1102             ies = [self.get_info_extractor(ie_key)]
1103         else:
1104             ies = self._ies
1105
1106         for ie in ies:
1107             if not ie.suitable(url):
1108                 continue
1109
1110             ie_key = ie.ie_key()
1111             ie = self.get_info_extractor(ie_key)
1112             if not ie.working():
1113                 self.report_warning('The program functionality for this site has been marked as broken, '
1114                                     'and will probably not work.')
1115
1116             try:
1117                 temp_id = str_or_none(
1118                     ie.extract_id(url) if callable(getattr(ie, 'extract_id', None))
1119                     else ie._match_id(url))
1120             except (AssertionError, IndexError, AttributeError):
1121                 temp_id = None
1122             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1123                 self.to_screen("[%s] %s: has already been recorded in archive" % (
1124                                ie_key, temp_id))
1125                 break
1126             return self.__extract_info(url, ie, download, extra_info, process)
1127         else:
1128             self.report_error('no suitable InfoExtractor for URL %s' % url)
1129
1130     def __handle_extraction_exceptions(func):
1131         def wrapper(self, *args, **kwargs):
1132             try:
1133                 return func(self, *args, **kwargs)
1134             except GeoRestrictedError as e:
1135                 msg = e.msg
1136                 if e.countries:
1137                     msg += '\nThis video is available in %s.' % ', '.join(
1138                         map(ISO3166Utils.short2full, e.countries))
1139                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1140                 self.report_error(msg)
1141             except ExtractorError as e:  # An error we somewhat expected
1142                 self.report_error(compat_str(e), e.format_traceback())
1143             except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
1144                 raise
1145             except Exception as e:
1146                 if self.params.get('ignoreerrors', False):
1147                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
1148                 else:
1149                     raise
1150         return wrapper
1151
1152     @__handle_extraction_exceptions
1153     def __extract_info(self, url, ie, download, extra_info, process):
1154         ie_result = ie.extract(url)
1155         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1156             return
1157         if isinstance(ie_result, list):
1158             # Backwards compatibility: old IE result format
1159             ie_result = {
1160                 '_type': 'compat_list',
1161                 'entries': ie_result,
1162             }
1163         self.add_default_extra_info(ie_result, ie, url)
1164         if process:
1165             return self.process_ie_result(ie_result, download, extra_info)
1166         else:
1167             return ie_result
1168
1169     def add_default_extra_info(self, ie_result, ie, url):
1170         self.add_extra_info(ie_result, {
1171             'extractor': ie.IE_NAME,
1172             'webpage_url': url,
1173             'original_url': url,
1174             'webpage_url_basename': url_basename(url),
1175             'extractor_key': ie.ie_key(),
1176         })
1177
1178     def process_ie_result(self, ie_result, download=True, extra_info={}):
1179         """
1180         Take the result of the ie(may be modified) and resolve all unresolved
1181         references (URLs, playlist items).
1182
1183         It will also download the videos if 'download'.
1184         Returns the resolved ie_result.
1185         """
1186         result_type = ie_result.get('_type', 'video')
1187
1188         if result_type in ('url', 'url_transparent'):
1189             ie_result['url'] = sanitize_url(ie_result['url'])
1190             extract_flat = self.params.get('extract_flat', False)
1191             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1192                     or extract_flat is True):
1193                 info_copy = ie_result.copy()
1194                 self.add_extra_info(info_copy, extra_info)
1195                 self.add_default_extra_info(
1196                     info_copy, self.get_info_extractor(ie_result.get('ie_key')), ie_result['url'])
1197                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1198                 return ie_result
1199
1200         if result_type == 'video':
1201             self.add_extra_info(ie_result, extra_info)
1202             ie_result = self.process_video_result(ie_result, download=download)
1203             additional_urls = (ie_result or {}).get('additional_urls')
1204             if additional_urls:
1205                 # TODO: Improve MetadataFromFieldPP to allow setting a list
1206                 if isinstance(additional_urls, compat_str):
1207                     additional_urls = [additional_urls]
1208                 self.to_screen(
1209                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1210                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1211                 ie_result['additional_entries'] = [
1212                     self.extract_info(
1213                         url, download, extra_info,
1214                         force_generic_extractor=self.params.get('force_generic_extractor'))
1215                     for url in additional_urls
1216                 ]
1217             return ie_result
1218         elif result_type == 'url':
1219             # We have to add extra_info to the results because it may be
1220             # contained in a playlist
1221             return self.extract_info(
1222                 ie_result['url'], download,
1223                 ie_key=ie_result.get('ie_key'),
1224                 extra_info=extra_info)
1225         elif result_type == 'url_transparent':
1226             # Use the information from the embedding page
1227             info = self.extract_info(
1228                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1229                 extra_info=extra_info, download=False, process=False)
1230
1231             # extract_info may return None when ignoreerrors is enabled and
1232             # extraction failed with an error, don't crash and return early
1233             # in this case
1234             if not info:
1235                 return info
1236
1237             force_properties = dict(
1238                 (k, v) for k, v in ie_result.items() if v is not None)
1239             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1240                 if f in force_properties:
1241                     del force_properties[f]
1242             new_result = info.copy()
1243             new_result.update(force_properties)
1244
1245             # Extracted info may not be a video result (i.e.
1246             # info.get('_type', 'video') != video) but rather an url or
1247             # url_transparent. In such cases outer metadata (from ie_result)
1248             # should be propagated to inner one (info). For this to happen
1249             # _type of info should be overridden with url_transparent. This
1250             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1251             if new_result.get('_type') == 'url':
1252                 new_result['_type'] = 'url_transparent'
1253
1254             return self.process_ie_result(
1255                 new_result, download=download, extra_info=extra_info)
1256         elif result_type in ('playlist', 'multi_video'):
1257             # Protect from infinite recursion due to recursively nested playlists
1258             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1259             webpage_url = ie_result['webpage_url']
1260             if webpage_url in self._playlist_urls:
1261                 self.to_screen(
1262                     '[download] Skipping already downloaded playlist: %s'
1263                     % ie_result.get('title') or ie_result.get('id'))
1264                 return
1265
1266             self._playlist_level += 1
1267             self._playlist_urls.add(webpage_url)
1268             self._sanitize_thumbnails(ie_result)
1269             try:
1270                 return self.__process_playlist(ie_result, download)
1271             finally:
1272                 self._playlist_level -= 1
1273                 if not self._playlist_level:
1274                     self._playlist_urls.clear()
1275         elif result_type == 'compat_list':
1276             self.report_warning(
1277                 'Extractor %s returned a compat_list result. '
1278                 'It needs to be updated.' % ie_result.get('extractor'))
1279
1280             def _fixup(r):
1281                 self.add_extra_info(
1282                     r,
1283                     {
1284                         'extractor': ie_result['extractor'],
1285                         'webpage_url': ie_result['webpage_url'],
1286                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
1287                         'extractor_key': ie_result['extractor_key'],
1288                     }
1289                 )
1290                 return r
1291             ie_result['entries'] = [
1292                 self.process_ie_result(_fixup(r), download, extra_info)
1293                 for r in ie_result['entries']
1294             ]
1295             return ie_result
1296         else:
1297             raise Exception('Invalid result type: %s' % result_type)
1298
1299     def _ensure_dir_exists(self, path):
1300         return make_dir(path, self.report_error)
1301
1302     def __process_playlist(self, ie_result, download):
1303         # We process each entry in the playlist
1304         playlist = ie_result.get('title') or ie_result.get('id')
1305         self.to_screen('[download] Downloading playlist: %s' % playlist)
1306
1307         if 'entries' not in ie_result:
1308             raise EntryNotInPlaylist()
1309         incomplete_entries = bool(ie_result.get('requested_entries'))
1310         if incomplete_entries:
1311             def fill_missing_entries(entries, indexes):
1312                 ret = [None] * max(*indexes)
1313                 for i, entry in zip(indexes, entries):
1314                     ret[i - 1] = entry
1315                 return ret
1316             ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1317
1318         playlist_results = []
1319
1320         playliststart = self.params.get('playliststart', 1)
1321         playlistend = self.params.get('playlistend')
1322         # For backwards compatibility, interpret -1 as whole list
1323         if playlistend == -1:
1324             playlistend = None
1325
1326         playlistitems_str = self.params.get('playlist_items')
1327         playlistitems = None
1328         if playlistitems_str is not None:
1329             def iter_playlistitems(format):
1330                 for string_segment in format.split(','):
1331                     if '-' in string_segment:
1332                         start, end = string_segment.split('-')
1333                         for item in range(int(start), int(end) + 1):
1334                             yield int(item)
1335                     else:
1336                         yield int(string_segment)
1337             playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1338
1339         ie_entries = ie_result['entries']
1340         msg = (
1341             'Downloading %d videos' if not isinstance(ie_entries, list)
1342             else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
1343         if not isinstance(ie_entries, (list, PagedList)):
1344             ie_entries = LazyList(ie_entries)
1345
1346         entries = []
1347         for i in playlistitems or itertools.count(playliststart):
1348             if playlistitems is None and playlistend is not None and playlistend < i:
1349                 break
1350             entry = None
1351             try:
1352                 entry = ie_entries[i - 1]
1353                 if entry is None:
1354                     raise EntryNotInPlaylist()
1355             except (IndexError, EntryNotInPlaylist):
1356                 if incomplete_entries:
1357                     raise EntryNotInPlaylist()
1358                 elif not playlistitems:
1359                     break
1360             entries.append(entry)
1361             try:
1362                 if entry is not None:
1363                     self._match_entry(entry, incomplete=True, silent=True)
1364             except (ExistingVideoReached, RejectedVideoReached):
1365                 break
1366         ie_result['entries'] = entries
1367
1368         # Save playlist_index before re-ordering
1369         entries = [
1370             ((playlistitems[i - 1] if playlistitems else i), entry)
1371             for i, entry in enumerate(entries, 1)
1372             if entry is not None]
1373         n_entries = len(entries)
1374
1375         if not playlistitems and (playliststart or playlistend):
1376             playlistitems = list(range(playliststart, playliststart + n_entries))
1377         ie_result['requested_entries'] = playlistitems
1378
1379         if self.params.get('allow_playlist_files', True):
1380             ie_copy = {
1381                 'playlist': playlist,
1382                 'playlist_id': ie_result.get('id'),
1383                 'playlist_title': ie_result.get('title'),
1384                 'playlist_uploader': ie_result.get('uploader'),
1385                 'playlist_uploader_id': ie_result.get('uploader_id'),
1386                 'playlist_index': 0,
1387             }
1388             ie_copy.update(dict(ie_result))
1389
1390             if self.params.get('writeinfojson', False):
1391                 infofn = self.prepare_filename(ie_copy, 'pl_infojson')
1392                 if not self._ensure_dir_exists(encodeFilename(infofn)):
1393                     return
1394                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
1395                     self.to_screen('[info] Playlist metadata is already present')
1396                 else:
1397                     self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
1398                     try:
1399                         write_json_file(self.filter_requested_info(ie_result, self.params.get('clean_infojson', True)), infofn)
1400                     except (OSError, IOError):
1401                         self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
1402
1403             # TODO: This should be passed to ThumbnailsConvertor if necessary
1404             self._write_thumbnails(ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1405
1406             if self.params.get('writedescription', False):
1407                 descfn = self.prepare_filename(ie_copy, 'pl_description')
1408                 if not self._ensure_dir_exists(encodeFilename(descfn)):
1409                     return
1410                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
1411                     self.to_screen('[info] Playlist description is already present')
1412                 elif ie_result.get('description') is None:
1413                     self.report_warning('There\'s no playlist description to write.')
1414                 else:
1415                     try:
1416                         self.to_screen('[info] Writing playlist description to: ' + descfn)
1417                         with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1418                             descfile.write(ie_result['description'])
1419                     except (OSError, IOError):
1420                         self.report_error('Cannot write playlist description file ' + descfn)
1421                         return
1422
1423         if self.params.get('playlistreverse', False):
1424             entries = entries[::-1]
1425         if self.params.get('playlistrandom', False):
1426             random.shuffle(entries)
1427
1428         x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1429
1430         self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
1431         failures = 0
1432         max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1433         for i, entry_tuple in enumerate(entries, 1):
1434             playlist_index, entry = entry_tuple
1435             if 'playlist_index' in self.params.get('compat_options', []):
1436                 playlist_index = playlistitems[i - 1] if playlistitems else i
1437             self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1438             # This __x_forwarded_for_ip thing is a bit ugly but requires
1439             # minimal changes
1440             if x_forwarded_for:
1441                 entry['__x_forwarded_for_ip'] = x_forwarded_for
1442             extra = {
1443                 'n_entries': n_entries,
1444                 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
1445                 'playlist_index': playlist_index,
1446                 'playlist_autonumber': i,
1447                 'playlist': playlist,
1448                 'playlist_id': ie_result.get('id'),
1449                 'playlist_title': ie_result.get('title'),
1450                 'playlist_uploader': ie_result.get('uploader'),
1451                 'playlist_uploader_id': ie_result.get('uploader_id'),
1452                 'extractor': ie_result['extractor'],
1453                 'webpage_url': ie_result['webpage_url'],
1454                 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1455                 'extractor_key': ie_result['extractor_key'],
1456             }
1457
1458             if self._match_entry(entry, incomplete=True) is not None:
1459                 continue
1460
1461             entry_result = self.__process_iterable_entry(entry, download, extra)
1462             if not entry_result:
1463                 failures += 1
1464             if failures >= max_failures:
1465                 self.report_error(
1466                     'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1467                 break
1468             # TODO: skip failed (empty) entries?
1469             playlist_results.append(entry_result)
1470         ie_result['entries'] = playlist_results
1471         self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1472         return ie_result
1473
1474     @__handle_extraction_exceptions
1475     def __process_iterable_entry(self, entry, download, extra_info):
1476         return self.process_ie_result(
1477             entry, download=download, extra_info=extra_info)
1478
1479     def _build_format_filter(self, filter_spec):
1480         " Returns a function to filter the formats according to the filter_spec "
1481
1482         OPERATORS = {
1483             '<': operator.lt,
1484             '<=': operator.le,
1485             '>': operator.gt,
1486             '>=': operator.ge,
1487             '=': operator.eq,
1488             '!=': operator.ne,
1489         }
1490         operator_rex = re.compile(r'''(?x)\s*
1491             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
1492             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1493             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
1494             $
1495             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1496         m = operator_rex.search(filter_spec)
1497         if m:
1498             try:
1499                 comparison_value = int(m.group('value'))
1500             except ValueError:
1501                 comparison_value = parse_filesize(m.group('value'))
1502                 if comparison_value is None:
1503                     comparison_value = parse_filesize(m.group('value') + 'B')
1504                 if comparison_value is None:
1505                     raise ValueError(
1506                         'Invalid value %r in format specification %r' % (
1507                             m.group('value'), filter_spec))
1508             op = OPERATORS[m.group('op')]
1509
1510         if not m:
1511             STR_OPERATORS = {
1512                 '=': operator.eq,
1513                 '^=': lambda attr, value: attr.startswith(value),
1514                 '$=': lambda attr, value: attr.endswith(value),
1515                 '*=': lambda attr, value: value in attr,
1516             }
1517             str_operator_rex = re.compile(r'''(?x)
1518                 \s*(?P<key>[a-zA-Z0-9._-]+)
1519                 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
1520                 \s*(?P<value>[a-zA-Z0-9._-]+)
1521                 \s*$
1522                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1523             m = str_operator_rex.search(filter_spec)
1524             if m:
1525                 comparison_value = m.group('value')
1526                 str_op = STR_OPERATORS[m.group('op')]
1527                 if m.group('negation'):
1528                     op = lambda attr, value: not str_op(attr, value)
1529                 else:
1530                     op = str_op
1531
1532         if not m:
1533             raise ValueError('Invalid filter specification %r' % filter_spec)
1534
1535         def _filter(f):
1536             actual_value = f.get(m.group('key'))
1537             if actual_value is None:
1538                 return m.group('none_inclusive')
1539             return op(actual_value, comparison_value)
1540         return _filter
1541
1542     def _default_format_spec(self, info_dict, download=True):
1543
1544         def can_merge():
1545             merger = FFmpegMergerPP(self)
1546             return merger.available and merger.can_merge()
1547
1548         prefer_best = (
1549             not self.params.get('simulate', False)
1550             and download
1551             and (
1552                 not can_merge()
1553                 or info_dict.get('is_live', False)
1554                 or self.outtmpl_dict['default'] == '-'))
1555         compat = (
1556             prefer_best
1557             or self.params.get('allow_multiple_audio_streams', False)
1558             or 'format-spec' in self.params.get('compat_opts', []))
1559
1560         return (
1561             'best/bestvideo+bestaudio' if prefer_best
1562             else 'bestvideo*+bestaudio/best' if not compat
1563             else 'bestvideo+bestaudio/best')
1564
1565     def build_format_selector(self, format_spec):
1566         def syntax_error(note, start):
1567             message = (
1568                 'Invalid format specification: '
1569                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1570             return SyntaxError(message)
1571
1572         PICKFIRST = 'PICKFIRST'
1573         MERGE = 'MERGE'
1574         SINGLE = 'SINGLE'
1575         GROUP = 'GROUP'
1576         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1577
1578         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1579                                   'video': self.params.get('allow_multiple_video_streams', False)}
1580
1581         check_formats = self.params.get('check_formats')
1582
1583         def _parse_filter(tokens):
1584             filter_parts = []
1585             for type, string, start, _, _ in tokens:
1586                 if type == tokenize.OP and string == ']':
1587                     return ''.join(filter_parts)
1588                 else:
1589                     filter_parts.append(string)
1590
1591         def _remove_unused_ops(tokens):
1592             # Remove operators that we don't use and join them with the surrounding strings
1593             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1594             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1595             last_string, last_start, last_end, last_line = None, None, None, None
1596             for type, string, start, end, line in tokens:
1597                 if type == tokenize.OP and string == '[':
1598                     if last_string:
1599                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1600                         last_string = None
1601                     yield type, string, start, end, line
1602                     # everything inside brackets will be handled by _parse_filter
1603                     for type, string, start, end, line in tokens:
1604                         yield type, string, start, end, line
1605                         if type == tokenize.OP and string == ']':
1606                             break
1607                 elif type == tokenize.OP and string in ALLOWED_OPS:
1608                     if last_string:
1609                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1610                         last_string = None
1611                     yield type, string, start, end, line
1612                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1613                     if not last_string:
1614                         last_string = string
1615                         last_start = start
1616                         last_end = end
1617                     else:
1618                         last_string += string
1619             if last_string:
1620                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1621
1622         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1623             selectors = []
1624             current_selector = None
1625             for type, string, start, _, _ in tokens:
1626                 # ENCODING is only defined in python 3.x
1627                 if type == getattr(tokenize, 'ENCODING', None):
1628                     continue
1629                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1630                     current_selector = FormatSelector(SINGLE, string, [])
1631                 elif type == tokenize.OP:
1632                     if string == ')':
1633                         if not inside_group:
1634                             # ')' will be handled by the parentheses group
1635                             tokens.restore_last_token()
1636                         break
1637                     elif inside_merge and string in ['/', ',']:
1638                         tokens.restore_last_token()
1639                         break
1640                     elif inside_choice and string == ',':
1641                         tokens.restore_last_token()
1642                         break
1643                     elif string == ',':
1644                         if not current_selector:
1645                             raise syntax_error('"," must follow a format selector', start)
1646                         selectors.append(current_selector)
1647                         current_selector = None
1648                     elif string == '/':
1649                         if not current_selector:
1650                             raise syntax_error('"/" must follow a format selector', start)
1651                         first_choice = current_selector
1652                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1653                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1654                     elif string == '[':
1655                         if not current_selector:
1656                             current_selector = FormatSelector(SINGLE, 'best', [])
1657                         format_filter = _parse_filter(tokens)
1658                         current_selector.filters.append(format_filter)
1659                     elif string == '(':
1660                         if current_selector:
1661                             raise syntax_error('Unexpected "("', start)
1662                         group = _parse_format_selection(tokens, inside_group=True)
1663                         current_selector = FormatSelector(GROUP, group, [])
1664                     elif string == '+':
1665                         if not current_selector:
1666                             raise syntax_error('Unexpected "+"', start)
1667                         selector_1 = current_selector
1668                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
1669                         if not selector_2:
1670                             raise syntax_error('Expected a selector', start)
1671                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1672                     else:
1673                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1674                 elif type == tokenize.ENDMARKER:
1675                     break
1676             if current_selector:
1677                 selectors.append(current_selector)
1678             return selectors
1679
1680         def _merge(formats_pair):
1681             format_1, format_2 = formats_pair
1682
1683             formats_info = []
1684             formats_info.extend(format_1.get('requested_formats', (format_1,)))
1685             formats_info.extend(format_2.get('requested_formats', (format_2,)))
1686
1687             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1688                 get_no_more = {"video": False, "audio": False}
1689                 for (i, fmt_info) in enumerate(formats_info):
1690                     for aud_vid in ["audio", "video"]:
1691                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1692                             if get_no_more[aud_vid]:
1693                                 formats_info.pop(i)
1694                             get_no_more[aud_vid] = True
1695
1696             if len(formats_info) == 1:
1697                 return formats_info[0]
1698
1699             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1700             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1701
1702             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1703             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1704
1705             output_ext = self.params.get('merge_output_format')
1706             if not output_ext:
1707                 if the_only_video:
1708                     output_ext = the_only_video['ext']
1709                 elif the_only_audio and not video_fmts:
1710                     output_ext = the_only_audio['ext']
1711                 else:
1712                     output_ext = 'mkv'
1713
1714             new_dict = {
1715                 'requested_formats': formats_info,
1716                 'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
1717                 'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
1718                 'ext': output_ext,
1719             }
1720
1721             if the_only_video:
1722                 new_dict.update({
1723                     'width': the_only_video.get('width'),
1724                     'height': the_only_video.get('height'),
1725                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
1726                     'fps': the_only_video.get('fps'),
1727                     'vcodec': the_only_video.get('vcodec'),
1728                     'vbr': the_only_video.get('vbr'),
1729                     'stretched_ratio': the_only_video.get('stretched_ratio'),
1730                 })
1731
1732             if the_only_audio:
1733                 new_dict.update({
1734                     'acodec': the_only_audio.get('acodec'),
1735                     'abr': the_only_audio.get('abr'),
1736                 })
1737
1738             return new_dict
1739
1740         def _check_formats(formats):
1741             for f in formats:
1742                 self.to_screen('[info] Testing format %s' % f['format_id'])
1743                 paths = self.params.get('paths', {})
1744                 temp_file = os.path.join(
1745                     expand_path(paths.get('home', '').strip()),
1746                     expand_path(paths.get('temp', '').strip()),
1747                     'ytdl.%s.f%s.check-format' % (random_uuidv4(), f['format_id']))
1748                 try:
1749                     dl, _ = self.dl(temp_file, f, test=True)
1750                 except (ExtractorError, IOError, OSError, ValueError) + network_exceptions:
1751                     dl = False
1752                 finally:
1753                     if os.path.exists(temp_file):
1754                         os.remove(temp_file)
1755                 if dl:
1756                     yield f
1757                 else:
1758                     self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1759
1760         def _build_selector_function(selector):
1761             if isinstance(selector, list):  # ,
1762                 fs = [_build_selector_function(s) for s in selector]
1763
1764                 def selector_function(ctx):
1765                     for f in fs:
1766                         for format in f(ctx):
1767                             yield format
1768                 return selector_function
1769
1770             elif selector.type == GROUP:  # ()
1771                 selector_function = _build_selector_function(selector.selector)
1772
1773             elif selector.type == PICKFIRST:  # /
1774                 fs = [_build_selector_function(s) for s in selector.selector]
1775
1776                 def selector_function(ctx):
1777                     for f in fs:
1778                         picked_formats = list(f(ctx))
1779                         if picked_formats:
1780                             return picked_formats
1781                     return []
1782
1783             elif selector.type == SINGLE:  # atom
1784                 format_spec = selector.selector or 'best'
1785
1786                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
1787                 if format_spec == 'all':
1788                     def selector_function(ctx):
1789                         formats = list(ctx['formats'])
1790                         if check_formats:
1791                             formats = _check_formats(formats)
1792                         for f in formats:
1793                             yield f
1794                 elif format_spec == 'mergeall':
1795                     def selector_function(ctx):
1796                         formats = list(_check_formats(ctx['formats']))
1797                         if not formats:
1798                             return
1799                         merged_format = formats[-1]
1800                         for f in formats[-2::-1]:
1801                             merged_format = _merge((merged_format, f))
1802                         yield merged_format
1803
1804                 else:
1805                     format_fallback, format_reverse, format_idx = False, True, 1
1806                     mobj = re.match(
1807                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
1808                         format_spec)
1809                     if mobj is not None:
1810                         format_idx = int_or_none(mobj.group('n'), default=1)
1811                         format_reverse = mobj.group('bw')[0] == 'b'
1812                         format_type = (mobj.group('type') or [None])[0]
1813                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
1814                         format_modified = mobj.group('mod') is not None
1815
1816                         format_fallback = not format_type and not format_modified  # for b, w
1817                         filter_f = (
1818                             (lambda f: f.get('%scodec' % format_type) != 'none')
1819                             if format_type and format_modified  # bv*, ba*, wv*, wa*
1820                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
1821                             if format_type  # bv, ba, wv, wa
1822                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
1823                             if not format_modified  # b, w
1824                             else None)  # b*, w*
1825                     else:
1826                         filter_f = ((lambda f: f.get('ext') == format_spec)
1827                                     if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
1828                                     else (lambda f: f.get('format_id') == format_spec))  # id
1829
1830                     def selector_function(ctx):
1831                         formats = list(ctx['formats'])
1832                         if not formats:
1833                             return
1834                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
1835                         if format_fallback and ctx['incomplete_formats'] and not matches:
1836                             # for extractors with incomplete formats (audio only (soundcloud)
1837                             # or video only (imgur)) best/worst will fallback to
1838                             # best/worst {video,audio}-only format
1839                             matches = formats
1840                         if format_reverse:
1841                             matches = matches[::-1]
1842                         if check_formats:
1843                             matches = list(itertools.islice(_check_formats(matches), format_idx))
1844                         n = len(matches)
1845                         if -n <= format_idx - 1 < n:
1846                             yield matches[format_idx - 1]
1847
1848             elif selector.type == MERGE:        # +
1849                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
1850
1851                 def selector_function(ctx):
1852                     for pair in itertools.product(
1853                             selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
1854                         yield _merge(pair)
1855
1856             filters = [self._build_format_filter(f) for f in selector.filters]
1857
1858             def final_selector(ctx):
1859                 ctx_copy = copy.deepcopy(ctx)
1860                 for _filter in filters:
1861                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1862                 return selector_function(ctx_copy)
1863             return final_selector
1864
1865         stream = io.BytesIO(format_spec.encode('utf-8'))
1866         try:
1867             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1868         except tokenize.TokenError:
1869             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1870
1871         class TokenIterator(object):
1872             def __init__(self, tokens):
1873                 self.tokens = tokens
1874                 self.counter = 0
1875
1876             def __iter__(self):
1877                 return self
1878
1879             def __next__(self):
1880                 if self.counter >= len(self.tokens):
1881                     raise StopIteration()
1882                 value = self.tokens[self.counter]
1883                 self.counter += 1
1884                 return value
1885
1886             next = __next__
1887
1888             def restore_last_token(self):
1889                 self.counter -= 1
1890
1891         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1892         return _build_selector_function(parsed_selector)
1893
1894     def _calc_headers(self, info_dict):
1895         res = std_headers.copy()
1896
1897         add_headers = info_dict.get('http_headers')
1898         if add_headers:
1899             res.update(add_headers)
1900
1901         cookies = self._calc_cookies(info_dict)
1902         if cookies:
1903             res['Cookie'] = cookies
1904
1905         if 'X-Forwarded-For' not in res:
1906             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1907             if x_forwarded_for_ip:
1908                 res['X-Forwarded-For'] = x_forwarded_for_ip
1909
1910         return res
1911
1912     def _calc_cookies(self, info_dict):
1913         pr = sanitized_Request(info_dict['url'])
1914         self.cookiejar.add_cookie_header(pr)
1915         return pr.get_header('Cookie')
1916
1917     @staticmethod
1918     def _sanitize_thumbnails(info_dict):
1919         thumbnails = info_dict.get('thumbnails')
1920         if thumbnails is None:
1921             thumbnail = info_dict.get('thumbnail')
1922             if thumbnail:
1923                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1924         if thumbnails:
1925             thumbnails.sort(key=lambda t: (
1926                 t.get('preference') if t.get('preference') is not None else -1,
1927                 t.get('width') if t.get('width') is not None else -1,
1928                 t.get('height') if t.get('height') is not None else -1,
1929                 t.get('id') if t.get('id') is not None else '',
1930                 t.get('url')))
1931             for i, t in enumerate(thumbnails):
1932                 t['url'] = sanitize_url(t['url'])
1933                 if t.get('width') and t.get('height'):
1934                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1935                 if t.get('id') is None:
1936                     t['id'] = '%d' % i
1937
1938     def process_video_result(self, info_dict, download=True):
1939         assert info_dict.get('_type', 'video') == 'video'
1940
1941         if 'id' not in info_dict:
1942             raise ExtractorError('Missing "id" field in extractor result')
1943         if 'title' not in info_dict:
1944             raise ExtractorError('Missing "title" field in extractor result')
1945
1946         def report_force_conversion(field, field_not, conversion):
1947             self.report_warning(
1948                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
1949                 % (field, field_not, conversion))
1950
1951         def sanitize_string_field(info, string_field):
1952             field = info.get(string_field)
1953             if field is None or isinstance(field, compat_str):
1954                 return
1955             report_force_conversion(string_field, 'a string', 'string')
1956             info[string_field] = compat_str(field)
1957
1958         def sanitize_numeric_fields(info):
1959             for numeric_field in self._NUMERIC_FIELDS:
1960                 field = info.get(numeric_field)
1961                 if field is None or isinstance(field, compat_numeric_types):
1962                     continue
1963                 report_force_conversion(numeric_field, 'numeric', 'int')
1964                 info[numeric_field] = int_or_none(field)
1965
1966         sanitize_string_field(info_dict, 'id')
1967         sanitize_numeric_fields(info_dict)
1968
1969         if 'playlist' not in info_dict:
1970             # It isn't part of a playlist
1971             info_dict['playlist'] = None
1972             info_dict['playlist_index'] = None
1973
1974         self._sanitize_thumbnails(info_dict)
1975
1976         if self.params.get('list_thumbnails'):
1977             self.list_thumbnails(info_dict)
1978             return
1979
1980         thumbnail = info_dict.get('thumbnail')
1981         thumbnails = info_dict.get('thumbnails')
1982         if thumbnail:
1983             info_dict['thumbnail'] = sanitize_url(thumbnail)
1984         elif thumbnails:
1985             info_dict['thumbnail'] = thumbnails[-1]['url']
1986
1987         if 'display_id' not in info_dict and 'id' in info_dict:
1988             info_dict['display_id'] = info_dict['id']
1989
1990         for ts_key, date_key in (
1991                 ('timestamp', 'upload_date'),
1992                 ('release_timestamp', 'release_date'),
1993         ):
1994             if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
1995                 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1996                 # see http://bugs.python.org/issue1646728)
1997                 try:
1998                     upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
1999                     info_dict[date_key] = upload_date.strftime('%Y%m%d')
2000                 except (ValueError, OverflowError, OSError):
2001                     pass
2002
2003         # Auto generate title fields corresponding to the *_number fields when missing
2004         # in order to always have clean titles. This is very common for TV series.
2005         for field in ('chapter', 'season', 'episode'):
2006             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2007                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2008
2009         for cc_kind in ('subtitles', 'automatic_captions'):
2010             cc = info_dict.get(cc_kind)
2011             if cc:
2012                 for _, subtitle in cc.items():
2013                     for subtitle_format in subtitle:
2014                         if subtitle_format.get('url'):
2015                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2016                         if subtitle_format.get('ext') is None:
2017                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2018
2019         automatic_captions = info_dict.get('automatic_captions')
2020         subtitles = info_dict.get('subtitles')
2021
2022         if self.params.get('listsubtitles', False):
2023             if 'automatic_captions' in info_dict:
2024                 self.list_subtitles(
2025                     info_dict['id'], automatic_captions, 'automatic captions')
2026             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2027             return
2028
2029         info_dict['requested_subtitles'] = self.process_subtitles(
2030             info_dict['id'], subtitles, automatic_captions)
2031
2032         # We now pick which formats have to be downloaded
2033         if info_dict.get('formats') is None:
2034             # There's only one format available
2035             formats = [info_dict]
2036         else:
2037             formats = info_dict['formats']
2038
2039         if not formats:
2040             if not self.params.get('ignore_no_formats_error'):
2041                 raise ExtractorError('No video formats found!')
2042             else:
2043                 self.report_warning('No video formats found!')
2044
2045         def is_wellformed(f):
2046             url = f.get('url')
2047             if not url:
2048                 self.report_warning(
2049                     '"url" field is missing or empty - skipping format, '
2050                     'there is an error in extractor')
2051                 return False
2052             if isinstance(url, bytes):
2053                 sanitize_string_field(f, 'url')
2054             return True
2055
2056         # Filter out malformed formats for better extraction robustness
2057         formats = list(filter(is_wellformed, formats))
2058
2059         formats_dict = {}
2060
2061         # We check that all the formats have the format and format_id fields
2062         for i, format in enumerate(formats):
2063             sanitize_string_field(format, 'format_id')
2064             sanitize_numeric_fields(format)
2065             format['url'] = sanitize_url(format['url'])
2066             if not format.get('format_id'):
2067                 format['format_id'] = compat_str(i)
2068             else:
2069                 # Sanitize format_id from characters used in format selector expression
2070                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2071             format_id = format['format_id']
2072             if format_id not in formats_dict:
2073                 formats_dict[format_id] = []
2074             formats_dict[format_id].append(format)
2075
2076         # Make sure all formats have unique format_id
2077         for format_id, ambiguous_formats in formats_dict.items():
2078             if len(ambiguous_formats) > 1:
2079                 for i, format in enumerate(ambiguous_formats):
2080                     format['format_id'] = '%s-%d' % (format_id, i)
2081
2082         for i, format in enumerate(formats):
2083             if format.get('format') is None:
2084                 format['format'] = '{id} - {res}{note}'.format(
2085                     id=format['format_id'],
2086                     res=self.format_resolution(format),
2087                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
2088                 )
2089             # Automatically determine file extension if missing
2090             if format.get('ext') is None:
2091                 format['ext'] = determine_ext(format['url']).lower()
2092             # Automatically determine protocol if missing (useful for format
2093             # selection purposes)
2094             if format.get('protocol') is None:
2095                 format['protocol'] = determine_protocol(format)
2096             # Add HTTP headers, so that external programs can use them from the
2097             # json output
2098             full_format_info = info_dict.copy()
2099             full_format_info.update(format)
2100             format['http_headers'] = self._calc_headers(full_format_info)
2101         # Remove private housekeeping stuff
2102         if '__x_forwarded_for_ip' in info_dict:
2103             del info_dict['__x_forwarded_for_ip']
2104
2105         # TODO Central sorting goes here
2106
2107         if formats and formats[0] is not info_dict:
2108             # only set the 'formats' fields if the original info_dict list them
2109             # otherwise we end up with a circular reference, the first (and unique)
2110             # element in the 'formats' field in info_dict is info_dict itself,
2111             # which can't be exported to json
2112             info_dict['formats'] = formats
2113
2114         info_dict, _ = self.pre_process(info_dict)
2115
2116         if self.params.get('listformats'):
2117             if not info_dict.get('formats'):
2118                 raise ExtractorError('No video formats found', expected=True)
2119             self.list_formats(info_dict)
2120             return
2121
2122         req_format = self.params.get('format')
2123         if req_format is None:
2124             req_format = self._default_format_spec(info_dict, download=download)
2125             self.write_debug('Default format spec: %s' % req_format)
2126
2127         format_selector = self.build_format_selector(req_format)
2128
2129         # While in format selection we may need to have an access to the original
2130         # format set in order to calculate some metrics or do some processing.
2131         # For now we need to be able to guess whether original formats provided
2132         # by extractor are incomplete or not (i.e. whether extractor provides only
2133         # video-only or audio-only formats) for proper formats selection for
2134         # extractors with such incomplete formats (see
2135         # https://github.com/ytdl-org/youtube-dl/pull/5556).
2136         # Since formats may be filtered during format selection and may not match
2137         # the original formats the results may be incorrect. Thus original formats
2138         # or pre-calculated metrics should be passed to format selection routines
2139         # as well.
2140         # We will pass a context object containing all necessary additional data
2141         # instead of just formats.
2142         # This fixes incorrect format selection issue (see
2143         # https://github.com/ytdl-org/youtube-dl/issues/10083).
2144         incomplete_formats = (
2145             # All formats are video-only or
2146             all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2147             # all formats are audio-only
2148             or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
2149
2150         ctx = {
2151             'formats': formats,
2152             'incomplete_formats': incomplete_formats,
2153         }
2154
2155         formats_to_download = list(format_selector(ctx))
2156         if not formats_to_download:
2157             if not self.params.get('ignore_no_formats_error'):
2158                 raise ExtractorError('Requested format is not available', expected=True)
2159             else:
2160                 self.report_warning('Requested format is not available')
2161         elif download:
2162             self.to_screen(
2163                 '[info] %s: Downloading %d format(s): %s' % (
2164                     info_dict['id'], len(formats_to_download),
2165                     ", ".join([f['format_id'] for f in formats_to_download])))
2166             for fmt in formats_to_download:
2167                 new_info = dict(info_dict)
2168                 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2169                 new_info['__original_infodict'] = info_dict
2170                 new_info.update(fmt)
2171                 self.process_info(new_info)
2172         # We update the info dict with the best quality format (backwards compatibility)
2173         if formats_to_download:
2174             info_dict.update(formats_to_download[-1])
2175         return info_dict
2176
2177     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2178         """Select the requested subtitles and their format"""
2179         available_subs = {}
2180         if normal_subtitles and self.params.get('writesubtitles'):
2181             available_subs.update(normal_subtitles)
2182         if automatic_captions and self.params.get('writeautomaticsub'):
2183             for lang, cap_info in automatic_captions.items():
2184                 if lang not in available_subs:
2185                     available_subs[lang] = cap_info
2186
2187         if (not self.params.get('writesubtitles') and not
2188                 self.params.get('writeautomaticsub') or not
2189                 available_subs):
2190             return None
2191
2192         all_sub_langs = available_subs.keys()
2193         if self.params.get('allsubtitles', False):
2194             requested_langs = all_sub_langs
2195         elif self.params.get('subtitleslangs', False):
2196             requested_langs = set()
2197             for lang in self.params.get('subtitleslangs'):
2198                 if lang == 'all':
2199                     requested_langs.update(all_sub_langs)
2200                     continue
2201                 discard = lang[0] == '-'
2202                 if discard:
2203                     lang = lang[1:]
2204                 current_langs = filter(re.compile(lang + '$').match, all_sub_langs)
2205                 if discard:
2206                     for lang in current_langs:
2207                         requested_langs.discard(lang)
2208                 else:
2209                     requested_langs.update(current_langs)
2210         elif 'en' in available_subs:
2211             requested_langs = ['en']
2212         else:
2213             requested_langs = [list(all_sub_langs)[0]]
2214         self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2215
2216         formats_query = self.params.get('subtitlesformat', 'best')
2217         formats_preference = formats_query.split('/') if formats_query else []
2218         subs = {}
2219         for lang in requested_langs:
2220             formats = available_subs.get(lang)
2221             if formats is None:
2222                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2223                 continue
2224             for ext in formats_preference:
2225                 if ext == 'best':
2226                     f = formats[-1]
2227                     break
2228                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2229                 if matches:
2230                     f = matches[-1]
2231                     break
2232             else:
2233                 f = formats[-1]
2234                 self.report_warning(
2235                     'No subtitle format found matching "%s" for language %s, '
2236                     'using %s' % (formats_query, lang, f['ext']))
2237             subs[lang] = f
2238         return subs
2239
2240     def __forced_printings(self, info_dict, filename, incomplete):
2241         def print_mandatory(field, actual_field=None):
2242             if actual_field is None:
2243                 actual_field = field
2244             if (self.params.get('force%s' % field, False)
2245                     and (not incomplete or info_dict.get(actual_field) is not None)):
2246                 self.to_stdout(info_dict[actual_field])
2247
2248         def print_optional(field):
2249             if (self.params.get('force%s' % field, False)
2250                     and info_dict.get(field) is not None):
2251                 self.to_stdout(info_dict[field])
2252
2253         info_dict = info_dict.copy()
2254         if filename is not None:
2255             info_dict['filename'] = filename
2256         if info_dict.get('requested_formats') is not None:
2257             # For RTMP URLs, also include the playpath
2258             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2259         elif 'url' in info_dict:
2260             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2261
2262         for tmpl in self.params.get('forceprint', []):
2263             if re.match(r'\w+$', tmpl):
2264                 tmpl = '%({})s'.format(tmpl)
2265             tmpl, info_copy = self.prepare_outtmpl(tmpl, info_dict)
2266             self.to_stdout(tmpl % info_copy)
2267
2268         print_mandatory('title')
2269         print_mandatory('id')
2270         print_mandatory('url', 'urls')
2271         print_optional('thumbnail')
2272         print_optional('description')
2273         print_optional('filename')
2274         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
2275             self.to_stdout(formatSeconds(info_dict['duration']))
2276         print_mandatory('format')
2277
2278         if self.params.get('forcejson', False):
2279             self.post_extract(info_dict)
2280             self.to_stdout(json.dumps(info_dict, default=repr))
2281
2282     def dl(self, name, info, subtitle=False, test=False):
2283
2284         if test:
2285             verbose = self.params.get('verbose')
2286             params = {
2287                 'test': True,
2288                 'quiet': not verbose,
2289                 'verbose': verbose,
2290                 'noprogress': not verbose,
2291                 'nopart': True,
2292                 'skip_unavailable_fragments': False,
2293                 'keep_fragments': False,
2294                 'overwrites': True,
2295                 '_no_ytdl_file': True,
2296             }
2297         else:
2298             params = self.params
2299         fd = get_suitable_downloader(info, params)(self, params)
2300         if not test:
2301             for ph in self._progress_hooks:
2302                 fd.add_progress_hook(ph)
2303             urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2304             self.write_debug('Invoking downloader on "%s"' % urls)
2305         new_info = dict(info)
2306         if new_info.get('http_headers') is None:
2307             new_info['http_headers'] = self._calc_headers(new_info)
2308         return fd.download(name, new_info, subtitle)
2309
2310     def process_info(self, info_dict):
2311         """Process a single resolved IE result."""
2312
2313         assert info_dict.get('_type', 'video') == 'video'
2314
2315         info_dict.setdefault('__postprocessors', [])
2316
2317         max_downloads = self.params.get('max_downloads')
2318         if max_downloads is not None:
2319             if self._num_downloads >= int(max_downloads):
2320                 raise MaxDownloadsReached()
2321
2322         # TODO: backward compatibility, to be removed
2323         info_dict['fulltitle'] = info_dict['title']
2324
2325         if 'format' not in info_dict:
2326             info_dict['format'] = info_dict['ext']
2327
2328         if self._match_entry(info_dict) is not None:
2329             return
2330
2331         self.post_extract(info_dict)
2332         self._num_downloads += 1
2333
2334         # info_dict['_filename'] needs to be set for backward compatibility
2335         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2336         temp_filename = self.prepare_filename(info_dict, 'temp')
2337         files_to_move = {}
2338
2339         # Forced printings
2340         self.__forced_printings(info_dict, full_filename, incomplete=False)
2341
2342         if self.params.get('simulate', False):
2343             if self.params.get('force_write_download_archive', False):
2344                 self.record_download_archive(info_dict)
2345
2346             # Do nothing else if in simulate mode
2347             return
2348
2349         if full_filename is None:
2350             return
2351
2352         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2353             return
2354         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2355             return
2356
2357         if self.params.get('writedescription', False):
2358             descfn = self.prepare_filename(info_dict, 'description')
2359             if not self._ensure_dir_exists(encodeFilename(descfn)):
2360                 return
2361             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
2362                 self.to_screen('[info] Video description is already present')
2363             elif info_dict.get('description') is None:
2364                 self.report_warning('There\'s no description to write.')
2365             else:
2366                 try:
2367                     self.to_screen('[info] Writing video description to: ' + descfn)
2368                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
2369                         descfile.write(info_dict['description'])
2370                 except (OSError, IOError):
2371                     self.report_error('Cannot write description file ' + descfn)
2372                     return
2373
2374         if self.params.get('writeannotations', False):
2375             annofn = self.prepare_filename(info_dict, 'annotation')
2376             if not self._ensure_dir_exists(encodeFilename(annofn)):
2377                 return
2378             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2379                 self.to_screen('[info] Video annotations are already present')
2380             elif not info_dict.get('annotations'):
2381                 self.report_warning('There are no annotations to write.')
2382             else:
2383                 try:
2384                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2385                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2386                         annofile.write(info_dict['annotations'])
2387                 except (KeyError, TypeError):
2388                     self.report_warning('There are no annotations to write.')
2389                 except (OSError, IOError):
2390                     self.report_error('Cannot write annotations file: ' + annofn)
2391                     return
2392
2393         subtitles_are_requested = any([self.params.get('writesubtitles', False),
2394                                        self.params.get('writeautomaticsub')])
2395
2396         if subtitles_are_requested and info_dict.get('requested_subtitles'):
2397             # subtitles download errors are already managed as troubles in relevant IE
2398             # that way it will silently go on when used with unsupporting IE
2399             subtitles = info_dict['requested_subtitles']
2400             # ie = self.get_info_extractor(info_dict['extractor_key'])
2401             for sub_lang, sub_info in subtitles.items():
2402                 sub_format = sub_info['ext']
2403                 sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
2404                 sub_filename_final = subtitles_filename(
2405                     self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))
2406                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
2407                     self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
2408                     sub_info['filepath'] = sub_filename
2409                     files_to_move[sub_filename] = sub_filename_final
2410                 else:
2411                     self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
2412                     if sub_info.get('data') is not None:
2413                         try:
2414                             # Use newline='' to prevent conversion of newline characters
2415                             # See https://github.com/ytdl-org/youtube-dl/issues/10268
2416                             with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
2417                                 subfile.write(sub_info['data'])
2418                             sub_info['filepath'] = sub_filename
2419                             files_to_move[sub_filename] = sub_filename_final
2420                         except (OSError, IOError):
2421                             self.report_error('Cannot write subtitles file ' + sub_filename)
2422                             return
2423                     else:
2424                         try:
2425                             self.dl(sub_filename, sub_info.copy(), subtitle=True)
2426                             sub_info['filepath'] = sub_filename
2427                             files_to_move[sub_filename] = sub_filename_final
2428                         except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
2429                             self.report_warning('Unable to download subtitle for "%s": %s' %
2430                                                 (sub_lang, error_to_compat_str(err)))
2431                             continue
2432
2433         if self.params.get('writeinfojson', False):
2434             infofn = self.prepare_filename(info_dict, 'infojson')
2435             if not self._ensure_dir_exists(encodeFilename(infofn)):
2436                 return
2437             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
2438                 self.to_screen('[info] Video metadata is already present')
2439             else:
2440                 self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
2441                 try:
2442                     write_json_file(self.filter_requested_info(info_dict, self.params.get('clean_infojson', True)), infofn)
2443                 except (OSError, IOError):
2444                     self.report_error('Cannot write video metadata to JSON file ' + infofn)
2445                     return
2446             info_dict['__infojson_filename'] = infofn
2447
2448         for thumb_ext in self._write_thumbnails(info_dict, temp_filename):
2449             thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))
2450             thumb_filename = replace_extension(
2451                 self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))
2452             files_to_move[thumb_filename_temp] = thumb_filename
2453
2454         # Write internet shortcut files
2455         url_link = webloc_link = desktop_link = False
2456         if self.params.get('writelink', False):
2457             if sys.platform == "darwin":  # macOS.
2458                 webloc_link = True
2459             elif sys.platform.startswith("linux"):
2460                 desktop_link = True
2461             else:  # if sys.platform in ['win32', 'cygwin']:
2462                 url_link = True
2463         if self.params.get('writeurllink', False):
2464             url_link = True
2465         if self.params.get('writewebloclink', False):
2466             webloc_link = True
2467         if self.params.get('writedesktoplink', False):
2468             desktop_link = True
2469
2470         if url_link or webloc_link or desktop_link:
2471             if 'webpage_url' not in info_dict:
2472                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2473                 return
2474             ascii_url = iri_to_uri(info_dict['webpage_url'])
2475
2476         def _write_link_file(extension, template, newline, embed_filename):
2477             linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
2478             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2479                 self.to_screen('[info] Internet shortcut is already present')
2480             else:
2481                 try:
2482                     self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2483                     with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2484                         template_vars = {'url': ascii_url}
2485                         if embed_filename:
2486                             template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2487                         linkfile.write(template % template_vars)
2488                 except (OSError, IOError):
2489                     self.report_error('Cannot write internet shortcut ' + linkfn)
2490                     return False
2491             return True
2492
2493         if url_link:
2494             if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2495                 return
2496         if webloc_link:
2497             if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2498                 return
2499         if desktop_link:
2500             if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2501                 return
2502
2503         try:
2504             info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2505         except PostProcessingError as err:
2506             self.report_error('Preprocessing: %s' % str(err))
2507             return
2508
2509         must_record_download_archive = False
2510         if self.params.get('skip_download', False):
2511             info_dict['filepath'] = temp_filename
2512             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2513             info_dict['__files_to_move'] = files_to_move
2514             info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2515         else:
2516             # Download
2517             try:
2518
2519                 def existing_file(*filepaths):
2520                     ext = info_dict.get('ext')
2521                     final_ext = self.params.get('final_ext', ext)
2522                     existing_files = []
2523                     for file in orderedSet(filepaths):
2524                         if final_ext != ext:
2525                             converted = replace_extension(file, final_ext, ext)
2526                             if os.path.exists(encodeFilename(converted)):
2527                                 existing_files.append(converted)
2528                         if os.path.exists(encodeFilename(file)):
2529                             existing_files.append(file)
2530
2531                     if not existing_files or self.params.get('overwrites', False):
2532                         for file in orderedSet(existing_files):
2533                             self.report_file_delete(file)
2534                             os.remove(encodeFilename(file))
2535                         return None
2536
2537                     self.report_file_already_downloaded(existing_files[0])
2538                     info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2539                     return existing_files[0]
2540
2541                 success = True
2542                 if info_dict.get('requested_formats') is not None:
2543
2544                     def compatible_formats(formats):
2545                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2546                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2547                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2548                         if len(video_formats) > 2 or len(audio_formats) > 2:
2549                             return False
2550
2551                         # Check extension
2552                         exts = set(format.get('ext') for format in formats)
2553                         COMPATIBLE_EXTS = (
2554                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2555                             set(('webm',)),
2556                         )
2557                         for ext_sets in COMPATIBLE_EXTS:
2558                             if ext_sets.issuperset(exts):
2559                                 return True
2560                         # TODO: Check acodec/vcodec
2561                         return False
2562
2563                     requested_formats = info_dict['requested_formats']
2564                     old_ext = info_dict['ext']
2565                     if self.params.get('merge_output_format') is None:
2566                         if not compatible_formats(requested_formats):
2567                             info_dict['ext'] = 'mkv'
2568                             self.report_warning(
2569                                 'Requested formats are incompatible for merge and will be merged into mkv.')
2570                         if (info_dict['ext'] == 'webm'
2571                                 and self.params.get('writethumbnail', False)
2572                                 and info_dict.get('thumbnails')):
2573                             info_dict['ext'] = 'mkv'
2574                             self.report_warning(
2575                                 'webm doesn\'t support embedding a thumbnail, mkv will be used.')
2576
2577                     def correct_ext(filename):
2578                         filename_real_ext = os.path.splitext(filename)[1][1:]
2579                         filename_wo_ext = (
2580                             os.path.splitext(filename)[0]
2581                             if filename_real_ext == old_ext
2582                             else filename)
2583                         return '%s.%s' % (filename_wo_ext, info_dict['ext'])
2584
2585                     # Ensure filename always has a correct extension for successful merge
2586                     full_filename = correct_ext(full_filename)
2587                     temp_filename = correct_ext(temp_filename)
2588                     dl_filename = existing_file(full_filename, temp_filename)
2589                     info_dict['__real_download'] = False
2590
2591                     _protocols = set(determine_protocol(f) for f in requested_formats)
2592                     if len(_protocols) == 1:
2593                         info_dict['protocol'] = _protocols.pop()
2594                     directly_mergable = (
2595                         'no-direct-merge' not in self.params.get('compat_opts', [])
2596                         and info_dict.get('protocol') is not None  # All requested formats have same protocol
2597                         and not self.params.get('allow_unplayable_formats')
2598                         and get_suitable_downloader(info_dict, self.params).__name__ == 'FFmpegFD')
2599                     if directly_mergable:
2600                         info_dict['url'] = requested_formats[0]['url']
2601                         # Treat it as a single download
2602                         dl_filename = existing_file(full_filename, temp_filename)
2603                         if dl_filename is None:
2604                             success, real_download = self.dl(temp_filename, info_dict)
2605                             info_dict['__real_download'] = real_download
2606                     else:
2607                         downloaded = []
2608                         merger = FFmpegMergerPP(self)
2609                         if self.params.get('allow_unplayable_formats'):
2610                             self.report_warning(
2611                                 'You have requested merging of multiple formats '
2612                                 'while also allowing unplayable formats to be downloaded. '
2613                                 'The formats won\'t be merged to prevent data corruption.')
2614                         elif not merger.available:
2615                             self.report_warning(
2616                                 'You have requested merging of multiple formats but ffmpeg is not installed. '
2617                                 'The formats won\'t be merged.')
2618
2619                         if dl_filename is None:
2620                             for f in requested_formats:
2621                                 new_info = dict(info_dict)
2622                                 del new_info['requested_formats']
2623                                 new_info.update(f)
2624                                 fname = prepend_extension(
2625                                     self.prepare_filename(new_info, 'temp'),
2626                                     'f%s' % f['format_id'], new_info['ext'])
2627                                 if not self._ensure_dir_exists(fname):
2628                                     return
2629                                 downloaded.append(fname)
2630                                 partial_success, real_download = self.dl(fname, new_info)
2631                                 info_dict['__real_download'] = info_dict['__real_download'] or real_download
2632                                 success = success and partial_success
2633                             if merger.available and not self.params.get('allow_unplayable_formats'):
2634                                 info_dict['__postprocessors'].append(merger)
2635                                 info_dict['__files_to_merge'] = downloaded
2636                                 # Even if there were no downloads, it is being merged only now
2637                                 info_dict['__real_download'] = True
2638                             else:
2639                                 for file in downloaded:
2640                                     files_to_move[file] = None
2641                 else:
2642                     # Just a single file
2643                     dl_filename = existing_file(full_filename, temp_filename)
2644                     if dl_filename is None:
2645                         success, real_download = self.dl(temp_filename, info_dict)
2646                         info_dict['__real_download'] = real_download
2647
2648                 dl_filename = dl_filename or temp_filename
2649                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2650
2651             except network_exceptions as err:
2652                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2653                 return
2654             except (OSError, IOError) as err:
2655                 raise UnavailableVideoError(err)
2656             except (ContentTooShortError, ) as err:
2657                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2658                 return
2659
2660             if success and full_filename != '-':
2661                 # Fixup content
2662                 fixup_policy = self.params.get('fixup')
2663                 if fixup_policy is None:
2664                     fixup_policy = 'detect_or_warn'
2665
2666                 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg to fix this automatically.'
2667
2668                 stretched_ratio = info_dict.get('stretched_ratio')
2669                 if stretched_ratio is not None and stretched_ratio != 1:
2670                     if fixup_policy == 'warn':
2671                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2672                             info_dict['id'], stretched_ratio))
2673                     elif fixup_policy == 'detect_or_warn':
2674                         stretched_pp = FFmpegFixupStretchedPP(self)
2675                         if stretched_pp.available:
2676                             info_dict['__postprocessors'].append(stretched_pp)
2677                         else:
2678                             self.report_warning(
2679                                 '%s: Non-uniform pixel ratio (%s). %s'
2680                                 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
2681                     else:
2682                         assert fixup_policy in ('ignore', 'never')
2683
2684                 if (info_dict.get('requested_formats') is None
2685                         and info_dict.get('container') == 'm4a_dash'
2686                         and info_dict.get('ext') == 'm4a'):
2687                     if fixup_policy == 'warn':
2688                         self.report_warning(
2689                             '%s: writing DASH m4a. '
2690                             'Only some players support this container.'
2691                             % info_dict['id'])
2692                     elif fixup_policy == 'detect_or_warn':
2693                         fixup_pp = FFmpegFixupM4aPP(self)
2694                         if fixup_pp.available:
2695                             info_dict['__postprocessors'].append(fixup_pp)
2696                         else:
2697                             self.report_warning(
2698                                 '%s: writing DASH m4a. '
2699                                 'Only some players support this container. %s'
2700                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2701                     else:
2702                         assert fixup_policy in ('ignore', 'never')
2703
2704                 if ('protocol' in info_dict
2705                         and get_suitable_downloader(info_dict, self.params).__name__ == 'HlsFD'):
2706                     if fixup_policy == 'warn':
2707                         self.report_warning('%s: malformed AAC bitstream detected.' % (
2708                             info_dict['id']))
2709                     elif fixup_policy == 'detect_or_warn':
2710                         fixup_pp = FFmpegFixupM3u8PP(self)
2711                         if fixup_pp.available:
2712                             info_dict['__postprocessors'].append(fixup_pp)
2713                         else:
2714                             self.report_warning(
2715                                 '%s: malformed AAC bitstream detected. %s'
2716                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2717                     else:
2718                         assert fixup_policy in ('ignore', 'never')
2719
2720                 try:
2721                     info_dict = self.post_process(dl_filename, info_dict, files_to_move)
2722                 except PostProcessingError as err:
2723                     self.report_error('Postprocessing: %s' % str(err))
2724                     return
2725                 try:
2726                     for ph in self._post_hooks:
2727                         ph(info_dict['filepath'])
2728                 except Exception as err:
2729                     self.report_error('post hooks: %s' % str(err))
2730                     return
2731                 must_record_download_archive = True
2732
2733         if must_record_download_archive or self.params.get('force_write_download_archive', False):
2734             self.record_download_archive(info_dict)
2735         max_downloads = self.params.get('max_downloads')
2736         if max_downloads is not None and self._num_downloads >= int(max_downloads):
2737             raise MaxDownloadsReached()
2738
2739     def download(self, url_list):
2740         """Download a given list of URLs."""
2741         outtmpl = self.outtmpl_dict['default']
2742         if (len(url_list) > 1
2743                 and outtmpl != '-'
2744                 and '%' not in outtmpl
2745                 and self.params.get('max_downloads') != 1):
2746             raise SameFileError(outtmpl)
2747
2748         for url in url_list:
2749             try:
2750                 # It also downloads the videos
2751                 res = self.extract_info(
2752                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2753             except UnavailableVideoError:
2754                 self.report_error('unable to download video')
2755             except MaxDownloadsReached:
2756                 self.to_screen('[info] Maximum number of downloaded files reached')
2757                 raise
2758             except ExistingVideoReached:
2759                 self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
2760                 raise
2761             except RejectedVideoReached:
2762                 self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
2763                 raise
2764             else:
2765                 if self.params.get('dump_single_json', False):
2766                     self.post_extract(res)
2767                     self.to_stdout(json.dumps(res, default=repr))
2768
2769         return self._download_retcode
2770
2771     def download_with_info_file(self, info_filename):
2772         with contextlib.closing(fileinput.FileInput(
2773                 [info_filename], mode='r',
2774                 openhook=fileinput.hook_encoded('utf-8'))) as f:
2775             # FileInput doesn't have a read method, we can't call json.load
2776             info = self.filter_requested_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
2777         try:
2778             self.process_ie_result(info, download=True)
2779         except (DownloadError, EntryNotInPlaylist):
2780             webpage_url = info.get('webpage_url')
2781             if webpage_url is not None:
2782                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2783                 return self.download([webpage_url])
2784             else:
2785                 raise
2786         return self._download_retcode
2787
2788     @staticmethod
2789     def filter_requested_info(info_dict, actually_filter=True):
2790         remove_keys = ['__original_infodict']  # Always remove this since this may contain a copy of the entire dict
2791         keep_keys = ['_type'],  # Always keep this to facilitate load-info-json
2792         if actually_filter:
2793             remove_keys += ('requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries', 'original_url')
2794             empty_values = (None, {}, [], set(), tuple())
2795             reject = lambda k, v: k not in keep_keys and (
2796                 k.startswith('_') or k in remove_keys or v in empty_values)
2797         else:
2798             info_dict['epoch'] = int(time.time())
2799             reject = lambda k, v: k in remove_keys
2800         filter_fn = lambda obj: (
2801             list(map(filter_fn, obj)) if isinstance(obj, (list, tuple, set))
2802             else obj if not isinstance(obj, dict)
2803             else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
2804         return filter_fn(info_dict)
2805
2806     def run_pp(self, pp, infodict):
2807         files_to_delete = []
2808         if '__files_to_move' not in infodict:
2809             infodict['__files_to_move'] = {}
2810         files_to_delete, infodict = pp.run(infodict)
2811         if not files_to_delete:
2812             return infodict
2813
2814         if self.params.get('keepvideo', False):
2815             for f in files_to_delete:
2816                 infodict['__files_to_move'].setdefault(f, '')
2817         else:
2818             for old_filename in set(files_to_delete):
2819                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2820                 try:
2821                     os.remove(encodeFilename(old_filename))
2822                 except (IOError, OSError):
2823                     self.report_warning('Unable to remove downloaded original file')
2824                 if old_filename in infodict['__files_to_move']:
2825                     del infodict['__files_to_move'][old_filename]
2826         return infodict
2827
2828     @staticmethod
2829     def post_extract(info_dict):
2830         def actual_post_extract(info_dict):
2831             if info_dict.get('_type') in ('playlist', 'multi_video'):
2832                 for video_dict in info_dict.get('entries', {}):
2833                     actual_post_extract(video_dict or {})
2834                 return
2835
2836             post_extractor = info_dict.get('__post_extractor') or (lambda: {})
2837             extra = post_extractor().items()
2838             info_dict.update(extra)
2839             info_dict.pop('__post_extractor', None)
2840
2841             original_infodict = info_dict.get('__original_infodict') or {}
2842             original_infodict.update(extra)
2843             original_infodict.pop('__post_extractor', None)
2844
2845         actual_post_extract(info_dict or {})
2846
2847     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
2848         info = dict(ie_info)
2849         info['__files_to_move'] = files_to_move or {}
2850         for pp in self._pps[key]:
2851             info = self.run_pp(pp, info)
2852         return info, info.pop('__files_to_move', None)
2853
2854     def post_process(self, filename, ie_info, files_to_move=None):
2855         """Run all the postprocessors on the given file."""
2856         info = dict(ie_info)
2857         info['filepath'] = filename
2858         info['__files_to_move'] = files_to_move or {}
2859
2860         for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
2861             info = self.run_pp(pp, info)
2862         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
2863         del info['__files_to_move']
2864         for pp in self._pps['after_move']:
2865             info = self.run_pp(pp, info)
2866         return info
2867
2868     def _make_archive_id(self, info_dict):
2869         video_id = info_dict.get('id')
2870         if not video_id:
2871             return
2872         # Future-proof against any change in case
2873         # and backwards compatibility with prior versions
2874         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
2875         if extractor is None:
2876             url = str_or_none(info_dict.get('url'))
2877             if not url:
2878                 return
2879             # Try to find matching extractor for the URL and take its ie_key
2880             for ie in self._ies:
2881                 if ie.suitable(url):
2882                     extractor = ie.ie_key()
2883                     break
2884             else:
2885                 return
2886         return '%s %s' % (extractor.lower(), video_id)
2887
2888     def in_download_archive(self, info_dict):
2889         fn = self.params.get('download_archive')
2890         if fn is None:
2891             return False
2892
2893         vid_id = self._make_archive_id(info_dict)
2894         if not vid_id:
2895             return False  # Incomplete video information
2896
2897         return vid_id in self.archive
2898
2899     def record_download_archive(self, info_dict):
2900         fn = self.params.get('download_archive')
2901         if fn is None:
2902             return
2903         vid_id = self._make_archive_id(info_dict)
2904         assert vid_id
2905         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2906             archive_file.write(vid_id + '\n')
2907         self.archive.add(vid_id)
2908
2909     @staticmethod
2910     def format_resolution(format, default='unknown'):
2911         if format.get('vcodec') == 'none':
2912             return 'audio only'
2913         if format.get('resolution') is not None:
2914             return format['resolution']
2915         if format.get('width') and format.get('height'):
2916             res = '%dx%d' % (format['width'], format['height'])
2917         elif format.get('height'):
2918             res = '%sp' % format['height']
2919         elif format.get('width'):
2920             res = '%dx?' % format['width']
2921         else:
2922             res = default
2923         return res
2924
2925     def _format_note(self, fdict):
2926         res = ''
2927         if fdict.get('ext') in ['f4f', 'f4m']:
2928             res += '(unsupported) '
2929         if fdict.get('language'):
2930             if res:
2931                 res += ' '
2932             res += '[%s] ' % fdict['language']
2933         if fdict.get('format_note') is not None:
2934             res += fdict['format_note'] + ' '
2935         if fdict.get('tbr') is not None:
2936             res += '%4dk ' % fdict['tbr']
2937         if fdict.get('container') is not None:
2938             if res:
2939                 res += ', '
2940             res += '%s container' % fdict['container']
2941         if (fdict.get('vcodec') is not None
2942                 and fdict.get('vcodec') != 'none'):
2943             if res:
2944                 res += ', '
2945             res += fdict['vcodec']
2946             if fdict.get('vbr') is not None:
2947                 res += '@'
2948         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2949             res += 'video@'
2950         if fdict.get('vbr') is not None:
2951             res += '%4dk' % fdict['vbr']
2952         if fdict.get('fps') is not None:
2953             if res:
2954                 res += ', '
2955             res += '%sfps' % fdict['fps']
2956         if fdict.get('acodec') is not None:
2957             if res:
2958                 res += ', '
2959             if fdict['acodec'] == 'none':
2960                 res += 'video only'
2961             else:
2962                 res += '%-5s' % fdict['acodec']
2963         elif fdict.get('abr') is not None:
2964             if res:
2965                 res += ', '
2966             res += 'audio'
2967         if fdict.get('abr') is not None:
2968             res += '@%3dk' % fdict['abr']
2969         if fdict.get('asr') is not None:
2970             res += ' (%5dHz)' % fdict['asr']
2971         if fdict.get('filesize') is not None:
2972             if res:
2973                 res += ', '
2974             res += format_bytes(fdict['filesize'])
2975         elif fdict.get('filesize_approx') is not None:
2976             if res:
2977                 res += ', '
2978             res += '~' + format_bytes(fdict['filesize_approx'])
2979         return res
2980
2981     def _format_note_table(self, f):
2982         def join_fields(*vargs):
2983             return ', '.join((val for val in vargs if val != ''))
2984
2985         return join_fields(
2986             'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
2987             format_field(f, 'language', '[%s]'),
2988             format_field(f, 'format_note'),
2989             format_field(f, 'container', ignore=(None, f.get('ext'))),
2990             format_field(f, 'asr', '%5dHz'))
2991
2992     def list_formats(self, info_dict):
2993         formats = info_dict.get('formats', [info_dict])
2994         new_format = (
2995             'list-formats' not in self.params.get('compat_opts', [])
2996             and self.params.get('list_formats_as_table', True) is not False)
2997         if new_format:
2998             table = [
2999                 [
3000                     format_field(f, 'format_id'),
3001                     format_field(f, 'ext'),
3002                     self.format_resolution(f),
3003                     format_field(f, 'fps', '%d'),
3004                     '|',
3005                     format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
3006                     format_field(f, 'tbr', '%4dk'),
3007                     shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
3008                     '|',
3009                     format_field(f, 'vcodec', default='unknown').replace('none', ''),
3010                     format_field(f, 'vbr', '%4dk'),
3011                     format_field(f, 'acodec', default='unknown').replace('none', ''),
3012                     format_field(f, 'abr', '%3dk'),
3013                     format_field(f, 'asr', '%5dHz'),
3014                     self._format_note_table(f)]
3015                 for f in formats
3016                 if f.get('preference') is None or f['preference'] >= -1000]
3017             header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', '  TBR', 'PROTO',
3018                            '|', 'VCODEC', '  VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
3019         else:
3020             table = [
3021                 [
3022                     format_field(f, 'format_id'),
3023                     format_field(f, 'ext'),
3024                     self.format_resolution(f),
3025                     self._format_note(f)]
3026                 for f in formats
3027                 if f.get('preference') is None or f['preference'] >= -1000]
3028             header_line = ['format code', 'extension', 'resolution', 'note']
3029
3030         self.to_screen(
3031             '[info] Available formats for %s:\n%s' % (info_dict['id'], render_table(
3032                 header_line,
3033                 table,
3034                 delim=new_format,
3035                 extraGap=(0 if new_format else 1),
3036                 hideEmpty=new_format)))
3037
3038     def list_thumbnails(self, info_dict):
3039         thumbnails = info_dict.get('thumbnails')
3040         if not thumbnails:
3041             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3042             return
3043
3044         self.to_screen(
3045             '[info] Thumbnails for %s:' % info_dict['id'])
3046         self.to_screen(render_table(
3047             ['ID', 'width', 'height', 'URL'],
3048             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3049
3050     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3051         if not subtitles:
3052             self.to_screen('%s has no %s' % (video_id, name))
3053             return
3054         self.to_screen(
3055             'Available %s for %s:' % (name, video_id))
3056
3057         def _row(lang, formats):
3058             exts, names = zip(*((f['ext'], f.get('name', 'unknown')) for f in reversed(formats)))
3059             if len(set(names)) == 1:
3060                 names = [] if names[0] == 'unknown' else names[:1]
3061             return [lang, ', '.join(names), ', '.join(exts)]
3062
3063         self.to_screen(render_table(
3064             ['Language', 'Name', 'Formats'],
3065             [_row(lang, formats) for lang, formats in subtitles.items()],
3066             hideEmpty=True))
3067
3068     def urlopen(self, req):
3069         """ Start an HTTP download """
3070         if isinstance(req, compat_basestring):
3071             req = sanitized_Request(req)
3072         return self._opener.open(req, timeout=self._socket_timeout)
3073
3074     def print_debug_header(self):
3075         if not self.params.get('verbose'):
3076             return
3077
3078         if type('') is not compat_str:
3079             # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
3080             self.report_warning(
3081                 'Your Python is broken! Update to a newer and supported version')
3082
3083         stdout_encoding = getattr(
3084             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
3085         encoding_str = (
3086             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
3087                 locale.getpreferredencoding(),
3088                 sys.getfilesystemencoding(),
3089                 stdout_encoding,
3090                 self.get_encoding()))
3091         write_string(encoding_str, encoding=None)
3092
3093         source = (
3094             '(exe)' if hasattr(sys, 'frozen')
3095             else '(zip)' if isinstance(globals().get('__loader__'), zipimporter)
3096             else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py'
3097             else '')
3098         self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))
3099         if _LAZY_LOADER:
3100             self._write_string('[debug] Lazy loading extractors enabled\n')
3101         if _PLUGIN_CLASSES:
3102             self._write_string(
3103                 '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
3104         if self.params.get('compat_opts'):
3105             self._write_string(
3106                 '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
3107         try:
3108             sp = subprocess.Popen(
3109                 ['git', 'rev-parse', '--short', 'HEAD'],
3110                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3111                 cwd=os.path.dirname(os.path.abspath(__file__)))
3112             out, err = process_communicate_or_kill(sp)
3113             out = out.decode().strip()
3114             if re.match('[0-9a-f]+', out):
3115                 self._write_string('[debug] Git HEAD: %s\n' % out)
3116         except Exception:
3117             try:
3118                 sys.exc_clear()
3119             except Exception:
3120                 pass
3121
3122         def python_implementation():
3123             impl_name = platform.python_implementation()
3124             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3125                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3126             return impl_name
3127
3128         self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
3129             platform.python_version(),
3130             python_implementation(),
3131             platform.architecture()[0],
3132             platform_name()))
3133
3134         exe_versions = FFmpegPostProcessor.get_versions(self)
3135         exe_versions['rtmpdump'] = rtmpdump_version()
3136         exe_versions['phantomjs'] = PhantomJSwrapper._version()
3137         exe_str = ', '.join(
3138             '%s %s' % (exe, v)
3139             for exe, v in sorted(exe_versions.items())
3140             if v
3141         )
3142         if not exe_str:
3143             exe_str = 'none'
3144         self._write_string('[debug] exe versions: %s\n' % exe_str)
3145
3146         proxy_map = {}
3147         for handler in self._opener.handlers:
3148             if hasattr(handler, 'proxies'):
3149                 proxy_map.update(handler.proxies)
3150         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
3151
3152         if self.params.get('call_home', False):
3153             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3154             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
3155             return
3156             latest_version = self.urlopen(
3157                 'https://yt-dl.org/latest/version').read().decode('utf-8')
3158             if version_tuple(latest_version) > version_tuple(__version__):
3159                 self.report_warning(
3160                     'You are using an outdated version (newest version: %s)! '
3161                     'See https://yt-dl.org/update if you need help updating.' %
3162                     latest_version)
3163
3164     def _setup_opener(self):
3165         timeout_val = self.params.get('socket_timeout')
3166         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
3167
3168         opts_cookiefile = self.params.get('cookiefile')
3169         opts_proxy = self.params.get('proxy')
3170
3171         if opts_cookiefile is None:
3172             self.cookiejar = compat_cookiejar.CookieJar()
3173         else:
3174             opts_cookiefile = expand_path(opts_cookiefile)
3175             self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
3176             if os.access(opts_cookiefile, os.R_OK):
3177                 self.cookiejar.load(ignore_discard=True, ignore_expires=True)
3178
3179         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3180         if opts_proxy is not None:
3181             if opts_proxy == '':
3182                 proxies = {}
3183             else:
3184                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3185         else:
3186             proxies = compat_urllib_request.getproxies()
3187             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3188             if 'http' in proxies and 'https' not in proxies:
3189                 proxies['https'] = proxies['http']
3190         proxy_handler = PerRequestProxyHandler(proxies)
3191
3192         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3193         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3194         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3195         redirect_handler = YoutubeDLRedirectHandler()
3196         data_handler = compat_urllib_request_DataHandler()
3197
3198         # When passing our own FileHandler instance, build_opener won't add the
3199         # default FileHandler and allows us to disable the file protocol, which
3200         # can be used for malicious purposes (see
3201         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3202         file_handler = compat_urllib_request.FileHandler()
3203
3204         def file_open(*args, **kwargs):
3205             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3206         file_handler.file_open = file_open
3207
3208         opener = compat_urllib_request.build_opener(
3209             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3210
3211         # Delete the default user-agent header, which would otherwise apply in
3212         # cases where our custom HTTP handler doesn't come into play
3213         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3214         opener.addheaders = []
3215         self._opener = opener
3216
3217     def encode(self, s):
3218         if isinstance(s, bytes):
3219             return s  # Already encoded
3220
3221         try:
3222             return s.encode(self.get_encoding())
3223         except UnicodeEncodeError as err:
3224             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3225             raise
3226
3227     def get_encoding(self):
3228         encoding = self.params.get('encoding')
3229         if encoding is None:
3230             encoding = preferredencoding()
3231         return encoding
3232
3233     def _write_thumbnails(self, info_dict, filename):  # return the extensions
3234         write_all = self.params.get('write_all_thumbnails', False)
3235         thumbnails = []
3236         if write_all or self.params.get('writethumbnail', False):
3237             thumbnails = info_dict.get('thumbnails') or []
3238         multiple = write_all and len(thumbnails) > 1
3239
3240         ret = []
3241         for t in thumbnails[::1 if write_all else -1]:
3242             thumb_ext = determine_ext(t['url'], 'jpg')
3243             suffix = '%s.' % t['id'] if multiple else ''
3244             thumb_display_id = '%s ' % t['id'] if multiple else ''
3245             thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))
3246
3247             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
3248                 ret.append(suffix + thumb_ext)
3249                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
3250                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
3251             else:
3252                 self.to_screen('[%s] %s: Downloading thumbnail %s ...' %
3253                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
3254                 try:
3255                     uf = self.urlopen(t['url'])
3256                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3257                         shutil.copyfileobj(uf, thumbf)
3258                     ret.append(suffix + thumb_ext)
3259                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
3260                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
3261                     t['filepath'] = thumb_filename
3262                 except network_exceptions as err:
3263                     self.report_warning('Unable to download thumbnail "%s": %s' %
3264                                         (t['url'], error_to_compat_str(err)))
3265             if ret and not write_all:
3266                 break
3267         return ret