]> jfr.im git - yt-dlp.git/blob - yt_dlp/YoutubeDL.py
[Mediaklikk] Add Extractor (#867)
[yt-dlp.git] / yt_dlp / YoutubeDL.py
1 #!/usr/bin/env python3
2 # coding: utf-8
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import copy
9 import datetime
10 import errno
11 import fileinput
12 import io
13 import itertools
14 import json
15 import locale
16 import operator
17 import os
18 import platform
19 import re
20 import shutil
21 import subprocess
22 import sys
23 import tempfile
24 import time
25 import tokenize
26 import traceback
27 import random
28
29 from string import ascii_letters
30 from zipimport import zipimporter
31
32 from .compat import (
33 compat_basestring,
34 compat_get_terminal_size,
35 compat_kwargs,
36 compat_numeric_types,
37 compat_os_name,
38 compat_shlex_quote,
39 compat_str,
40 compat_tokenize_tokenize,
41 compat_urllib_error,
42 compat_urllib_request,
43 compat_urllib_request_DataHandler,
44 )
45 from .cookies import load_cookies
46 from .utils import (
47 age_restricted,
48 args_to_str,
49 ContentTooShortError,
50 date_from_str,
51 DateRange,
52 DEFAULT_OUTTMPL,
53 determine_ext,
54 determine_protocol,
55 DOT_DESKTOP_LINK_TEMPLATE,
56 DOT_URL_LINK_TEMPLATE,
57 DOT_WEBLOC_LINK_TEMPLATE,
58 DownloadError,
59 encode_compat_str,
60 encodeFilename,
61 EntryNotInPlaylist,
62 error_to_compat_str,
63 ExistingVideoReached,
64 expand_path,
65 ExtractorError,
66 float_or_none,
67 format_bytes,
68 format_field,
69 STR_FORMAT_RE_TMPL,
70 STR_FORMAT_TYPES,
71 formatSeconds,
72 GeoRestrictedError,
73 HEADRequest,
74 int_or_none,
75 iri_to_uri,
76 ISO3166Utils,
77 LazyList,
78 locked_file,
79 make_dir,
80 make_HTTPS_handler,
81 MaxDownloadsReached,
82 network_exceptions,
83 orderedSet,
84 OUTTMPL_TYPES,
85 PagedList,
86 parse_filesize,
87 PerRequestProxyHandler,
88 platform_name,
89 PostProcessingError,
90 preferredencoding,
91 prepend_extension,
92 process_communicate_or_kill,
93 register_socks_protocols,
94 RejectedVideoReached,
95 render_table,
96 replace_extension,
97 SameFileError,
98 sanitize_filename,
99 sanitize_path,
100 sanitize_url,
101 sanitized_Request,
102 std_headers,
103 str_or_none,
104 strftime_or_none,
105 subtitles_filename,
106 ThrottledDownload,
107 to_high_limit_path,
108 traverse_obj,
109 try_get,
110 UnavailableVideoError,
111 url_basename,
112 variadic,
113 version_tuple,
114 write_json_file,
115 write_string,
116 YoutubeDLCookieProcessor,
117 YoutubeDLHandler,
118 YoutubeDLRedirectHandler,
119 )
120 from .cache import Cache
121 from .extractor import (
122 gen_extractor_classes,
123 get_info_extractor,
124 _LAZY_LOADER,
125 _PLUGIN_CLASSES
126 )
127 from .extractor.openload import PhantomJSwrapper
128 from .downloader import (
129 FFmpegFD,
130 get_suitable_downloader,
131 shorten_protocol_name
132 )
133 from .downloader.rtmp import rtmpdump_version
134 from .postprocessor import (
135 get_postprocessor,
136 FFmpegFixupDurationPP,
137 FFmpegFixupM3u8PP,
138 FFmpegFixupM4aPP,
139 FFmpegFixupStretchedPP,
140 FFmpegFixupTimestampPP,
141 FFmpegMergerPP,
142 FFmpegPostProcessor,
143 MoveFilesAfterDownloadPP,
144 )
145 from .version import __version__
146
147 if compat_os_name == 'nt':
148 import ctypes
149
150
151 class YoutubeDL(object):
152 """YoutubeDL class.
153
154 YoutubeDL objects are the ones responsible of downloading the
155 actual video file and writing it to disk if the user has requested
156 it, among some other tasks. In most cases there should be one per
157 program. As, given a video URL, the downloader doesn't know how to
158 extract all the needed information, task that InfoExtractors do, it
159 has to pass the URL to one of them.
160
161 For this, YoutubeDL objects have a method that allows
162 InfoExtractors to be registered in a given order. When it is passed
163 a URL, the YoutubeDL object handles it to the first InfoExtractor it
164 finds that reports being able to handle it. The InfoExtractor extracts
165 all the information about the video or videos the URL refers to, and
166 YoutubeDL process the extracted information, possibly using a File
167 Downloader to download the video.
168
169 YoutubeDL objects accept a lot of parameters. In order not to saturate
170 the object constructor with arguments, it receives a dictionary of
171 options instead. These options are available through the params
172 attribute for the InfoExtractors to use. The YoutubeDL also
173 registers itself as the downloader in charge for the InfoExtractors
174 that are added to it, so this is a "mutual registration".
175
176 Available options:
177
178 username: Username for authentication purposes.
179 password: Password for authentication purposes.
180 videopassword: Password for accessing a video.
181 ap_mso: Adobe Pass multiple-system operator identifier.
182 ap_username: Multiple-system operator account username.
183 ap_password: Multiple-system operator account password.
184 usenetrc: Use netrc for authentication instead.
185 verbose: Print additional info to stdout.
186 quiet: Do not print messages to stdout.
187 no_warnings: Do not print out anything for warnings.
188 forceprint: A list of templates to force print
189 forceurl: Force printing final URL. (Deprecated)
190 forcetitle: Force printing title. (Deprecated)
191 forceid: Force printing ID. (Deprecated)
192 forcethumbnail: Force printing thumbnail URL. (Deprecated)
193 forcedescription: Force printing description. (Deprecated)
194 forcefilename: Force printing final filename. (Deprecated)
195 forceduration: Force printing duration. (Deprecated)
196 forcejson: Force printing info_dict as JSON.
197 dump_single_json: Force printing the info_dict of the whole playlist
198 (or video) as a single JSON line.
199 force_write_download_archive: Force writing download archive regardless
200 of 'skip_download' or 'simulate'.
201 simulate: Do not download the video files. If unset (or None),
202 simulate only if listsubtitles, listformats or list_thumbnails is used
203 format: Video format code. see "FORMAT SELECTION" for more details.
204 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
205 ignore_no_formats_error: Ignore "No video formats" error. Useful for
206 extracting metadata even if the video is not actually
207 available for download (experimental)
208 format_sort: How to sort the video formats. see "Sorting Formats"
209 for more details.
210 format_sort_force: Force the given format_sort. see "Sorting Formats"
211 for more details.
212 allow_multiple_video_streams: Allow multiple video streams to be merged
213 into a single file
214 allow_multiple_audio_streams: Allow multiple audio streams to be merged
215 into a single file
216 check_formats Whether to test if the formats are downloadable.
217 Can be True (check all), False (check none)
218 or None (check only if requested by extractor)
219 paths: Dictionary of output paths. The allowed keys are 'home'
220 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
221 outtmpl: Dictionary of templates for output names. Allowed keys
222 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
223 For compatibility with youtube-dl, a single string can also be used
224 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
225 restrictfilenames: Do not allow "&" and spaces in file names
226 trim_file_name: Limit length of filename (extension excluded)
227 windowsfilenames: Force the filenames to be windows compatible
228 ignoreerrors: Do not stop on download errors
229 (Default True when running yt-dlp,
230 but False when directly accessing YoutubeDL class)
231 skip_playlist_after_errors: Number of allowed failures until the rest of
232 the playlist is skipped
233 force_generic_extractor: Force downloader to use the generic extractor
234 overwrites: Overwrite all video and metadata files if True,
235 overwrite only non-video files if None
236 and don't overwrite any file if False
237 For compatibility with youtube-dl,
238 "nooverwrites" may also be used instead
239 playliststart: Playlist item to start at.
240 playlistend: Playlist item to end at.
241 playlist_items: Specific indices of playlist to download.
242 playlistreverse: Download playlist items in reverse order.
243 playlistrandom: Download playlist items in random order.
244 matchtitle: Download only matching titles.
245 rejecttitle: Reject downloads for matching titles.
246 logger: Log messages to a logging.Logger instance.
247 logtostderr: Log messages to stderr instead of stdout.
248 writedescription: Write the video description to a .description file
249 writeinfojson: Write the video description to a .info.json file
250 clean_infojson: Remove private fields from the infojson
251 getcomments: Extract video comments. This will not be written to disk
252 unless writeinfojson is also given
253 writeannotations: Write the video annotations to a .annotations.xml file
254 writethumbnail: Write the thumbnail image to a file
255 allow_playlist_files: Whether to write playlists' description, infojson etc
256 also to disk when using the 'write*' options
257 write_all_thumbnails: Write all thumbnail formats to files
258 writelink: Write an internet shortcut file, depending on the
259 current platform (.url/.webloc/.desktop)
260 writeurllink: Write a Windows internet shortcut file (.url)
261 writewebloclink: Write a macOS internet shortcut file (.webloc)
262 writedesktoplink: Write a Linux internet shortcut file (.desktop)
263 writesubtitles: Write the video subtitles to a file
264 writeautomaticsub: Write the automatically generated subtitles to a file
265 allsubtitles: Deprecated - Use subtitleslangs = ['all']
266 Downloads all the subtitles of the video
267 (requires writesubtitles or writeautomaticsub)
268 listsubtitles: Lists all available subtitles for the video
269 subtitlesformat: The format code for subtitles
270 subtitleslangs: List of languages of the subtitles to download (can be regex).
271 The list may contain "all" to refer to all the available
272 subtitles. The language can be prefixed with a "-" to
273 exclude it from the requested languages. Eg: ['all', '-live_chat']
274 keepvideo: Keep the video file after post-processing
275 daterange: A DateRange object, download only if the upload_date is in the range.
276 skip_download: Skip the actual download of the video file
277 cachedir: Location of the cache files in the filesystem.
278 False to disable filesystem cache.
279 noplaylist: Download single video instead of a playlist if in doubt.
280 age_limit: An integer representing the user's age in years.
281 Unsuitable videos for the given age are skipped.
282 min_views: An integer representing the minimum view count the video
283 must have in order to not be skipped.
284 Videos without view count information are always
285 downloaded. None for no limit.
286 max_views: An integer representing the maximum view count.
287 Videos that are more popular than that are not
288 downloaded.
289 Videos without view count information are always
290 downloaded. None for no limit.
291 download_archive: File name of a file where all downloads are recorded.
292 Videos already present in the file are not downloaded
293 again.
294 break_on_existing: Stop the download process after attempting to download a
295 file that is in the archive.
296 break_on_reject: Stop the download process when encountering a video that
297 has been filtered out.
298 cookiefile: File name where cookies should be read from and dumped to
299 cookiesfrombrowser: A tuple containing the name of the browser and the profile
300 name/path from where cookies are loaded.
301 Eg: ('chrome', ) or ('vivaldi', 'default')
302 nocheckcertificate:Do not verify SSL certificates
303 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
304 At the moment, this is only supported by YouTube.
305 proxy: URL of the proxy server to use
306 geo_verification_proxy: URL of the proxy to use for IP address verification
307 on geo-restricted sites.
308 socket_timeout: Time to wait for unresponsive hosts, in seconds
309 bidi_workaround: Work around buggy terminals without bidirectional text
310 support, using fribidi
311 debug_printtraffic:Print out sent and received HTTP traffic
312 include_ads: Download ads as well
313 default_search: Prepend this string if an input url is not valid.
314 'auto' for elaborate guessing
315 encoding: Use this encoding instead of the system-specified.
316 extract_flat: Do not resolve URLs, return the immediate result.
317 Pass in 'in_playlist' to only show this behavior for
318 playlist items.
319 postprocessors: A list of dictionaries, each with an entry
320 * key: The name of the postprocessor. See
321 yt_dlp/postprocessor/__init__.py for a list.
322 * when: When to run the postprocessor. Can be one of
323 pre_process|before_dl|post_process|after_move.
324 Assumed to be 'post_process' if not given
325 post_hooks: A list of functions that get called as the final step
326 for each video file, after all postprocessors have been
327 called. The filename will be passed as the only argument.
328 progress_hooks: A list of functions that get called on download
329 progress, with a dictionary with the entries
330 * status: One of "downloading", "error", or "finished".
331 Check this first and ignore unknown values.
332 * info_dict: The extracted info_dict
333
334 If status is one of "downloading", or "finished", the
335 following properties may also be present:
336 * filename: The final filename (always present)
337 * tmpfilename: The filename we're currently writing to
338 * downloaded_bytes: Bytes on disk
339 * total_bytes: Size of the whole file, None if unknown
340 * total_bytes_estimate: Guess of the eventual file size,
341 None if unavailable.
342 * elapsed: The number of seconds since download started.
343 * eta: The estimated time in seconds, None if unknown
344 * speed: The download speed in bytes/second, None if
345 unknown
346 * fragment_index: The counter of the currently
347 downloaded video fragment.
348 * fragment_count: The number of fragments (= individual
349 files that will be merged)
350
351 Progress hooks are guaranteed to be called at least once
352 (with status "finished") if the download is successful.
353 merge_output_format: Extension to use when merging formats.
354 final_ext: Expected final extension; used to detect when the file was
355 already downloaded and converted. "merge_output_format" is
356 replaced by this extension when given
357 fixup: Automatically correct known faults of the file.
358 One of:
359 - "never": do nothing
360 - "warn": only emit a warning
361 - "detect_or_warn": check whether we can do anything
362 about it, warn otherwise (default)
363 source_address: Client-side IP address to bind to.
364 call_home: Boolean, true iff we are allowed to contact the
365 yt-dlp servers for debugging. (BROKEN)
366 sleep_interval_requests: Number of seconds to sleep between requests
367 during extraction
368 sleep_interval: Number of seconds to sleep before each download when
369 used alone or a lower bound of a range for randomized
370 sleep before each download (minimum possible number
371 of seconds to sleep) when used along with
372 max_sleep_interval.
373 max_sleep_interval:Upper bound of a range for randomized sleep before each
374 download (maximum possible number of seconds to sleep).
375 Must only be used along with sleep_interval.
376 Actual sleep time will be a random float from range
377 [sleep_interval; max_sleep_interval].
378 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
379 listformats: Print an overview of available video formats and exit.
380 list_thumbnails: Print a table of all thumbnails and exit.
381 match_filter: A function that gets called with the info_dict of
382 every video.
383 If it returns a message, the video is ignored.
384 If it returns None, the video is downloaded.
385 match_filter_func in utils.py is one example for this.
386 no_color: Do not emit color codes in output.
387 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
388 HTTP header
389 geo_bypass_country:
390 Two-letter ISO 3166-2 country code that will be used for
391 explicit geographic restriction bypassing via faking
392 X-Forwarded-For HTTP header
393 geo_bypass_ip_block:
394 IP range in CIDR notation that will be used similarly to
395 geo_bypass_country
396
397 The following options determine which downloader is picked:
398 external_downloader: A dictionary of protocol keys and the executable of the
399 external downloader to use for it. The allowed protocols
400 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
401 Set the value to 'native' to use the native downloader
402 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
403 or {'m3u8': 'ffmpeg'} instead.
404 Use the native HLS downloader instead of ffmpeg/avconv
405 if True, otherwise use ffmpeg/avconv if False, otherwise
406 use downloader suggested by extractor if None.
407 compat_opts: Compatibility options. See "Differences in default behavior".
408 The following options do not work when used through the API:
409 filename, abort-on-error, multistreams, no-live-chat,
410 no-clean-infojson, no-playlist-metafiles, no-keep-subs.
411 Refer __init__.py for their implementation
412
413 The following parameters are not used by YoutubeDL itself, they are used by
414 the downloader (see yt_dlp/downloader/common.py):
415 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
416 max_filesize, test, noresizebuffer, retries, continuedl, noprogress, consoletitle,
417 xattr_set_filesize, external_downloader_args, hls_use_mpegts, http_chunk_size.
418
419 The following options are used by the post processors:
420 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
421 otherwise prefer ffmpeg. (avconv support is deprecated)
422 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
423 to the binary or its containing directory.
424 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
425 and a list of additional command-line arguments for the
426 postprocessor/executable. The dict can also have "PP+EXE" keys
427 which are used when the given exe is used by the given PP.
428 Use 'default' as the name for arguments to passed to all PP
429 For compatibility with youtube-dl, a single list of args
430 can also be used
431
432 The following options are used by the extractors:
433 extractor_retries: Number of times to retry for known errors
434 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
435 hls_split_discontinuity: Split HLS playlists to different formats at
436 discontinuities such as ad breaks (default: False)
437 extractor_args: A dictionary of arguments to be passed to the extractors.
438 See "EXTRACTOR ARGUMENTS" for details.
439 Eg: {'youtube': {'skip': ['dash', 'hls']}}
440 youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
441 If True (default), DASH manifests and related
442 data will be downloaded and processed by extractor.
443 You can reduce network I/O by disabling it if you don't
444 care about DASH. (only for youtube)
445 youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
446 If True (default), HLS manifests and related
447 data will be downloaded and processed by extractor.
448 You can reduce network I/O by disabling it if you don't
449 care about HLS. (only for youtube)
450 """
451
    # Fields that are coerced to numbers when rendering output templates
    # (see parse_outtmpl / output-template handling elsewhere in this class).
    _NUMERIC_FIELDS = set((
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'upload_year', 'upload_month', 'upload_day',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
        'playlist_index',
    ))

    # Class-level defaults. NOTE: every one of these is re-assigned with a
    # fresh per-instance object in __init__; they exist here only so the
    # attributes are defined even on partially-constructed objects.
    params = None                 # options dict (see class docstring)
    _ies = {}                     # ie_key -> InfoExtractor class or instance
    _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}  # postprocessors per stage
    _printed_messages = set()     # messages already emitted via only_once
    _first_webpage_request = True
    _download_retcode = None
    _num_downloads = None
    _playlist_level = 0           # current depth of nested playlist extraction
    _playlist_urls = set()
    _screen_file = None
473
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options."""
        if params is None:
            params = {}
        # Per-instance state (shadows the class-level defaults)
        self._ies = {}
        self._ies_instances = {}
        self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Screen output goes to stderr when logtostderr is set (bool indexes the pair)
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = {
            # Default parameters
            'nocheckcertificate': False,
        }
        self.params.update(params)
        self.cache = Cache(self)

        if sys.version_info < (3, 6):
            self.report_warning(
                'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])

        if self.params.get('allow_unplayable_formats'):
            self.report_warning(
                'You have asked for unplayable formats to be listed/downloaded. '
                'This is a developer option intended for debugging. '
                'If you experience any issues while using this option, DO NOT open a bug report')

        def check_deprecated(param, option, suggestion):
            # Warn about a deprecated option; returns True iff the option was set
            if self.params.get(param) is not None:
                self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        for msg in self.params.get('warnings', []):
            self.report_warning(msg)

        # Keep 'overwrites' and the legacy 'nooverwrites' key in sync
        if self.params.get('overwrites') is None:
            self.params.pop('overwrites', None)
        elif self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        if params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    # bidiv not found; fall back to fribidi
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self.outtmpl_dict = self.parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            None if self.params.get('format') is None
            else self.build_format_selector(self.params['format']))

        self._setup_opener()

        """Preload the archive, if any is specified"""
        def preload_download_archive(fn):
            # Returns True iff the archive file was read into self.archive
            if fn is None:
                return False
            self.write_debug('Loading archive file %r\n' % fn)
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        self.archive.add(line.strip())
            except IOError as ioe:
                # A missing archive file is not an error
                if ioe.errno != errno.ENOENT:
                    raise
                return False
            return True

        self.archive = set()
        preload_download_archive(self.params.get('download_archive'))

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        # Instantiate and register configured postprocessors
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            pp_class = get_postprocessor(pp_def.pop('key'))
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp, when=when)

        for ph in self.params.get('post_hooks', []):
            self.add_post_hook(ph)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)

        register_socks_protocols()
615
616 def warn_if_short_id(self, argv):
617 # short YouTube ID starting with dash?
618 idxs = [
619 i for i, a in enumerate(argv)
620 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
621 if idxs:
622 correct_argv = (
623 ['yt-dlp']
624 + [a for i, a in enumerate(argv) if i not in idxs]
625 + ['--'] + [argv[i] for i in idxs]
626 )
627 self.report_warning(
628 'Long argument string detected. '
629 'Use -- to separate parameters and URLs, like this:\n%s\n' %
630 args_to_str(correct_argv))
631
632 def add_info_extractor(self, ie):
633 """Add an InfoExtractor object to the end of the list."""
634 ie_key = ie.ie_key()
635 self._ies[ie_key] = ie
636 if not isinstance(ie, type):
637 self._ies_instances[ie_key] = ie
638 ie.set_downloader(self)
639
640 def _get_info_extractor_class(self, ie_key):
641 ie = self._ies.get(ie_key)
642 if ie is None:
643 ie = get_info_extractor(ie_key)
644 self.add_info_extractor(ie)
645 return ie
646
647 def get_info_extractor(self, ie_key):
648 """
649 Get an instance of an IE with name ie_key, it will try to get one from
650 the _ies list, if there's no instance it will create a new one and add
651 it to the extractor list.
652 """
653 ie = self._ies_instances.get(ie_key)
654 if ie is None:
655 ie = get_info_extractor(ie_key)()
656 self.add_info_extractor(ie)
657 return ie
658
659 def add_default_info_extractors(self):
660 """
661 Add the InfoExtractors returned by gen_extractors to the end of the list
662 """
663 for ie in gen_extractor_classes():
664 self.add_info_extractor(ie)
665
666 def add_post_processor(self, pp, when='post_process'):
667 """Add a PostProcessor object to the end of the chain."""
668 self._pps[when].append(pp)
669 pp.set_downloader(self)
670
671 def add_post_hook(self, ph):
672 """Add the post hook"""
673 self._post_hooks.append(ph)
674
675 def add_progress_hook(self, ph):
676 """Add the progress hook (currently only for the file downloader)"""
677 self._progress_hooks.append(ph)
678
679 def _bidi_workaround(self, message):
680 if not hasattr(self, '_output_channel'):
681 return message
682
683 assert hasattr(self, '_output_process')
684 assert isinstance(message, compat_str)
685 line_count = message.count('\n') + 1
686 self._output_process.stdin.write((message + '\n').encode('utf-8'))
687 self._output_process.stdin.flush()
688 res = ''.join(self._output_channel.readline().decode('utf-8')
689 for _ in range(line_count))
690 return res[:-len('\n')]
691
692 def _write_string(self, message, out=None, only_once=False):
693 if only_once:
694 if message in self._printed_messages:
695 return
696 self._printed_messages.add(message)
697 write_string(message, out=out, encoding=self.params.get('encoding'))
698
699 def to_stdout(self, message, skip_eol=False, quiet=False):
700 """Print message to stdout"""
701 if self.params.get('logger'):
702 self.params['logger'].debug(message)
703 elif not quiet or self.params.get('verbose'):
704 self._write_string(
705 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
706 self._err_file if quiet else self._screen_file)
707
708 def to_stderr(self, message, only_once=False):
709 """Print message to stderr"""
710 assert isinstance(message, compat_str)
711 if self.params.get('logger'):
712 self.params['logger'].error(message)
713 else:
714 self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
715
716 def to_console_title(self, message):
717 if not self.params.get('consoletitle', False):
718 return
719 if compat_os_name == 'nt':
720 if ctypes.windll.kernel32.GetConsoleWindow():
721 # c_wchar_p() might not be necessary if `message` is
722 # already of type unicode()
723 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
724 elif 'TERM' in os.environ:
725 self._write_string('\033]0;%s\007' % message, self._screen_file)
726
727 def save_console_title(self):
728 if not self.params.get('consoletitle', False):
729 return
730 if self.params.get('simulate'):
731 return
732 if compat_os_name != 'nt' and 'TERM' in os.environ:
733 # Save the title on stack
734 self._write_string('\033[22;0t', self._screen_file)
735
736 def restore_console_title(self):
737 if not self.params.get('consoletitle', False):
738 return
739 if self.params.get('simulate'):
740 return
741 if compat_os_name != 'nt' and 'TERM' in os.environ:
742 # Restore the title from stack
743 self._write_string('\033[23;0t', self._screen_file)
744
745 def __enter__(self):
746 self.save_console_title()
747 return self
748
749 def __exit__(self, *args):
750 self.restore_console_title()
751
752 if self.params.get('cookiefile') is not None:
753 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
754
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # Prefer the wrapped exception's own traceback when available
                    # (e.g. ExtractorError carries one in .exc_info)
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: show where we were called from
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            if tb:
                self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Re-raise as DownloadError, chaining the innermost exc_info we have
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        # Errors are ignored: record failure via the process return code
        self._download_retcode = 1
785
786 def to_screen(self, message, skip_eol=False):
787 """Print message to stdout if not in quiet mode"""
788 self.to_stdout(
789 message, skip_eol, quiet=self.params.get('quiet', False))
790
791 def report_warning(self, message, only_once=False):
792 '''
793 Print the message to stderr, it will be prefixed with 'WARNING:'
794 If stderr is a tty file the 'WARNING:' will be colored
795 '''
796 if self.params.get('logger') is not None:
797 self.params['logger'].warning(message)
798 else:
799 if self.params.get('no_warnings'):
800 return
801 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
802 _msg_header = '\033[0;33mWARNING:\033[0m'
803 else:
804 _msg_header = 'WARNING:'
805 warning_message = '%s %s' % (_msg_header, message)
806 self.to_stderr(warning_message, only_once)
807
808 def report_error(self, message, tb=None):
809 '''
810 Do the same as trouble, but prefixes the message with 'ERROR:', colored
811 in red if stderr is a tty file.
812 '''
813 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
814 _msg_header = '\033[0;31mERROR:\033[0m'
815 else:
816 _msg_header = 'ERROR:'
817 error_message = '%s %s' % (_msg_header, message)
818 self.trouble(error_message, tb)
819
820 def write_debug(self, message, only_once=False):
821 '''Log debug message or Print message to stderr'''
822 if not self.params.get('verbose', False):
823 return
824 message = '[debug] %s' % message
825 if self.params.get('logger'):
826 self.params['logger'].debug(message)
827 else:
828 self.to_stderr(message, only_once)
829
830 def report_file_already_downloaded(self, file_name):
831 """Report file has already been fully downloaded."""
832 try:
833 self.to_screen('[download] %s has already been downloaded' % file_name)
834 except UnicodeEncodeError:
835 self.to_screen('[download] The file has already been downloaded')
836
837 def report_file_delete(self, file_name):
838 """Report that existing file will be deleted."""
839 try:
840 self.to_screen('Deleting existing file %s' % file_name)
841 except UnicodeEncodeError:
842 self.to_screen('Deleting existing file')
843
844 def raise_no_formats(self, info, forced=False):
845 has_drm = info.get('__has_drm')
846 msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
847 expected = self.params.get('ignore_no_formats_error')
848 if forced or not expected:
849 raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
850 expected=has_drm or expected)
851 else:
852 self.report_warning(msg)
853
854 def parse_outtmpl(self):
855 outtmpl_dict = self.params.get('outtmpl', {})
856 if not isinstance(outtmpl_dict, dict):
857 outtmpl_dict = {'default': outtmpl_dict}
858 outtmpl_dict.update({
859 k: v for k, v in DEFAULT_OUTTMPL.items()
860 if not outtmpl_dict.get(k)})
861 for key, val in outtmpl_dict.items():
862 if isinstance(val, bytes):
863 self.report_warning(
864 'Parameter outtmpl is bytes, but should be a unicode string. '
865 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
866 return outtmpl_dict
867
868 def get_output_path(self, dir_type='', filename=None):
869 paths = self.params.get('paths', {})
870 assert isinstance(paths, dict)
871 path = os.path.join(
872 expand_path(paths.get('home', '').strip()),
873 expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
874 filename or '')
875
876 # Temporary fix for #4787
877 # 'Treat' all problem characters by passing filename through preferredencoding
878 # to workaround encoding issues with subprocess on python2 @ Windows
879 if sys.version_info < (3, 0) and sys.platform == 'win32':
880 path = encodeFilename(path, True).decode(preferredencoding())
881 return sanitize_path(path, force=self.params.get('windowsfilenames'))
882
883 @staticmethod
884 def _outtmpl_expandpath(outtmpl):
885 # expand_path translates '%%' into '%' and '$$' into '$'
886 # correspondingly that is not what we want since we need to keep
887 # '%%' intact for template dict substitution step. Working around
888 # with boundary-alike separator hack.
889 sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
890 outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
891
892 # outtmpl should be expand_path'ed before template dict substitution
893 # because meta fields may contain env variables we don't want to
894 # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
895 # title "Hello $PATH", we don't want `$PATH` to be expanded.
896 return expand_path(outtmpl).replace(sep, '')
897
898 @staticmethod
899 def escape_outtmpl(outtmpl):
900 ''' Escape any remaining strings like %s, %abc% etc. '''
901 return re.sub(
902 STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
903 lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
904 outtmpl)
905
906 @classmethod
907 def validate_outtmpl(cls, outtmpl):
908 ''' @return None or Exception object '''
909 outtmpl = re.sub(
910 STR_FORMAT_RE_TMPL.format('[^)]*', '[ljq]'),
911 lambda mobj: f'{mobj.group(0)[:-1]}s',
912 cls._outtmpl_expandpath(outtmpl))
913 try:
914 cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
915 return None
916 except ValueError as err:
917 return err
918
    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
        """ Make the template and info_dict suitable for substitution : ydl.outtmpl_escape(outtmpl) % info_dict """
        # sanitize, if given, is a callable (field_name, value) -> sanitized value
        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        info_dict = dict(info_dict)  # Do not sanitize so as not to consume LazyList
        for key in ('__original_infodict', '__postprocessors'):
            info_dict.pop(key, None)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        TMPL_DICT = {}
        # Matches %(key)FMT including the custom l/j/q conversions
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljq]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        # Parses the inside of %(...): negation, dotted field, math chain,
        # optional '>strftime' and optional '|default'
        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
            (?P<negate>-)?
            (?P<fields>{field})
            (?P<maths>(?:{math_op}{math_field})*)
            (?:>(?P<strf_format>.+?))?
            (?:\|(?P<default>.*?))?
            $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

        def _traverse_infodict(k):
            k = k.split('.')
            if k[0] == '':
                k.pop(0)
            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                # Alternate between consuming an operator and an operand
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        # Operand is a field name rather than a literal number
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'])

            return value

        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def _dumpjson_default(obj):
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')

        def create_key(outer_mobj):
            # re.sub callback: resolves one %(...)X occurrence and records its
            # value in TMPL_DICT under a NUL-mangled key
            if not outer_mobj.group('has_key'):
                return f'%{outer_mobj.group(0)}'
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            if mobj is None:
                value, default, mobj = None, na, {'fields': ''}
            else:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else na
                value = get_value(mobj)

            fmt = outer_mobj.group('format')
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = '0{:d}d'.format(field_size_compat_map[key])

            value = default if value is None else value

            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':
                value, fmt = ', '.join(variadic(value)), str_fmt
            elif fmt[-1] == 'j':
                value, fmt = json.dumps(value, default=_dumpjson_default), str_fmt
            elif fmt[-1] == 'q':
                value, fmt = compat_shlex_quote(str(value)), str_fmt
            elif fmt[-1] == 'c':
                value = str(value)
                # NOTE(review): str(value) can never be None, so this fallback
                # branch looks unreachable — confirm before relying on it
                if value is None:
                    value, fmt = default, 's'
                else:
                    value = value[0]
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitize(mobj['fields'].split('.')[-1], value)

            # Mangle the key with NULs so the escaped template cannot collide
            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1058
1059 def _prepare_filename(self, info_dict, tmpl_type='default'):
1060 try:
1061 sanitize = lambda k, v: sanitize_filename(
1062 compat_str(v),
1063 restricted=self.params.get('restrictfilenames'),
1064 is_id=(k == 'id' or k.endswith('_id')))
1065 outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
1066 outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)
1067 outtmpl = self.escape_outtmpl(self._outtmpl_expandpath(outtmpl))
1068 filename = outtmpl % template_dict
1069
1070 force_ext = OUTTMPL_TYPES.get(tmpl_type)
1071 if force_ext is not None:
1072 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1073
1074 # https://github.com/blackjack4494/youtube-dlc/issues/85
1075 trim_file_name = self.params.get('trim_file_name', False)
1076 if trim_file_name:
1077 fn_groups = filename.rsplit('.')
1078 ext = fn_groups[-1]
1079 sub_ext = ''
1080 if len(fn_groups) > 2:
1081 sub_ext = fn_groups[-2]
1082 filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
1083
1084 return filename
1085 except ValueError as err:
1086 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1087 return None
1088
1089 def prepare_filename(self, info_dict, dir_type='', warn=False):
1090 """Generate the output filename."""
1091
1092 filename = self._prepare_filename(info_dict, dir_type or 'default')
1093
1094 if warn:
1095 if not self.params.get('paths'):
1096 pass
1097 elif filename == '-':
1098 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1099 elif os.path.isabs(filename):
1100 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1101 if filename == '-' or not filename:
1102 return filename
1103
1104 return self.get_output_path(dir_type, filename)
1105
    def _match_entry(self, info_dict, incomplete=False, silent=False):
        """ Returns None if the file should be downloaded """
        # Otherwise returns a human-readable skip reason; may raise
        # ExistingVideoReached/RejectedVideoReached if the matching
        # --break-on-* option is enabled.

        video_title = info_dict.get('title', info_dict.get('id', 'video'))

        def check_filter():
            # Returns a skip reason string, or None to accept the entry
            if 'title' in info_dict:
                # This can happen when we're just evaluating the playlist
                title = info_dict['title']
                matchtitle = self.params.get('matchtitle', False)
                if matchtitle:
                    if not re.search(matchtitle, title, re.IGNORECASE):
                        return '"' + title + '" title did not match pattern "' + matchtitle + '"'
                rejecttitle = self.params.get('rejecttitle', False)
                if rejecttitle:
                    if re.search(rejecttitle, title, re.IGNORECASE):
                        return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
            date = info_dict.get('upload_date')
            if date is not None:
                dateRange = self.params.get('daterange', DateRange())
                if date not in dateRange:
                    return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
            view_count = info_dict.get('view_count')
            if view_count is not None:
                min_views = self.params.get('min_views')
                if min_views is not None and view_count < min_views:
                    return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
                max_views = self.params.get('max_views')
                if max_views is not None and view_count > max_views:
                    return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
            if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
                return 'Skipping "%s" because it is age restricted' % video_title

            match_filter = self.params.get('match_filter')
            if match_filter is not None:
                try:
                    ret = match_filter(info_dict, incomplete=incomplete)
                except TypeError:
                    # For backward compatibility
                    ret = None if incomplete else match_filter(info_dict)
                if ret is not None:
                    return ret
            return None

        if self.in_download_archive(info_dict):
            reason = '%s has already been recorded in the archive' % video_title
            break_opt, break_err = 'break_on_existing', ExistingVideoReached
        else:
            reason = check_filter()
            break_opt, break_err = 'break_on_reject', RejectedVideoReached
        if reason is not None:
            if not silent:
                self.to_screen('[download] ' + reason)
            # Abort the whole run when the corresponding --break-* option is set
            if self.params.get(break_opt, False):
                raise break_err()
        return reason
1162
1163 @staticmethod
1164 def add_extra_info(info_dict, extra_info):
1165 '''Set the keys from extra_info in info dict if they are missing'''
1166 for key, value in extra_info.items():
1167 info_dict.setdefault(key, value)
1168
    def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                     process=True, force_generic_extractor=False):
        """
        Return a list with a dictionary for each video extracted.

        Arguments:
        url -- URL to extract

        Keyword arguments:
        download -- whether to download videos during extraction
        ie_key -- extractor key hint
        extra_info -- dictionary containing the extra values to add to each result
        process -- whether to resolve all unresolved references (URLs, playlist items),
                   must be True for download to work.
        force_generic_extractor -- force using the generic extractor
        """

        if extra_info is None:
            extra_info = {}

        if not ie_key and force_generic_extractor:
            ie_key = 'Generic'

        if ie_key:
            # Only consider the hinted extractor
            ies = {ie_key: self._get_info_extractor_class(ie_key)}
        else:
            ies = self._ies

        for ie_key, ie in ies.items():
            if not ie.suitable(url):
                continue

            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            temp_id = ie.get_temp_id(url)
            if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
                self.to_screen("[%s] %s: has already been recorded in archive" % (
                    ie_key, temp_id))
                # 'break' skips the for-else below, so an archive hit returns
                # None without reporting "no suitable InfoExtractor"
                break
            return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
        else:
            # for-else: no extractor accepted the URL
            self.report_error('no suitable InfoExtractor for URL %s' % url)
1213
    def __handle_extraction_exceptions(func):
        # Decorator (applied inside the class body, so it receives the plain
        # function) that converts extraction-time exceptions into user-facing
        # error/warning reporting; control-flow exceptions are re-raised as-is

        def wrapper(self, *args, **kwargs):
            try:
                return func(self, *args, **kwargs)
            except GeoRestrictedError as e:
                msg = e.msg
                if e.countries:
                    msg += '\nThis video is available in %s.' % ', '.join(
                        map(ISO3166Utils.short2full, e.countries))
                msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
                self.report_error(msg)
            except ExtractorError as e:  # An error we somewhat expected
                self.report_error(compat_str(e), e.format_traceback())
            except ThrottledDownload:
                self.to_stderr('\r')
                self.report_warning('The download speed is below throttle limit. Re-extracting data')
                # Retry the whole extraction by recursing into the wrapper
                return wrapper(self, *args, **kwargs)
            except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached, LazyList.IndexError):
                # These control the overall run and must propagate unchanged
                raise
            except Exception as e:
                if self.params.get('ignoreerrors', False):
                    self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
                else:
                    raise
        return wrapper
1240
1241 @__handle_extraction_exceptions
1242 def __extract_info(self, url, ie, download, extra_info, process):
1243 ie_result = ie.extract(url)
1244 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1245 return
1246 if isinstance(ie_result, list):
1247 # Backwards compatibility: old IE result format
1248 ie_result = {
1249 '_type': 'compat_list',
1250 'entries': ie_result,
1251 }
1252 if extra_info.get('original_url'):
1253 ie_result.setdefault('original_url', extra_info['original_url'])
1254 self.add_default_extra_info(ie_result, ie, url)
1255 if process:
1256 return self.process_ie_result(ie_result, download, extra_info)
1257 else:
1258 return ie_result
1259
1260 def add_default_extra_info(self, ie_result, ie, url):
1261 if url is not None:
1262 self.add_extra_info(ie_result, {
1263 'webpage_url': url,
1264 'original_url': url,
1265 'webpage_url_basename': url_basename(url),
1266 })
1267 if ie is not None:
1268 self.add_extra_info(ie_result, {
1269 'extractor': ie.IE_NAME,
1270 'extractor_key': ie.ie_key(),
1271 })
1272
1273 def process_ie_result(self, ie_result, download=True, extra_info=None):
1274 """
1275 Take the result of the ie(may be modified) and resolve all unresolved
1276 references (URLs, playlist items).
1277
1278 It will also download the videos if 'download'.
1279 Returns the resolved ie_result.
1280 """
1281 if extra_info is None:
1282 extra_info = {}
1283 result_type = ie_result.get('_type', 'video')
1284
1285 if result_type in ('url', 'url_transparent'):
1286 ie_result['url'] = sanitize_url(ie_result['url'])
1287 if ie_result.get('original_url'):
1288 extra_info.setdefault('original_url', ie_result['original_url'])
1289
1290 extract_flat = self.params.get('extract_flat', False)
1291 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1292 or extract_flat is True):
1293 info_copy = ie_result.copy()
1294 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1295 if not ie_result.get('id'):
1296 info_copy['id'] = ie.get_temp_id(ie_result['url'])
1297 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1298 self.add_extra_info(info_copy, extra_info)
1299 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1300 if self.params.get('force_write_download_archive', False):
1301 self.record_download_archive(info_copy)
1302 return ie_result
1303
1304 if result_type == 'video':
1305 self.add_extra_info(ie_result, extra_info)
1306 ie_result = self.process_video_result(ie_result, download=download)
1307 additional_urls = (ie_result or {}).get('additional_urls')
1308 if additional_urls:
1309 # TODO: Improve MetadataParserPP to allow setting a list
1310 if isinstance(additional_urls, compat_str):
1311 additional_urls = [additional_urls]
1312 self.to_screen(
1313 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1314 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1315 ie_result['additional_entries'] = [
1316 self.extract_info(
1317 url, download, extra_info,
1318 force_generic_extractor=self.params.get('force_generic_extractor'))
1319 for url in additional_urls
1320 ]
1321 return ie_result
1322 elif result_type == 'url':
1323 # We have to add extra_info to the results because it may be
1324 # contained in a playlist
1325 return self.extract_info(
1326 ie_result['url'], download,
1327 ie_key=ie_result.get('ie_key'),
1328 extra_info=extra_info)
1329 elif result_type == 'url_transparent':
1330 # Use the information from the embedding page
1331 info = self.extract_info(
1332 ie_result['url'], ie_key=ie_result.get('ie_key'),
1333 extra_info=extra_info, download=False, process=False)
1334
1335 # extract_info may return None when ignoreerrors is enabled and
1336 # extraction failed with an error, don't crash and return early
1337 # in this case
1338 if not info:
1339 return info
1340
1341 force_properties = dict(
1342 (k, v) for k, v in ie_result.items() if v is not None)
1343 for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1344 if f in force_properties:
1345 del force_properties[f]
1346 new_result = info.copy()
1347 new_result.update(force_properties)
1348
1349 # Extracted info may not be a video result (i.e.
1350 # info.get('_type', 'video') != video) but rather an url or
1351 # url_transparent. In such cases outer metadata (from ie_result)
1352 # should be propagated to inner one (info). For this to happen
1353 # _type of info should be overridden with url_transparent. This
1354 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1355 if new_result.get('_type') == 'url':
1356 new_result['_type'] = 'url_transparent'
1357
1358 return self.process_ie_result(
1359 new_result, download=download, extra_info=extra_info)
1360 elif result_type in ('playlist', 'multi_video'):
1361 # Protect from infinite recursion due to recursively nested playlists
1362 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1363 webpage_url = ie_result['webpage_url']
1364 if webpage_url in self._playlist_urls:
1365 self.to_screen(
1366 '[download] Skipping already downloaded playlist: %s'
1367 % ie_result.get('title') or ie_result.get('id'))
1368 return
1369
1370 self._playlist_level += 1
1371 self._playlist_urls.add(webpage_url)
1372 self._sanitize_thumbnails(ie_result)
1373 try:
1374 return self.__process_playlist(ie_result, download)
1375 finally:
1376 self._playlist_level -= 1
1377 if not self._playlist_level:
1378 self._playlist_urls.clear()
1379 elif result_type == 'compat_list':
1380 self.report_warning(
1381 'Extractor %s returned a compat_list result. '
1382 'It needs to be updated.' % ie_result.get('extractor'))
1383
1384 def _fixup(r):
1385 self.add_extra_info(r, {
1386 'extractor': ie_result['extractor'],
1387 'webpage_url': ie_result['webpage_url'],
1388 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1389 'extractor_key': ie_result['extractor_key'],
1390 })
1391 return r
1392 ie_result['entries'] = [
1393 self.process_ie_result(_fixup(r), download, extra_info)
1394 for r in ie_result['entries']
1395 ]
1396 return ie_result
1397 else:
1398 raise Exception('Invalid result type: %s' % result_type)
1399
1400 def _ensure_dir_exists(self, path):
1401 return make_dir(path, self.report_error)
1402
1403 def __process_playlist(self, ie_result, download):
1404 # We process each entry in the playlist
1405 playlist = ie_result.get('title') or ie_result.get('id')
1406 self.to_screen('[download] Downloading playlist: %s' % playlist)
1407
1408 if 'entries' not in ie_result:
1409 raise EntryNotInPlaylist()
1410 incomplete_entries = bool(ie_result.get('requested_entries'))
1411 if incomplete_entries:
1412 def fill_missing_entries(entries, indexes):
1413 ret = [None] * max(*indexes)
1414 for i, entry in zip(indexes, entries):
1415 ret[i - 1] = entry
1416 return ret
1417 ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1418
1419 playlist_results = []
1420
1421 playliststart = self.params.get('playliststart', 1)
1422 playlistend = self.params.get('playlistend')
1423 # For backwards compatibility, interpret -1 as whole list
1424 if playlistend == -1:
1425 playlistend = None
1426
1427 playlistitems_str = self.params.get('playlist_items')
1428 playlistitems = None
1429 if playlistitems_str is not None:
1430 def iter_playlistitems(format):
1431 for string_segment in format.split(','):
1432 if '-' in string_segment:
1433 start, end = string_segment.split('-')
1434 for item in range(int(start), int(end) + 1):
1435 yield int(item)
1436 else:
1437 yield int(string_segment)
1438 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1439
1440 ie_entries = ie_result['entries']
1441 msg = (
1442 'Downloading %d videos' if not isinstance(ie_entries, list)
1443 else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
1444
1445 if isinstance(ie_entries, list):
1446 def get_entry(i):
1447 return ie_entries[i - 1]
1448 else:
1449 if not isinstance(ie_entries, PagedList):
1450 ie_entries = LazyList(ie_entries)
1451
1452 def get_entry(i):
1453 return YoutubeDL.__handle_extraction_exceptions(
1454 lambda self, i: ie_entries[i - 1]
1455 )(self, i)
1456
1457 entries = []
1458 for i in playlistitems or itertools.count(playliststart):
1459 if playlistitems is None and playlistend is not None and playlistend < i:
1460 break
1461 entry = None
1462 try:
1463 entry = get_entry(i)
1464 if entry is None:
1465 raise EntryNotInPlaylist()
1466 except (IndexError, EntryNotInPlaylist):
1467 if incomplete_entries:
1468 raise EntryNotInPlaylist()
1469 elif not playlistitems:
1470 break
1471 entries.append(entry)
1472 try:
1473 if entry is not None:
1474 self._match_entry(entry, incomplete=True, silent=True)
1475 except (ExistingVideoReached, RejectedVideoReached):
1476 break
1477 ie_result['entries'] = entries
1478
1479 # Save playlist_index before re-ordering
1480 entries = [
1481 ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
1482 for i, entry in enumerate(entries, 1)
1483 if entry is not None]
1484 n_entries = len(entries)
1485
1486 if not playlistitems and (playliststart or playlistend):
1487 playlistitems = list(range(playliststart, playliststart + n_entries))
1488 ie_result['requested_entries'] = playlistitems
1489
1490 if self.params.get('allow_playlist_files', True):
1491 ie_copy = {
1492 'playlist': playlist,
1493 'playlist_id': ie_result.get('id'),
1494 'playlist_title': ie_result.get('title'),
1495 'playlist_uploader': ie_result.get('uploader'),
1496 'playlist_uploader_id': ie_result.get('uploader_id'),
1497 'playlist_index': 0,
1498 }
1499 ie_copy.update(dict(ie_result))
1500
1501 if self.params.get('writeinfojson', False):
1502 infofn = self.prepare_filename(ie_copy, 'pl_infojson')
1503 if not self._ensure_dir_exists(encodeFilename(infofn)):
1504 return
1505 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
1506 self.to_screen('[info] Playlist metadata is already present')
1507 else:
1508 self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
1509 try:
1510 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
1511 except (OSError, IOError):
1512 self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
1513
1514 # TODO: This should be passed to ThumbnailsConvertor if necessary
1515 self._write_thumbnails(ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1516
1517 if self.params.get('writedescription', False):
1518 descfn = self.prepare_filename(ie_copy, 'pl_description')
1519 if not self._ensure_dir_exists(encodeFilename(descfn)):
1520 return
1521 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
1522 self.to_screen('[info] Playlist description is already present')
1523 elif ie_result.get('description') is None:
1524 self.report_warning('There\'s no playlist description to write.')
1525 else:
1526 try:
1527 self.to_screen('[info] Writing playlist description to: ' + descfn)
1528 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1529 descfile.write(ie_result['description'])
1530 except (OSError, IOError):
1531 self.report_error('Cannot write playlist description file ' + descfn)
1532 return
1533
1534 if self.params.get('playlistreverse', False):
1535 entries = entries[::-1]
1536 if self.params.get('playlistrandom', False):
1537 random.shuffle(entries)
1538
1539 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1540
1541 self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
1542 failures = 0
1543 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1544 for i, entry_tuple in enumerate(entries, 1):
1545 playlist_index, entry = entry_tuple
1546 if 'playlist-index' in self.params.get('compat_opts', []):
1547 playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
1548 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1549 # This __x_forwarded_for_ip thing is a bit ugly but requires
1550 # minimal changes
1551 if x_forwarded_for:
1552 entry['__x_forwarded_for_ip'] = x_forwarded_for
1553 extra = {
1554 'n_entries': n_entries,
1555 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
1556 'playlist_index': playlist_index,
1557 'playlist_autonumber': i,
1558 'playlist': playlist,
1559 'playlist_id': ie_result.get('id'),
1560 'playlist_title': ie_result.get('title'),
1561 'playlist_uploader': ie_result.get('uploader'),
1562 'playlist_uploader_id': ie_result.get('uploader_id'),
1563 'extractor': ie_result['extractor'],
1564 'webpage_url': ie_result['webpage_url'],
1565 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1566 'extractor_key': ie_result['extractor_key'],
1567 }
1568
1569 if self._match_entry(entry, incomplete=True) is not None:
1570 continue
1571
1572 entry_result = self.__process_iterable_entry(entry, download, extra)
1573 if not entry_result:
1574 failures += 1
1575 if failures >= max_failures:
1576 self.report_error(
1577 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1578 break
1579 # TODO: skip failed (empty) entries?
1580 playlist_results.append(entry_result)
1581 ie_result['entries'] = playlist_results
1582 self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1583 return ie_result
1584
1585 @__handle_extraction_exceptions
1586 def __process_iterable_entry(self, entry, download, extra_info):
1587 return self.process_ie_result(
1588 entry, download=download, extra_info=extra_info)
1589
    def _build_format_filter(self, filter_spec):
        " Returns a function to filter the formats according to the filter_spec "

        OPERATORS = {
            '<': operator.lt,
            '<=': operator.le,
            '>': operator.gt,
            '>=': operator.ge,
            '=': operator.eq,
            '!=': operator.ne,
        }
        # Numeric comparisons, e.g. "height>=720" or "filesize>100M"
        operator_rex = re.compile(r'''(?x)\s*
            (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
            (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
            ''' % '|'.join(map(re.escape, OPERATORS.keys())))
        m = operator_rex.fullmatch(filter_spec)
        if m:
            try:
                comparison_value = int(m.group('value'))
            except ValueError:
                # Not a plain integer: accept human-readable sizes (500K, 0.5MiB, ...)
                comparison_value = parse_filesize(m.group('value'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('value') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid value %r in format specification %r' % (
                            m.group('value'), filter_spec))
            op = OPERATORS[m.group('op')]

        if not m:
            # Fall back to string comparisons, e.g. "ext=mp4", with optional '!' negation
            STR_OPERATORS = {
                '=': operator.eq,
                '^=': lambda attr, value: attr.startswith(value),
                '$=': lambda attr, value: attr.endswith(value),
                '*=': lambda attr, value: value in attr,
            }
            str_operator_rex = re.compile(r'''(?x)\s*
                (?P<key>[a-zA-Z0-9._-]+)\s*
                (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
                (?P<value>[a-zA-Z0-9._-]+)\s*
                ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
            m = str_operator_rex.fullmatch(filter_spec)
            if m:
                comparison_value = m.group('value')
                str_op = STR_OPERATORS[m.group('op')]
                if m.group('negation'):
                    op = lambda attr, value: not str_op(attr, value)
                else:
                    op = str_op

        if not m:
            raise SyntaxError('Invalid filter specification %r' % filter_spec)

        def _filter(f):
            # Closure over m/op/comparison_value from whichever regex matched
            actual_value = f.get(m.group('key'))
            if actual_value is None:
                # A trailing '?' in the spec keeps formats that lack the field
                return m.group('none_inclusive')
            return op(actual_value, comparison_value)
        return _filter
1650
1651 def _default_format_spec(self, info_dict, download=True):
1652
1653 def can_merge():
1654 merger = FFmpegMergerPP(self)
1655 return merger.available and merger.can_merge()
1656
1657 prefer_best = (
1658 not self.params.get('simulate')
1659 and download
1660 and (
1661 not can_merge()
1662 or info_dict.get('is_live', False)
1663 or self.outtmpl_dict['default'] == '-'))
1664 compat = (
1665 prefer_best
1666 or self.params.get('allow_multiple_audio_streams', False)
1667 or 'format-spec' in self.params.get('compat_opts', []))
1668
1669 return (
1670 'best/bestvideo+bestaudio' if prefer_best
1671 else 'bestvideo*+bestaudio/best' if not compat
1672 else 'bestvideo+bestaudio/best')
1673
    def build_format_selector(self, format_spec):
        """Compile *format_spec* (e.g. 'bestvideo+bestaudio/best') into a
        selector function that maps a context dict with keys 'formats' and
        'incomplete_formats' to the format dicts that should be downloaded.

        The spec is tokenized with the stdlib tokenizer, parsed into a tree
        of FormatSelector nodes and then compiled into nested generator
        functions.
        """
        def syntax_error(note, start):
            # start is a tokenizer (row, col) pair; the caret points at col
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        # Node types of the parsed selector tree
        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                                  'video': self.params.get('allow_multiple_video_streams', False)}

        check_formats = self.params.get('check_formats')

        def _parse_filter(tokens):
            # Collect everything up to the closing ']' as one filter string
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    # Glue consecutive names/numbers/unused ops into one NAME
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            # Recursive-descent parse of a comma-separated selector list;
            # *tokens* supports one-token backtracking via restore_last_token
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        if not current_selector:
                            raise syntax_error('Unexpected "+"', start)
                        selector_1 = current_selector
                        selector_2 = _parse_format_selection(tokens, inside_merge=True)
                        if not selector_2:
                            raise syntax_error('Expected a selector', start)
                        current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _merge(formats_pair):
            # Combine two formats (or previously merged groups) into one
            # info dict carrying 'requested_formats'
            format_1, format_2 = formats_pair

            formats_info = []
            formats_info.extend(format_1.get('requested_formats', (format_1,)))
            formats_info.extend(format_2.get('requested_formats', (format_2,)))

            if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
                # Drop extra audio/video streams beyond the first of each kind
                get_no_more = {'video': False, 'audio': False}
                for (i, fmt_info) in enumerate(formats_info):
                    if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
                        formats_info.pop(i)
                        continue
                    for aud_vid in ['audio', 'video']:
                        if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                            if get_no_more[aud_vid]:
                                formats_info.pop(i)
                                break
                            get_no_more[aud_vid] = True

            if len(formats_info) == 1:
                return formats_info[0]

            video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
            audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

            the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
            the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

            output_ext = self.params.get('merge_output_format')
            if not output_ext:
                if the_only_video:
                    output_ext = the_only_video['ext']
                elif the_only_audio and not video_fmts:
                    output_ext = the_only_audio['ext']
                else:
                    # Mixed/multiple streams: mkv can contain anything
                    output_ext = 'mkv'

            new_dict = {
                'requested_formats': formats_info,
                'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
                'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
                'ext': output_ext,
            }

            if the_only_video:
                new_dict.update({
                    'width': the_only_video.get('width'),
                    'height': the_only_video.get('height'),
                    'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
                    'fps': the_only_video.get('fps'),
                    'vcodec': the_only_video.get('vcodec'),
                    'vbr': the_only_video.get('vbr'),
                    'stretched_ratio': the_only_video.get('stretched_ratio'),
                })

            if the_only_audio:
                new_dict.update({
                    'acodec': the_only_audio.get('acodec'),
                    'abr': the_only_audio.get('abr'),
                })

            return new_dict

        def _check_formats(formats):
            # When --check-formats is set, yield only formats whose URL
            # actually works (verified via a small test download)
            if not check_formats:
                yield from formats
                return
            for f in formats:
                self.to_screen('[info] Testing format %s' % f['format_id'])
                temp_file = tempfile.NamedTemporaryFile(
                    suffix='.tmp', delete=False,
                    dir=self.get_output_path('temp') or None)
                temp_file.close()
                try:
                    success, _ = self.dl(temp_file.name, f, test=True)
                except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
                    success = False
                finally:
                    if os.path.exists(temp_file.name):
                        try:
                            os.remove(temp_file.name)
                        except OSError:
                            self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
                if success:
                    yield f
                else:
                    self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])

        def _build_selector_function(selector):
            # Compile a parse-tree node into a function(ctx) -> iterable of formats
            if isinstance(selector, list):  # ,
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    for f in fs:
                        yield from f(ctx)
                return selector_function

            elif selector.type == GROUP:  # ()
                selector_function = _build_selector_function(selector.selector)

            elif selector.type == PICKFIRST:  # /
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []

            elif selector.type == MERGE:  # +
                selector_1, selector_2 = map(_build_selector_function, selector.selector)

                def selector_function(ctx):
                    # deepcopy so each side sees an unmodified context
                    for pair in itertools.product(
                            selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                        yield _merge(pair)

            elif selector.type == SINGLE:  # atom
                format_spec = selector.selector or 'best'

                # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
                if format_spec == 'all':
                    def selector_function(ctx):
                        yield from _check_formats(ctx['formats'])
                elif format_spec == 'mergeall':
                    def selector_function(ctx):
                        formats = list(_check_formats(ctx['formats']))
                        if not formats:
                            return
                        merged_format = formats[-1]
                        for f in formats[-2::-1]:
                            merged_format = _merge((merged_format, f))
                        yield merged_format

                else:
                    format_fallback, format_reverse, format_idx = False, True, 1
                    mobj = re.match(
                        r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                        format_spec)
                    if mobj is not None:
                        format_idx = int_or_none(mobj.group('n'), default=1)
                        format_reverse = mobj.group('bw')[0] == 'b'
                        # '[None])[0]' reduces 'video'/'audio' to 'v'/'a'
                        format_type = (mobj.group('type') or [None])[0]
                        not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                        format_modified = mobj.group('mod') is not None

                        format_fallback = not format_type and not format_modified  # for b, w
                        _filter_f = (
                            (lambda f: f.get('%scodec' % format_type) != 'none')
                            if format_type and format_modified  # bv*, ba*, wv*, wa*
                            else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                            if format_type  # bv, ba, wv, wa
                            else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                            if not format_modified  # b, w
                            else lambda f: True)  # b*, w*
                        filter_f = lambda f: _filter_f(f) and (
                            f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                    else:
                        filter_f = ((lambda f: f.get('ext') == format_spec)
                                    if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
                                    else (lambda f: f.get('format_id') == format_spec))  # id

                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                        if format_fallback and ctx['incomplete_formats'] and not matches:
                            # for extractors with incomplete formats (audio only (soundcloud)
                            # or video only (imgur)) best/worst will fallback to
                            # best/worst {video,audio}-only format
                            matches = formats
                        matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                        try:
                            yield matches[format_idx - 1]
                        except IndexError:
                            return

            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                # Apply the node's [...] filters on a copy of the context
                ctx_copy = copy.deepcopy(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector

        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator(object):
            # Token stream with one-step backtracking for the parser
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            next = __next__

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
2004
2005 def _calc_headers(self, info_dict):
2006 res = std_headers.copy()
2007
2008 add_headers = info_dict.get('http_headers')
2009 if add_headers:
2010 res.update(add_headers)
2011
2012 cookies = self._calc_cookies(info_dict)
2013 if cookies:
2014 res['Cookie'] = cookies
2015
2016 if 'X-Forwarded-For' not in res:
2017 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2018 if x_forwarded_for_ip:
2019 res['X-Forwarded-For'] = x_forwarded_for_ip
2020
2021 return res
2022
    def _calc_cookies(self, info_dict):
        # Build the Cookie header value for this URL by letting the cookiejar
        # fill in a throwaway request and reading the header back.
        pr = sanitized_Request(info_dict['url'])
        self.cookiejar.add_cookie_header(pr)
        return pr.get_header('Cookie')
2027
    def _sanitize_thumbnails(self, info_dict):
        """Normalize info_dict['thumbnails'] in place: promote a lone
        'thumbnail' field, sort worst-to-best, fill ids/resolutions,
        sanitize URLs and (lazily) drop unreachable thumbnails."""
        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            # Promote a single 'thumbnail' URL into the thumbnails list
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if thumbnails:
            # Sort ascending by preference, then size, so the last entry is best
            thumbnails.sort(key=lambda t: (
                t.get('preference') if t.get('preference') is not None else -1,
                t.get('width') if t.get('width') is not None else -1,
                t.get('height') if t.get('height') is not None else -1,
                t.get('id') if t.get('id') is not None else '',
                t.get('url')))

            def thumbnail_tester():
                # Returns a predicate that checks thumbnail URLs via a HEAD
                # request; with check_formats, every thumbnail is tested and
                # progress goes to the screen instead of the debug log
                if self.params.get('check_formats'):
                    test_all = True
                    to_screen = lambda msg: self.to_screen(f'[info] {msg}')
                else:
                    test_all = False
                    to_screen = self.write_debug

                def test_thumbnail(t):
                    if not test_all and not t.get('_test_url'):
                        return True
                    to_screen('Testing thumbnail %s' % t['id'])
                    try:
                        self.urlopen(HEADRequest(t['url']))
                    except network_exceptions as err:
                        to_screen('Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % (
                            t['id'], t['url'], error_to_compat_str(err)))
                        return False
                    return True

                return test_thumbnail

            for i, t in enumerate(thumbnails):
                if t.get('id') is None:
                    t['id'] = '%d' % i
                if t.get('width') and t.get('height'):
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
                t['url'] = sanitize_url(t['url'])

            if self.params.get('check_formats') is not False:
                # Test lazily from best to worst, then restore ascending order
                info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse()
            else:
                info_dict['thumbnails'] = thumbnails
2075
    def process_video_result(self, info_dict, download=True):
        """Sanitize a single video info_dict, select subtitles and formats,
        and hand each chosen format to process_info (when *download* is set).
        Returns the (mutated) info_dict, updated with the last chosen format
        for backwards compatibility."""
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result',
                                 video_id=info_dict['id'], ie=info_dict['extractor'])

        def report_force_conversion(field, field_not, conversion):
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            # Coerce a non-string field to str, warning about the extractor bug
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        def sanitize_numeric_fields(info):
            # Coerce all known numeric fields to int (None on failure)
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        self._sanitize_thumbnails(info_dict)

        thumbnail = info_dict.get('thumbnail')
        thumbnails = info_dict.get('thumbnails')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            # Thumbnails are sorted worst-to-best; take the best one
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if info_dict.get('display_id') is None and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        # Derive upload_date/release_date from their timestamps when missing
        for ts_key, date_key in (
                ('timestamp', 'upload_date'),
                ('release_timestamp', 'release_date'),
        ):
            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                try:
                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
                except (ValueError, OverflowError, OSError):
                    pass

        # Reconcile live_status with the is_live/was_live booleans
        live_keys = ('is_live', 'was_live')
        live_status = info_dict.get('live_status')
        if live_status is None:
            for key in live_keys:
                if info_dict.get(key) is False:
                    continue
                if info_dict.get(key):
                    live_status = key
                    break
            if all(info_dict.get(key) is False for key in live_keys):
                live_status = 'not_live'
        if live_status:
            info_dict['live_status'] = live_status
            for key in live_keys:
                if info_dict.get(key) is None:
                    info_dict[key] = (live_status == key)

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        # Sanitize subtitle URLs and fill in missing extensions
        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
        if not self.params.get('allow_unplayable_formats'):
            formats = [f for f in formats if not f.get('has_drm')]

        if not formats:
            self.raise_no_formats(info_dict)

        def is_wellformed(f):
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats))

        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if not format.get('format_id'):
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        for format_id, ambiguous_formats in formats_dict.items():
            if len(ambiguous_formats) > 1:
                for i, format in enumerate(ambiguous_formats):
                    format['format_id'] = '%s-%d' % (format_id, i)

        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=format_field(format, 'format_note', ' (%s)'),
                )
            # Automatically determine file extension if missing
            if format.get('ext') is None:
                format['ext'] = determine_ext(format['url']).lower()
            # Automatically determine protocol if missing (useful for format
            # selection purposes)
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        if not formats or formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats

        info_dict, _ = self.pre_process(info_dict)

        # Handle the pure listing options (-F, --list-thumbnails, ...)
        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
        if self.params.get('listformats'):
            if not info_dict.get('formats') and not info_dict.get('url'):
                self.to_screen('%s has no formats' % info_dict['id'])
            else:
                self.list_formats(info_dict)
        if self.params.get('listsubtitles'):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        list_only = self.params.get('simulate') is None and (
            self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
        if list_only:
            # Without this printing, -F --print-json will not work
            self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
            return

        format_selector = self.format_selector
        if format_selector is None:
            req_format = self._default_format_spec(info_dict, download=download)
            self.write_debug('Default format spec: %s' % req_format)
            format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/ytdl-org/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/ytdl-org/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
            # all formats are audio-only
            or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('Requested format is not available', expected=True,
                                     video_id=info_dict['id'], ie=info_dict['extractor'])
            else:
                self.report_warning('Requested format is not available')
                # Process what we can, even without any available formats.
                self.process_info(dict(info_dict))
        elif download:
            self.to_screen(
                '[info] %s: Downloading %d format(s): %s' % (
                    info_dict['id'], len(formats_to_download),
                    ", ".join([f['format_id'] for f in formats_to_download])))
            for fmt in formats_to_download:
                new_info = dict(info_dict)
                # Save a reference to the original info_dict so that it can be modified in process_info if needed
                new_info['__original_infodict'] = info_dict
                new_info.update(fmt)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        if formats_to_download:
            info_dict.update(formats_to_download[-1])
        return info_dict
2337
2338 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2339 """Select the requested subtitles and their format"""
2340 available_subs = {}
2341 if normal_subtitles and self.params.get('writesubtitles'):
2342 available_subs.update(normal_subtitles)
2343 if automatic_captions and self.params.get('writeautomaticsub'):
2344 for lang, cap_info in automatic_captions.items():
2345 if lang not in available_subs:
2346 available_subs[lang] = cap_info
2347
2348 if (not self.params.get('writesubtitles') and not
2349 self.params.get('writeautomaticsub') or not
2350 available_subs):
2351 return None
2352
2353 all_sub_langs = available_subs.keys()
2354 if self.params.get('allsubtitles', False):
2355 requested_langs = all_sub_langs
2356 elif self.params.get('subtitleslangs', False):
2357 requested_langs = set()
2358 for lang in self.params.get('subtitleslangs'):
2359 if lang == 'all':
2360 requested_langs.update(all_sub_langs)
2361 continue
2362 discard = lang[0] == '-'
2363 if discard:
2364 lang = lang[1:]
2365 current_langs = filter(re.compile(lang + '$').match, all_sub_langs)
2366 if discard:
2367 for lang in current_langs:
2368 requested_langs.discard(lang)
2369 else:
2370 requested_langs.update(current_langs)
2371 elif 'en' in available_subs:
2372 requested_langs = ['en']
2373 else:
2374 requested_langs = [list(all_sub_langs)[0]]
2375 if requested_langs:
2376 self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2377
2378 formats_query = self.params.get('subtitlesformat', 'best')
2379 formats_preference = formats_query.split('/') if formats_query else []
2380 subs = {}
2381 for lang in requested_langs:
2382 formats = available_subs.get(lang)
2383 if formats is None:
2384 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2385 continue
2386 for ext in formats_preference:
2387 if ext == 'best':
2388 f = formats[-1]
2389 break
2390 matches = list(filter(lambda f: f['ext'] == ext, formats))
2391 if matches:
2392 f = matches[-1]
2393 break
2394 else:
2395 f = formats[-1]
2396 self.report_warning(
2397 'No subtitle format found matching "%s" for language %s, '
2398 'using %s' % (formats_query, lang, f['ext']))
2399 subs[lang] = f
2400 return subs
2401
2402 def __forced_printings(self, info_dict, filename, incomplete):
2403 def print_mandatory(field, actual_field=None):
2404 if actual_field is None:
2405 actual_field = field
2406 if (self.params.get('force%s' % field, False)
2407 and (not incomplete or info_dict.get(actual_field) is not None)):
2408 self.to_stdout(info_dict[actual_field])
2409
2410 def print_optional(field):
2411 if (self.params.get('force%s' % field, False)
2412 and info_dict.get(field) is not None):
2413 self.to_stdout(info_dict[field])
2414
2415 info_dict = info_dict.copy()
2416 if filename is not None:
2417 info_dict['filename'] = filename
2418 if info_dict.get('requested_formats') is not None:
2419 # For RTMP URLs, also include the playpath
2420 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2421 elif 'url' in info_dict:
2422 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2423
2424 if self.params.get('forceprint') or self.params.get('forcejson'):
2425 self.post_extract(info_dict)
2426 for tmpl in self.params.get('forceprint', []):
2427 if re.match(r'\w+$', tmpl):
2428 tmpl = '%({})s'.format(tmpl)
2429 tmpl, info_copy = self.prepare_outtmpl(tmpl, info_dict)
2430 self.to_stdout(self.escape_outtmpl(tmpl) % info_copy)
2431
2432 print_mandatory('title')
2433 print_mandatory('id')
2434 print_mandatory('url', 'urls')
2435 print_optional('thumbnail')
2436 print_optional('description')
2437 print_optional('filename')
2438 if self.params.get('forceduration') and info_dict.get('duration') is not None:
2439 self.to_stdout(formatSeconds(info_dict['duration']))
2440 print_mandatory('format')
2441
2442 if self.params.get('forcejson'):
2443 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2444
2445 def dl(self, name, info, subtitle=False, test=False):
2446 if not info.get('url'):
2447 self.raise_no_formats(info, True)
2448
2449 if test:
2450 verbose = self.params.get('verbose')
2451 params = {
2452 'test': True,
2453 'quiet': not verbose,
2454 'verbose': verbose,
2455 'noprogress': not verbose,
2456 'nopart': True,
2457 'skip_unavailable_fragments': False,
2458 'keep_fragments': False,
2459 'overwrites': True,
2460 '_no_ytdl_file': True,
2461 }
2462 else:
2463 params = self.params
2464 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2465 if not test:
2466 for ph in self._progress_hooks:
2467 fd.add_progress_hook(ph)
2468 urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2469 self.write_debug('Invoking downloader on "%s"' % urls)
2470 new_info = dict(info)
2471 if new_info.get('http_headers') is None:
2472 new_info['http_headers'] = self._calc_headers(new_info)
2473 return fd.download(name, new_info, subtitle)
2474
    def process_info(self, info_dict):
        """Process a single resolved IE result.

        Handles forced printing, writing of sidecar files (description,
        annotations, subtitles, info-json, thumbnails, internet shortcuts),
        the download itself (including multi-format merging), ffmpeg fixups,
        postprocessing and download-archive recording.
        """

        assert info_dict.get('_type', 'video') == 'video'

        # Bail out before doing any work if the download limit is already hit
        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads >= int(max_downloads):
                raise MaxDownloadsReached()

        # TODO: backward compatibility, to be removed
        info_dict['fulltitle'] = info_dict['title']

        if 'format' not in info_dict and 'ext' in info_dict:
            info_dict['format'] = info_dict['ext']

        # Skip the video entirely when a match-filter/archive check rejects it
        if self._match_entry(info_dict) is not None:
            return

        self.post_extract(info_dict)
        self._num_downloads += 1

        # info_dict['_filename'] needs to be set for backward compatibility
        info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
        temp_filename = self.prepare_filename(info_dict, 'temp')
        # Maps temp paths -> final paths; consumed by MoveFilesAfterDownloadPP
        files_to_move = {}

        # Forced printings
        self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))

        if self.params.get('simulate'):
            if self.params.get('force_write_download_archive', False):
                self.record_download_archive(info_dict)

            # Do nothing else if in simulate mode
            return

        if full_filename is None:
            return

        if not self._ensure_dir_exists(encodeFilename(full_filename)):
            return
        if not self._ensure_dir_exists(encodeFilename(temp_filename)):
            return

        # Write the video description sidecar file, if requested
        if self.params.get('writedescription', False):
            descfn = self.prepare_filename(info_dict, 'description')
            if not self._ensure_dir_exists(encodeFilename(descfn)):
                return
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Video description is already present')
            elif info_dict.get('description') is None:
                self.report_warning('There\'s no description to write.')
            else:
                try:
                    self.to_screen('[info] Writing video description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(info_dict['description'])
                except (OSError, IOError):
                    self.report_error('Cannot write description file ' + descfn)
                    return

        # Write the annotations sidecar file, if requested
        if self.params.get('writeannotations', False):
            annofn = self.prepare_filename(info_dict, 'annotation')
            if not self._ensure_dir_exists(encodeFilename(annofn)):
                return
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
                self.to_screen('[info] Video annotations are already present')
            elif not info_dict.get('annotations'):
                self.report_warning('There are no annotations to write.')
            else:
                try:
                    self.to_screen('[info] Writing video annotations to: ' + annofn)
                    with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                        annofile.write(info_dict['annotations'])
                except (KeyError, TypeError):
                    self.report_warning('There are no annotations to write.')
                except (OSError, IOError):
                    self.report_error('Cannot write annotations file: ' + annofn)
                    return

        subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                       self.params.get('writeautomaticsub')])

        if subtitles_are_requested and info_dict.get('requested_subtitles'):
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            subtitles = info_dict['requested_subtitles']
            # ie = self.get_info_extractor(info_dict['extractor_key'])
            for sub_lang, sub_info in subtitles.items():
                sub_format = sub_info['ext']
                # Write to a temp-style name first; final name recorded in files_to_move
                sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
                sub_filename_final = subtitles_filename(
                    self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))
                if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
                    self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
                    sub_info['filepath'] = sub_filename
                    files_to_move[sub_filename] = sub_filename_final
                else:
                    self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                    if sub_info.get('data') is not None:
                        try:
                            # Use newline='' to prevent conversion of newline characters
                            # See https://github.com/ytdl-org/youtube-dl/issues/10268
                            with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
                                subfile.write(sub_info['data'])
                            sub_info['filepath'] = sub_filename
                            files_to_move[sub_filename] = sub_filename_final
                        except (OSError, IOError):
                            self.report_error('Cannot write subtitles file ' + sub_filename)
                            return
                    else:
                        try:
                            # No inline data: the subtitle must be downloaded from its URL
                            self.dl(sub_filename, sub_info.copy(), subtitle=True)
                            sub_info['filepath'] = sub_filename
                            files_to_move[sub_filename] = sub_filename_final
                        except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
                            self.report_warning('Unable to download subtitle for "%s": %s' %
                                                (sub_lang, error_to_compat_str(err)))
                            continue

        # Write the info-json sidecar file, if requested
        if self.params.get('writeinfojson', False):
            infofn = self.prepare_filename(info_dict, 'infojson')
            if not self._ensure_dir_exists(encodeFilename(infofn)):
                return
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Video metadata is already present')
            else:
                self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
                try:
                    write_json_file(self.sanitize_info(info_dict, self.params.get('clean_infojson', True)), infofn)
                except (OSError, IOError):
                    self.report_error('Cannot write video metadata to JSON file ' + infofn)
                    return
            info_dict['__infojson_filename'] = infofn

        # Thumbnails are written next to the temp file and moved later
        for thumb_ext in self._write_thumbnails(info_dict, temp_filename):
            thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))
            thumb_filename = replace_extension(
                self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))
            files_to_move[thumb_filename_temp] = thumb_filename

        # Write internet shortcut files
        url_link = webloc_link = desktop_link = False
        if self.params.get('writelink', False):
            if sys.platform == "darwin":  # macOS.
                webloc_link = True
            elif sys.platform.startswith("linux"):
                desktop_link = True
            else:  # if sys.platform in ['win32', 'cygwin']:
                url_link = True
        if self.params.get('writeurllink', False):
            url_link = True
        if self.params.get('writewebloclink', False):
            webloc_link = True
        if self.params.get('writedesktoplink', False):
            desktop_link = True

        if url_link or webloc_link or desktop_link:
            if 'webpage_url' not in info_dict:
                self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
                return
            ascii_url = iri_to_uri(info_dict['webpage_url'])

        def _write_link_file(extension, template, newline, embed_filename):
            # Write one shortcut file; returns False on write failure.
            # NOTE(review): the overwrite check below looks inverted compared
            # to the other overwrite checks in this method, which all use
            # `not self.params.get('overwrites', True)` — verify intent
            linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
            if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
                self.to_screen('[info] Internet shortcut is already present')
            else:
                try:
                    self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
                    with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
                        template_vars = {'url': ascii_url}
                        if embed_filename:
                            template_vars['filename'] = linkfn[:-(len(extension) + 1)]
                        linkfile.write(template % template_vars)
                except (OSError, IOError):
                    self.report_error('Cannot write internet shortcut ' + linkfn)
                    return False
            return True

        if url_link:
            if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
                return
        if webloc_link:
            if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
                return
        if desktop_link:
            if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
                return

        # Run 'before_dl' postprocessors before starting the download
        try:
            info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
        except PostProcessingError as err:
            self.report_error('Preprocessing: %s' % str(err))
            return

        must_record_download_archive = False
        if self.params.get('skip_download', False):
            info_dict['filepath'] = temp_filename
            info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
            info_dict['__files_to_move'] = files_to_move
            info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
        else:
            # Download
            info_dict.setdefault('__postprocessors', [])
            try:

                def existing_file(*filepaths):
                    # Return a previously downloaded file to reuse (possibly in
                    # the post-conversion 'final_ext'), or None after deleting
                    # the candidates when overwriting is enabled
                    ext = info_dict.get('ext')
                    final_ext = self.params.get('final_ext', ext)
                    existing_files = []
                    for file in orderedSet(filepaths):
                        if final_ext != ext:
                            converted = replace_extension(file, final_ext, ext)
                            if os.path.exists(encodeFilename(converted)):
                                existing_files.append(converted)
                        if os.path.exists(encodeFilename(file)):
                            existing_files.append(file)

                    if not existing_files or self.params.get('overwrites', False):
                        for file in orderedSet(existing_files):
                            self.report_file_delete(file)
                            os.remove(encodeFilename(file))
                        return None

                    info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
                    return existing_files[0]

                success = True
                if info_dict.get('requested_formats') is not None:

                    def compatible_formats(formats):
                        # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
                        video_formats = [format for format in formats if format.get('vcodec') != 'none']
                        audio_formats = [format for format in formats if format.get('acodec') != 'none']
                        if len(video_formats) > 2 or len(audio_formats) > 2:
                            return False

                        # Check extension
                        exts = set(format.get('ext') for format in formats)
                        COMPATIBLE_EXTS = (
                            set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
                            set(('webm',)),
                        )
                        for ext_sets in COMPATIBLE_EXTS:
                            if ext_sets.issuperset(exts):
                                return True
                        # TODO: Check acodec/vcodec
                        return False

                    requested_formats = info_dict['requested_formats']
                    old_ext = info_dict['ext']
                    if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'Requested formats are incompatible for merge and will be merged into mkv.')
                    new_ext = info_dict['ext']

                    def correct_ext(filename, ext=new_ext):
                        # Swap the filename's extension to `ext` (no-op for stdout)
                        if filename == '-':
                            return filename
                        filename_real_ext = os.path.splitext(filename)[1][1:]
                        filename_wo_ext = (
                            os.path.splitext(filename)[0]
                            if filename_real_ext in (old_ext, new_ext)
                            else filename)
                        return '%s.%s' % (filename_wo_ext, ext)

                    # Ensure filename always has a correct extension for successful merge
                    full_filename = correct_ext(full_filename)
                    temp_filename = correct_ext(temp_filename)
                    dl_filename = existing_file(full_filename, temp_filename)
                    info_dict['__real_download'] = False

                    _protocols = set(determine_protocol(f) for f in requested_formats)
                    if len(_protocols) == 1:  # All requested formats have same protocol
                        info_dict['protocol'] = _protocols.pop()
                    directly_mergable = FFmpegFD.can_merge_formats(info_dict)
                    if dl_filename is not None:
                        self.report_file_already_downloaded(dl_filename)
                    elif (directly_mergable and get_suitable_downloader(
                            info_dict, self.params, to_stdout=(temp_filename == '-')) == FFmpegFD):
                        # ffmpeg can fetch and mux all formats in one invocation
                        info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
                        success, real_download = self.dl(temp_filename, info_dict)
                        info_dict['__real_download'] = real_download
                    else:
                        # Download each format separately, then merge afterwards
                        downloaded = []
                        merger = FFmpegMergerPP(self)
                        if self.params.get('allow_unplayable_formats'):
                            self.report_warning(
                                'You have requested merging of multiple formats '
                                'while also allowing unplayable formats to be downloaded. '
                                'The formats won\'t be merged to prevent data corruption.')
                        elif not merger.available:
                            self.report_warning(
                                'You have requested merging of multiple formats but ffmpeg is not installed. '
                                'The formats won\'t be merged.')

                        if temp_filename == '-':
                            reason = ('using a downloader other than ffmpeg' if directly_mergable
                                      else 'but the formats are incompatible for simultaneous download' if merger.available
                                      else 'but ffmpeg is not installed')
                            self.report_warning(
                                f'You have requested downloading multiple formats to stdout {reason}. '
                                'The formats will be streamed one after the other')
                        fname = temp_filename
                        for f in requested_formats:
                            new_info = dict(info_dict)
                            del new_info['requested_formats']
                            new_info.update(f)
                            if temp_filename != '-':
                                fname = prepend_extension(
                                    correct_ext(temp_filename, new_info['ext']),
                                    'f%s' % f['format_id'], new_info['ext'])
                                if not self._ensure_dir_exists(fname):
                                    return
                                downloaded.append(fname)
                            partial_success, real_download = self.dl(fname, new_info)
                            info_dict['__real_download'] = info_dict['__real_download'] or real_download
                            success = success and partial_success
                        if merger.available and not self.params.get('allow_unplayable_formats'):
                            info_dict['__postprocessors'].append(merger)
                            info_dict['__files_to_merge'] = downloaded
                            # Even if there were no downloads, it is being merged only now
                            info_dict['__real_download'] = True
                        else:
                            for file in downloaded:
                                files_to_move[file] = None
                else:
                    # Just a single file
                    dl_filename = existing_file(full_filename, temp_filename)
                    if dl_filename is None or dl_filename == temp_filename:
                        # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
                        # So we should try to resume the download
                        success, real_download = self.dl(temp_filename, info_dict)
                        info_dict['__real_download'] = real_download
                    else:
                        self.report_file_already_downloaded(dl_filename)

                dl_filename = dl_filename or temp_filename
                info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

            except network_exceptions as err:
                self.report_error('unable to download video data: %s' % error_to_compat_str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

        if success and full_filename != '-':

            def fixup():
                # Queue ffmpeg-based fixups (or warn about them) per --fixup policy
                do_fixup = True
                fixup_policy = self.params.get('fixup')
                vid = info_dict['id']

                if fixup_policy in ('ignore', 'never'):
                    return
                elif fixup_policy == 'warn':
                    do_fixup = False
                elif fixup_policy != 'force':
                    assert fixup_policy in ('detect_or_warn', None)
                    # Only fix files that were actually (re-)downloaded
                    if not info_dict.get('__real_download'):
                        do_fixup = False

                def ffmpeg_fixup(cndn, msg, cls):
                    if not cndn:
                        return
                    if not do_fixup:
                        self.report_warning(f'{vid}: {msg}')
                        return
                    pp = cls(self)
                    if pp.available:
                        info_dict['__postprocessors'].append(pp)
                    else:
                        self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')

                stretched_ratio = info_dict.get('stretched_ratio')
                ffmpeg_fixup(
                    stretched_ratio not in (1, None),
                    f'Non-uniform pixel ratio {stretched_ratio}',
                    FFmpegFixupStretchedPP)

                ffmpeg_fixup(
                    (info_dict.get('requested_formats') is None
                     and info_dict.get('container') == 'm4a_dash'
                     and info_dict.get('ext') == 'm4a'),
                    'writing DASH m4a. Only some players support this container',
                    FFmpegFixupM4aPP)

                downloader = (get_suitable_downloader(info_dict, self.params).__name__
                              if 'protocol' in info_dict else None)
                ffmpeg_fixup(downloader == 'HlsFD', 'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
                ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
                ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)

            fixup()
            try:
                info_dict = self.post_process(dl_filename, info_dict, files_to_move)
            except PostProcessingError as err:
                self.report_error('Postprocessing: %s' % str(err))
                return
            try:
                for ph in self._post_hooks:
                    ph(info_dict['filepath'])
            except Exception as err:
                self.report_error('post hooks: %s' % str(err))
                return
            must_record_download_archive = True

        if must_record_download_archive or self.params.get('force_write_download_archive', False):
            self.record_download_archive(info_dict)
        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None and self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()
2893
2894 def download(self, url_list):
2895 """Download a given list of URLs."""
2896 outtmpl = self.outtmpl_dict['default']
2897 if (len(url_list) > 1
2898 and outtmpl != '-'
2899 and '%' not in outtmpl
2900 and self.params.get('max_downloads') != 1):
2901 raise SameFileError(outtmpl)
2902
2903 for url in url_list:
2904 try:
2905 # It also downloads the videos
2906 res = self.extract_info(
2907 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2908 except UnavailableVideoError:
2909 self.report_error('unable to download video')
2910 except MaxDownloadsReached:
2911 self.to_screen('[info] Maximum number of downloads reached')
2912 raise
2913 except ExistingVideoReached:
2914 self.to_screen('[info] Encountered a video that is already in the archive, stopping due to --break-on-existing')
2915 raise
2916 except RejectedVideoReached:
2917 self.to_screen('[info] Encountered a video that did not match filter, stopping due to --break-on-reject')
2918 raise
2919 else:
2920 if self.params.get('dump_single_json', False):
2921 self.post_extract(res)
2922 self.to_stdout(json.dumps(self.sanitize_info(res)))
2923
2924 return self._download_retcode
2925
2926 def download_with_info_file(self, info_filename):
2927 with contextlib.closing(fileinput.FileInput(
2928 [info_filename], mode='r',
2929 openhook=fileinput.hook_encoded('utf-8'))) as f:
2930 # FileInput doesn't have a read method, we can't call json.load
2931 info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
2932 try:
2933 self.process_ie_result(info, download=True)
2934 except (DownloadError, EntryNotInPlaylist, ThrottledDownload):
2935 webpage_url = info.get('webpage_url')
2936 if webpage_url is not None:
2937 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2938 return self.download([webpage_url])
2939 else:
2940 raise
2941 return self._download_retcode
2942
2943 @staticmethod
2944 def sanitize_info(info_dict, remove_private_keys=False):
2945 ''' Sanitize the infodict for converting to json '''
2946 if info_dict is None:
2947 return info_dict
2948 info_dict.setdefault('epoch', int(time.time()))
2949 remove_keys = {'__original_infodict'} # Always remove this since this may contain a copy of the entire dict
2950 keep_keys = ['_type'], # Always keep this to facilitate load-info-json
2951 if remove_private_keys:
2952 remove_keys |= {
2953 'requested_formats', 'requested_subtitles', 'requested_entries',
2954 'filepath', 'entries', 'original_url', 'playlist_autonumber',
2955 }
2956 empty_values = (None, {}, [], set(), tuple())
2957 reject = lambda k, v: k not in keep_keys and (
2958 k.startswith('_') or k in remove_keys or v in empty_values)
2959 else:
2960 reject = lambda k, v: k in remove_keys
2961 filter_fn = lambda obj: (
2962 list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
2963 else obj if not isinstance(obj, dict)
2964 else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
2965 return filter_fn(info_dict)
2966
2967 @staticmethod
2968 def filter_requested_info(info_dict, actually_filter=True):
2969 ''' Alias of sanitize_info for backward compatibility '''
2970 return YoutubeDL.sanitize_info(info_dict, actually_filter)
2971
2972 def run_pp(self, pp, infodict):
2973 files_to_delete = []
2974 if '__files_to_move' not in infodict:
2975 infodict['__files_to_move'] = {}
2976 files_to_delete, infodict = pp.run(infodict)
2977 if not files_to_delete:
2978 return infodict
2979
2980 if self.params.get('keepvideo', False):
2981 for f in files_to_delete:
2982 infodict['__files_to_move'].setdefault(f, '')
2983 else:
2984 for old_filename in set(files_to_delete):
2985 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2986 try:
2987 os.remove(encodeFilename(old_filename))
2988 except (IOError, OSError):
2989 self.report_warning('Unable to remove downloaded original file')
2990 if old_filename in infodict['__files_to_move']:
2991 del infodict['__files_to_move'][old_filename]
2992 return infodict
2993
2994 @staticmethod
2995 def post_extract(info_dict):
2996 def actual_post_extract(info_dict):
2997 if info_dict.get('_type') in ('playlist', 'multi_video'):
2998 for video_dict in info_dict.get('entries', {}):
2999 actual_post_extract(video_dict or {})
3000 return
3001
3002 post_extractor = info_dict.get('__post_extractor') or (lambda: {})
3003 extra = post_extractor().items()
3004 info_dict.update(extra)
3005 info_dict.pop('__post_extractor', None)
3006
3007 original_infodict = info_dict.get('__original_infodict') or {}
3008 original_infodict.update(extra)
3009 original_infodict.pop('__post_extractor', None)
3010
3011 actual_post_extract(info_dict or {})
3012
3013 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3014 info = dict(ie_info)
3015 info['__files_to_move'] = files_to_move or {}
3016 for pp in self._pps[key]:
3017 info = self.run_pp(pp, info)
3018 return info, info.pop('__files_to_move', None)
3019
3020 def post_process(self, filename, ie_info, files_to_move=None):
3021 """Run all the postprocessors on the given file."""
3022 info = dict(ie_info)
3023 info['filepath'] = filename
3024 info['__files_to_move'] = files_to_move or {}
3025
3026 for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
3027 info = self.run_pp(pp, info)
3028 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3029 del info['__files_to_move']
3030 for pp in self._pps['after_move']:
3031 info = self.run_pp(pp, info)
3032 return info
3033
3034 def _make_archive_id(self, info_dict):
3035 video_id = info_dict.get('id')
3036 if not video_id:
3037 return
3038 # Future-proof against any change in case
3039 # and backwards compatibility with prior versions
3040 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
3041 if extractor is None:
3042 url = str_or_none(info_dict.get('url'))
3043 if not url:
3044 return
3045 # Try to find matching extractor for the URL and take its ie_key
3046 for ie_key, ie in self._ies.items():
3047 if ie.suitable(url):
3048 extractor = ie_key
3049 break
3050 else:
3051 return
3052 return '%s %s' % (extractor.lower(), video_id)
3053
3054 def in_download_archive(self, info_dict):
3055 fn = self.params.get('download_archive')
3056 if fn is None:
3057 return False
3058
3059 vid_id = self._make_archive_id(info_dict)
3060 if not vid_id:
3061 return False # Incomplete video information
3062
3063 return vid_id in self.archive
3064
3065 def record_download_archive(self, info_dict):
3066 fn = self.params.get('download_archive')
3067 if fn is None:
3068 return
3069 vid_id = self._make_archive_id(info_dict)
3070 assert vid_id
3071 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3072 archive_file.write(vid_id + '\n')
3073 self.archive.add(vid_id)
3074
3075 @staticmethod
3076 def format_resolution(format, default='unknown'):
3077 if format.get('vcodec') == 'none':
3078 if format.get('acodec') == 'none':
3079 return 'images'
3080 return 'audio only'
3081 if format.get('resolution') is not None:
3082 return format['resolution']
3083 if format.get('width') and format.get('height'):
3084 res = '%dx%d' % (format['width'], format['height'])
3085 elif format.get('height'):
3086 res = '%sp' % format['height']
3087 elif format.get('width'):
3088 res = '%dx?' % format['width']
3089 else:
3090 res = default
3091 return res
3092
    def _format_note(self, fdict):
        """Build the free-text 'note' column for the old-style format table.

        Fields are appended in a fixed order; the repeated `if res:` checks
        insert a separator only when something has already been written.
        """
        res = ''
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('language'):
            if res:
                res += ' '
            res += '[%s] ' % fdict['language']
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            if res:
                res += ', '
            res += '%s container' % fdict['container']
        # Video codec / bitrate section
        if (fdict.get('vcodec') is not None
                and fdict.get('vcodec') != 'none'):
            if res:
                res += ', '
            res += fdict['vcodec']
            if fdict.get('vbr') is not None:
                res += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            # Bitrate known but codec unknown — label the video bitrate explicitly
            res += 'video@'
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            if res:
                res += ', '
            res += '%sfps' % fdict['fps']
        # Audio codec / bitrate section
        if fdict.get('acodec') is not None:
            if res:
                res += ', '
            if fdict['acodec'] == 'none':
                res += 'video only'
            else:
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += ', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        # Filesize (exact preferred over approximate)
        if fdict.get('filesize') is not None:
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            if res:
                res += ', '
            res += '~' + format_bytes(fdict['filesize_approx'])
        return res
3148
    def list_formats(self, info_dict):
        """Print the table of available formats for a video.

        Uses the new multi-column table unless disabled via the
        'list-formats' compat option or `listformats_table=False`.
        Formats with preference below -1000 are hidden in both layouts.
        """
        formats = info_dict.get('formats', [info_dict])
        new_format = (
            'list-formats' not in self.params.get('compat_opts', [])
            and self.params.get('listformats_table', True) is not False)
        if new_format:
            table = [
                [
                    format_field(f, 'format_id'),
                    format_field(f, 'ext'),
                    self.format_resolution(f),
                    format_field(f, 'fps', '%d'),
                    '|',
                    format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
                    format_field(f, 'tbr', '%4dk'),
                    shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
                    '|',
                    format_field(f, 'vcodec', default='unknown').replace('none', ''),
                    format_field(f, 'vbr', '%4dk'),
                    format_field(f, 'acodec', default='unknown').replace('none', ''),
                    format_field(f, 'abr', '%3dk'),
                    format_field(f, 'asr', '%5dHz'),
                    ', '.join(filter(None, (
                        'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
                        format_field(f, 'language', '[%s]'),
                        format_field(f, 'format_note'),
                        format_field(f, 'container', ignore=(None, f.get('ext'))),
                    ))),
                ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
            header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
                           '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO']
        else:
            # Legacy 4-column layout; the note column comes from _format_note
            table = [
                [
                    format_field(f, 'format_id'),
                    format_field(f, 'ext'),
                    self.format_resolution(f),
                    self._format_note(f)]
                for f in formats
                if f.get('preference') is None or f['preference'] >= -1000]
            header_line = ['format code', 'extension', 'resolution', 'note']

        self.to_screen(
            '[info] Available formats for %s:' % info_dict['id'])
        self.to_stdout(render_table(
            header_line, table, delim=new_format, extraGap=(0 if new_format else 1), hideEmpty=new_format))
3195
3196 def list_thumbnails(self, info_dict):
3197 thumbnails = list(info_dict.get('thumbnails'))
3198 if not thumbnails:
3199 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3200 return
3201
3202 self.to_screen(
3203 '[info] Thumbnails for %s:' % info_dict['id'])
3204 self.to_stdout(render_table(
3205 ['ID', 'width', 'height', 'URL'],
3206 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3207
3208 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3209 if not subtitles:
3210 self.to_screen('%s has no %s' % (video_id, name))
3211 return
3212 self.to_screen(
3213 'Available %s for %s:' % (name, video_id))
3214
3215 def _row(lang, formats):
3216 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3217 if len(set(names)) == 1:
3218 names = [] if names[0] == 'unknown' else names[:1]
3219 return [lang, ', '.join(names), ', '.join(exts)]
3220
3221 self.to_stdout(render_table(
3222 ['Language', 'Name', 'Formats'],
3223 [_row(lang, formats) for lang, formats in subtitles.items()],
3224 hideEmpty=True))
3225
3226 def urlopen(self, req):
3227 """ Start an HTTP download """
3228 if isinstance(req, compat_basestring):
3229 req = sanitized_Request(req)
3230 return self._opener.open(req, timeout=self._socket_timeout)
3231
    def print_debug_header(self):
        """Emit '[debug] ...' diagnostics (versions, encodings, libraries,
        proxies) when --verbose is enabled; otherwise do nothing."""
        if not self.params.get('verbose'):
            return

        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        # How this copy is being run: frozen exe, zipimport, source checkout, or installed
        source = (
            '(exe)' if hasattr(sys, 'frozen')
            else '(zip)' if isinstance(globals().get('__loader__'), zipimporter)
            else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py'
            else '')
        self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))
        if _LAZY_LOADER:
            self._write_string('[debug] Lazy loading extractors enabled\n')
        if _PLUGIN_CLASSES:
            self._write_string(
                '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
        if self.params.get('compat_opts'):
            self._write_string(
                '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
        # Best-effort: report the git commit when running from a checkout
        try:
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = process_communicate_or_kill(sp)
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: %s\n' % out)
        except Exception:
            # sys.exc_clear only exists on Python 2; ignore it elsewhere
            try:
                sys.exc_clear()
            except Exception:
                pass

        def python_implementation():
            # e.g. 'CPython' or 'PyPy version x.y.z'
            impl_name = platform.python_implementation()
            if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
                return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
            return impl_name

        self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
            platform.python_version(),
            python_implementation(),
            platform.architecture()[0],
            platform_name()))

        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        exe_str = ', '.join(
            f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
        ) or 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        from .downloader.fragment import can_decrypt_frag
        from .downloader.websocket import has_websockets
        from .postprocessor.embedthumbnail import has_mutagen
        from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE

        lib_str = ', '.join(sorted(filter(None, (
            can_decrypt_frag and 'pycryptodome',
            has_websockets and 'websockets',
            has_mutagen and 'mutagen',
            SQLITE_AVAILABLE and 'sqlite',
            KEYRING_AVAILABLE and 'keyring',
        )))) or 'none'
        self._write_string('[debug] Optional libraries: %s\n' % lib_str)

        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            return
            # NOTE(review): the version check below is unreachable because of
            # the `return` above — dead code retained from youtube-dl; verify
            # whether it should be removed or re-enabled
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
3326
3327 def _setup_opener(self):
3328 timeout_val = self.params.get('socket_timeout')
3329 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
3330
3331 opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3332 opts_cookiefile = self.params.get('cookiefile')
3333 opts_proxy = self.params.get('proxy')
3334
3335 self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3336
3337 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3338 if opts_proxy is not None:
3339 if opts_proxy == '':
3340 proxies = {}
3341 else:
3342 proxies = {'http': opts_proxy, 'https': opts_proxy}
3343 else:
3344 proxies = compat_urllib_request.getproxies()
3345 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3346 if 'http' in proxies and 'https' not in proxies:
3347 proxies['https'] = proxies['http']
3348 proxy_handler = PerRequestProxyHandler(proxies)
3349
3350 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3351 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3352 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3353 redirect_handler = YoutubeDLRedirectHandler()
3354 data_handler = compat_urllib_request_DataHandler()
3355
3356 # When passing our own FileHandler instance, build_opener won't add the
3357 # default FileHandler and allows us to disable the file protocol, which
3358 # can be used for malicious purposes (see
3359 # https://github.com/ytdl-org/youtube-dl/issues/8227)
3360 file_handler = compat_urllib_request.FileHandler()
3361
3362 def file_open(*args, **kwargs):
3363 raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3364 file_handler.file_open = file_open
3365
3366 opener = compat_urllib_request.build_opener(
3367 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3368
3369 # Delete the default user-agent header, which would otherwise apply in
3370 # cases where our custom HTTP handler doesn't come into play
3371 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3372 opener.addheaders = []
3373 self._opener = opener
3374
3375 def encode(self, s):
3376 if isinstance(s, bytes):
3377 return s # Already encoded
3378
3379 try:
3380 return s.encode(self.get_encoding())
3381 except UnicodeEncodeError as err:
3382 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3383 raise
3384
3385 def get_encoding(self):
3386 encoding = self.params.get('encoding')
3387 if encoding is None:
3388 encoding = preferredencoding()
3389 return encoding
3390
    def _write_thumbnails(self, info_dict, filename):  # return the extensions
        """Download thumbnail(s) for the video to files alongside `filename`.

        With 'write_all_thumbnails' every available thumbnail is written
        (suffixed with its id when there is more than one); with just
        'writethumbnail' only the first thumbnail that downloads successfully
        is written. Each written thumbnail's path is recorded in
        t['filepath']. Returns the list of extensions written (each including
        the '<id>.' suffix when multiple thumbnails are written).
        """
        write_all = self.params.get('write_all_thumbnails', False)
        thumbnails = []
        if write_all or self.params.get('writethumbnail', False):
            thumbnails = info_dict.get('thumbnails') or []
        multiple = write_all and len(thumbnails) > 1

        ret = []
        # Iterate in reverse - presumably the list is ordered worst-to-best so
        # the preferred thumbnail is tried first; TODO confirm against the
        # extractor-side thumbnail sorting
        for t in thumbnails[::-1]:
            thumb_ext = determine_ext(t['url'], 'jpg')
            suffix = '%s.' % t['id'] if multiple else ''
            thumb_display_id = '%s ' % t['id'] if multiple else ''
            thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))

            # An existing file is reused (and still reported in the return
            # value) unless overwrites are enabled
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
                ret.append(suffix + thumb_ext)
                t['filepath'] = thumb_filename
                self.to_screen('[%s] %s: Thumbnail %sis already present' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
            else:
                self.to_screen('[%s] %s: Downloading thumbnail %s ...' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
                try:
                    uf = self.urlopen(t['url'])
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    ret.append(suffix + thumb_ext)
                    self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
                    t['filepath'] = thumb_filename
                except network_exceptions as err:
                    # Best effort: a failed download only warns, then the next
                    # candidate thumbnail is tried
                    self.report_warning('Unable to download thumbnail "%s": %s' %
                                        (t['url'], error_to_compat_str(err)))
            # Unless all thumbnails were requested, stop after the first success
            if ret and not write_all:
                break
        return ret