yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import copy
   9 import datetime
  10 import errno
  11 import fileinput
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import socket
  23 import sys
  24 import time
  25 import tokenize
  26 import traceback
  27 import random
  28
  29 from string import ascii_letters
  30 from zipimport import zipimporter
  31
  32 from .compat import (
  33     compat_basestring,
  34     compat_cookiejar,
  35     compat_get_terminal_size,
  36     compat_http_client,
  37     compat_kwargs,
  38     compat_numeric_types,
  39     compat_os_name,
  40     compat_str,
  41     compat_tokenize_tokenize,
  42     compat_urllib_error,
  43     compat_urllib_request,
  44     compat_urllib_request_DataHandler,
  45 )
  46 from .utils import (
  47     age_restricted,
  48     args_to_str,
  49     ContentTooShortError,
  50     date_from_str,
  51     DateRange,
  52     DEFAULT_OUTTMPL,
  53     OUTTMPL_TYPES,
  54     determine_ext,
  55     determine_protocol,
  56     DOT_DESKTOP_LINK_TEMPLATE,
  57     DOT_URL_LINK_TEMPLATE,
  58     DOT_WEBLOC_LINK_TEMPLATE,
  59     DownloadError,
  60     encode_compat_str,
  61     encodeFilename,
  62     error_to_compat_str,
  63     EntryNotInPlaylist,
  64     ExistingVideoReached,
  65     expand_path,
  66     ExtractorError,
  67     float_or_none,
  68     format_bytes,
  69     format_field,
  70     FORMAT_RE,
  71     formatSeconds,
  72     GeoRestrictedError,
  73     int_or_none,
  74     iri_to_uri,
  75     ISO3166Utils,
  76     locked_file,
  77     make_dir,
  78     make_HTTPS_handler,
  79     MaxDownloadsReached,
  80     orderedSet,
  81     PagedList,
  82     parse_filesize,
  83     PerRequestProxyHandler,
  84     platform_name,
  85     PostProcessingError,
  86     preferredencoding,
  87     prepend_extension,
  88     register_socks_protocols,
  89     render_table,
  90     replace_extension,
  91     RejectedVideoReached,
  92     SameFileError,
  93     sanitize_filename,
  94     sanitize_path,
  95     sanitize_url,
  96     sanitized_Request,
  97     std_headers,
  98     str_or_none,
  99     strftime_or_none,
 100     subtitles_filename,
 101     to_high_limit_path,
 102     UnavailableVideoError,
 103     url_basename,
 104     version_tuple,
 105     write_json_file,
 106     write_string,
 107     YoutubeDLCookieJar,
 108     YoutubeDLCookieProcessor,
 109     YoutubeDLHandler,
 110     YoutubeDLRedirectHandler,
 111     process_communicate_or_kill,
 112 )
 113 from .cache import Cache
 114 from .extractor import (
 115     gen_extractor_classes,
 116     get_info_extractor,
 117     _LAZY_LOADER,
 118     _PLUGIN_CLASSES
 119 )
 120 from .extractor.openload import PhantomJSwrapper
 121 from .downloader import (
 122     get_suitable_downloader,
 123     shorten_protocol_name
 124 )
 125 from .downloader.rtmp import rtmpdump_version
 126 from .postprocessor import (
 127     FFmpegFixupM3u8PP,
 128     FFmpegFixupM4aPP,
 129     FFmpegFixupStretchedPP,
 130     FFmpegMergerPP,
 131     FFmpegPostProcessor,
 132     # FFmpegSubtitlesConvertorPP,
 133     get_postprocessor,
 134     MoveFilesAfterDownloadPP,
 135 )
 136 from .version import __version__
 137
 138 if compat_os_name == 'nt':
 139     import ctypes
 140
 141
 142 class YoutubeDL(object):
 143     """YoutubeDL class.
 144
  145     YoutubeDL objects are the ones responsible for downloading the
 146     actual video file and writing it to disk if the user has requested
 147     it, among some other tasks. In most cases there should be one per
 148     program. As, given a video URL, the downloader doesn't know how to
 149     extract all the needed information, task that InfoExtractors do, it
 150     has to pass the URL to one of them.
 151
 152     For this, YoutubeDL objects have a method that allows
 153     InfoExtractors to be registered in a given order. When it is passed
  154     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 155     finds that reports being able to handle it. The InfoExtractor extracts
 156     all the information about the video or videos the URL refers to, and
  157     YoutubeDL processes the extracted information, possibly using a File
 158     Downloader to download the video.
 159
 160     YoutubeDL objects accept a lot of parameters. In order not to saturate
 161     the object constructor with arguments, it receives a dictionary of
 162     options instead. These options are available through the params
 163     attribute for the InfoExtractors to use. The YoutubeDL also
 164     registers itself as the downloader in charge for the InfoExtractors
 165     that are added to it, so this is a "mutual registration".
 166
 167     Available options:
 168
 169     username:          Username for authentication purposes.
 170     password:          Password for authentication purposes.
 171     videopassword:     Password for accessing a video.
 172     ap_mso:            Adobe Pass multiple-system operator identifier.
 173     ap_username:       Multiple-system operator account username.
 174     ap_password:       Multiple-system operator account password.
 175     usenetrc:          Use netrc for authentication instead.
 176     verbose:           Print additional info to stdout.
 177     quiet:             Do not print messages to stdout.
 178     no_warnings:       Do not print out anything for warnings.
 179     forceurl:          Force printing final URL.
 180     forcetitle:        Force printing title.
 181     forceid:           Force printing ID.
 182     forcethumbnail:    Force printing thumbnail URL.
 183     forcedescription:  Force printing description.
 184     forcefilename:     Force printing final filename.
 185     forceduration:     Force printing duration.
 186     forcejson:         Force printing info_dict as JSON.
 187     dump_single_json:  Force printing the info_dict of the whole playlist
 188                        (or video) as a single JSON line.
 189     force_write_download_archive: Force writing download archive regardless
 190                        of 'skip_download' or 'simulate'.
 191     simulate:          Do not download the video files.
 192     format:            Video format code. see "FORMAT SELECTION" for more details.
 193     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
  194     ignore_no_formats_error: Ignore "No video formats" error. Useful for
 195                        extracting metadata even if the video is not actually
 196                        available for download (experimental)
 197     format_sort:       How to sort the video formats. see "Sorting Formats"
 198                        for more details.
 199     format_sort_force: Force the given format_sort. see "Sorting Formats"
 200                        for more details.
 201     allow_multiple_video_streams:   Allow multiple video streams to be merged
 202                        into a single file
 203     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 204                        into a single file
 205     paths:             Dictionary of output paths. The allowed keys are 'home'
 206                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 207     outtmpl:           Dictionary of templates for output names. Allowed keys
 208                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
  209                        A string is also accepted for backward compatibility
 210     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 211     restrictfilenames: Do not allow "&" and spaces in file names
 212     trim_file_name:    Limit length of filename (extension excluded)
 213     windowsfilenames:  Force the filenames to be windows compatible
 214     ignoreerrors:      Do not stop on download errors
 215                        (Default True when running yt-dlp,
 216                        but False when directly accessing YoutubeDL class)
 217     force_generic_extractor: Force downloader to use the generic extractor
 218     overwrites:        Overwrite all video and metadata files if True,
 219                        overwrite only non-video files if None
 220                        and don't overwrite any file if False
 221     playliststart:     Playlist item to start at.
 222     playlistend:       Playlist item to end at.
 223     playlist_items:    Specific indices of playlist to download.
 224     playlistreverse:   Download playlist items in reverse order.
 225     playlistrandom:    Download playlist items in random order.
 226     matchtitle:        Download only matching titles.
 227     rejecttitle:       Reject downloads for matching titles.
 228     logger:            Log messages to a logging.Logger instance.
 229     logtostderr:       Log messages to stderr instead of stdout.
 230     writedescription:  Write the video description to a .description file
  231     writeinfojson:     Write the video metadata to a .info.json file
 232     clean_infojson:    Remove private fields from the infojson
 233     writecomments:     Extract video comments. This will not be written to disk
 234                        unless writeinfojson is also given
 235     writeannotations:  Write the video annotations to a .annotations.xml file
 236     writethumbnail:    Write the thumbnail image to a file
 237     allow_playlist_files: Whether to write playlists' description, infojson etc
 238                        also to disk when using the 'write*' options
 239     write_all_thumbnails:  Write all thumbnail formats to files
 240     writelink:         Write an internet shortcut file, depending on the
 241                        current platform (.url/.webloc/.desktop)
 242     writeurllink:      Write a Windows internet shortcut file (.url)
 243     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 244     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 245     writesubtitles:    Write the video subtitles to a file
 246     writeautomaticsub: Write the automatically generated subtitles to a file
  247     allsubtitles:      Deprecated - Use subtitleslangs = ['all']
 248                        Downloads all the subtitles of the video
 249                        (requires writesubtitles or writeautomaticsub)
 250     listsubtitles:     Lists all available subtitles for the video
 251     subtitlesformat:   The format code for subtitles
 252     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 253                        The list may contain "all" to refer to all the available
 254                        subtitles. The language can be prefixed with a "-" to
 255                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 256     keepvideo:         Keep the video file after post-processing
 257     daterange:         A DateRange object, download only if the upload_date is in the range.
 258     skip_download:     Skip the actual download of the video file
 259     cachedir:          Location of the cache files in the filesystem.
 260                        False to disable filesystem cache.
 261     noplaylist:        Download single video instead of a playlist if in doubt.
 262     age_limit:         An integer representing the user's age in years.
 263                        Unsuitable videos for the given age are skipped.
 264     min_views:         An integer representing the minimum view count the video
 265                        must have in order to not be skipped.
 266                        Videos without view count information are always
 267                        downloaded. None for no limit.
 268     max_views:         An integer representing the maximum view count.
 269                        Videos that are more popular than that are not
 270                        downloaded.
 271                        Videos without view count information are always
 272                        downloaded. None for no limit.
 273     download_archive:  File name of a file where all downloads are recorded.
 274                        Videos already present in the file are not downloaded
 275                        again.
 276     break_on_existing: Stop the download process after attempting to download a
 277                        file that is in the archive.
 278     break_on_reject:   Stop the download process when encountering a video that
 279                        has been filtered out.
 280     cookiefile:        File name where cookies should be read from and dumped to
 281     nocheckcertificate:Do not verify SSL certificates
 282     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 283                        At the moment, this is only supported by YouTube.
 284     proxy:             URL of the proxy server to use
 285     geo_verification_proxy:  URL of the proxy to use for IP address verification
 286                        on geo-restricted sites.
 287     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 288     bidi_workaround:   Work around buggy terminals without bidirectional text
  289                        support, using fribidi
 290     debug_printtraffic:Print out sent and received HTTP traffic
 291     include_ads:       Download ads as well
 292     default_search:    Prepend this string if an input url is not valid.
 293                        'auto' for elaborate guessing
 294     encoding:          Use this encoding instead of the system-specified.
 295     extract_flat:      Do not resolve URLs, return the immediate result.
 296                        Pass in 'in_playlist' to only show this behavior for
 297                        playlist items.
 298     postprocessors:    A list of dictionaries, each with an entry
 299                        * key:  The name of the postprocessor. See
 300                                yt_dlp/postprocessor/__init__.py for a list.
 301                        * when: When to run the postprocessor. Can be one of
 302                                pre_process|before_dl|post_process|after_move.
 303                                Assumed to be 'post_process' if not given
 304     post_hooks:        A list of functions that get called as the final step
 305                        for each video file, after all postprocessors have been
 306                        called. The filename will be passed as the only argument.
 307     progress_hooks:    A list of functions that get called on download
 308                        progress, with a dictionary with the entries
 309                        * status: One of "downloading", "error", or "finished".
 310                                  Check this first and ignore unknown values.
 311
 312                        If status is one of "downloading", or "finished", the
 313                        following properties may also be present:
 314                        * filename: The final filename (always present)
 315                        * tmpfilename: The filename we're currently writing to
 316                        * downloaded_bytes: Bytes on disk
 317                        * total_bytes: Size of the whole file, None if unknown
 318                        * total_bytes_estimate: Guess of the eventual file size,
 319                                                None if unavailable.
 320                        * elapsed: The number of seconds since download started.
 321                        * eta: The estimated time in seconds, None if unknown
 322                        * speed: The download speed in bytes/second, None if
 323                                 unknown
 324                        * fragment_index: The counter of the currently
 325                                          downloaded video fragment.
 326                        * fragment_count: The number of fragments (= individual
 327                                          files that will be merged)
 328
 329                        Progress hooks are guaranteed to be called at least once
 330                        (with status "finished") if the download is successful.
 331     merge_output_format: Extension to use when merging formats.
 332     final_ext:         Expected final extension; used to detect when the file was
 333                        already downloaded and converted. "merge_output_format" is
 334                        replaced by this extension when given
 335     fixup:             Automatically correct known faults of the file.
 336                        One of:
 337                        - "never": do nothing
 338                        - "warn": only emit a warning
 339                        - "detect_or_warn": check whether we can do anything
 340                                            about it, warn otherwise (default)
 341     source_address:    Client-side IP address to bind to.
 342     call_home:         Boolean, true iff we are allowed to contact the
 343                        yt-dlp servers for debugging. (BROKEN)
 344     sleep_interval_requests: Number of seconds to sleep between requests
 345                        during extraction
 346     sleep_interval:    Number of seconds to sleep before each download when
 347                        used alone or a lower bound of a range for randomized
 348                        sleep before each download (minimum possible number
 349                        of seconds to sleep) when used along with
 350                        max_sleep_interval.
 351     max_sleep_interval:Upper bound of a range for randomized sleep before each
 352                        download (maximum possible number of seconds to sleep).
 353                        Must only be used along with sleep_interval.
 354                        Actual sleep time will be a random float from range
 355                        [sleep_interval; max_sleep_interval].
 356     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 357     listformats:       Print an overview of available video formats and exit.
 358     list_thumbnails:   Print a table of all thumbnails and exit.
 359     match_filter:      A function that gets called with the info_dict of
 360                        every video.
 361                        If it returns a message, the video is ignored.
 362                        If it returns None, the video is downloaded.
 363                        match_filter_func in utils.py is one example for this.
 364     no_color:          Do not emit color codes in output.
 365     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 366                        HTTP header
 367     geo_bypass_country:
  368                        Two-letter ISO 3166-1 alpha-2 country code that will be used for
 369                        explicit geographic restriction bypassing via faking
 370                        X-Forwarded-For HTTP header
 371     geo_bypass_ip_block:
 372                        IP range in CIDR notation that will be used similarly to
 373                        geo_bypass_country
 374
 375     The following options determine which downloader is picked:
 376     external_downloader: A dictionary of protocol keys and the executable of the
 377                        external downloader to use for it. The allowed protocols
 378                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 379                        Set the value to 'native' to use the native downloader
 380     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 381                        or {'m3u8': 'ffmpeg'} instead.
 382                        Use the native HLS downloader instead of ffmpeg/avconv
 383                        if True, otherwise use ffmpeg/avconv if False, otherwise
 384                        use downloader suggested by extractor if None.
 385
 386     The following parameters are not used by YoutubeDL itself, they are used by
 387     the downloader (see yt_dlp/downloader/common.py):
 388     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 389     noresizebuffer, retries, continuedl, noprogress, consoletitle,
 390     xattr_set_filesize, external_downloader_args, hls_use_mpegts,
 391     http_chunk_size.
 392
 393     The following options are used by the post processors:
 394     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 395                        otherwise prefer ffmpeg. (avconv support is deprecated)
 396     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 397                        to the binary or its containing directory.
 398     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 399                         and a list of additional command-line arguments for the
 400                         postprocessor/executable. The dict can also have "PP+EXE" keys
 401                         which are used when the given exe is used by the given PP.
  402                         Use 'default' as the name for arguments to be passed to all PP
 403
 404     The following options are used by the extractors:
 405     extractor_retries: Number of times to retry for known errors
 406     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 407     hls_split_discontinuity: Split HLS playlists to different formats at
 408                        discontinuities such as ad breaks (default: False)
 409     youtube_include_dash_manifest: If True (default), DASH manifests and related
 410                        data will be downloaded and processed by extractor.
 411                        You can reduce network I/O by disabling it if you don't
 412                        care about DASH. (only for youtube)
 413     youtube_include_hls_manifest: If True (default), HLS manifests and related
 414                        data will be downloaded and processed by extractor.
 415                        You can reduce network I/O by disabling it if you don't
 416                        care about HLS. (only for youtube)
 417     """
 418
 419     _NUMERIC_FIELDS = set((
 420         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 421         'timestamp', 'upload_year', 'upload_month', 'upload_day',
 422         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 423         'average_rating', 'comment_count', 'age_limit',
 424         'start_time', 'end_time',
 425         'chapter_number', 'season_number', 'episode_number',
 426         'track_number', 'disc_number', 'release_year',
 427         'playlist_index',
 428     ))
 429
 430     params = None
 431     _ies = []
 432     _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 433     __prepare_filename_warned = False
 434     _first_webpage_request = True
 435     _download_retcode = None
 436     _num_downloads = None
 437     _playlist_level = 0
 438     _playlist_urls = set()
 439     _screen_file = None
 440
 441     def __init__(self, params=None, auto_init=True):
 442         """Create a FileDownloader object with the given options."""
 443         if params is None:
 444             params = {}
 445         self._ies = []
 446         self._ies_instances = {}
 447         self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 448         self.__prepare_filename_warned = False
 449         self._first_webpage_request = True
 450         self._post_hooks = []
 451         self._progress_hooks = []
 452         self._download_retcode = 0
 453         self._num_downloads = 0
 454         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 455         self._err_file = sys.stderr
 456         self.params = {
 457             # Default parameters
 458             'nocheckcertificate': False,
 459         }
 460         self.params.update(params)
 461         self.cache = Cache(self)
 462         self.archive = set()
 463
 464         """Preload the archive, if any is specified"""
 465         def preload_download_archive(self):
 466             fn = self.params.get('download_archive')
 467             if fn is None:
 468                 return False
 469             try:
 470                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 471                     for line in archive_file:
 472                         self.archive.add(line.strip())
 473             except IOError as ioe:
 474                 if ioe.errno != errno.ENOENT:
 475                     raise
 476                 return False
 477             return True
 478
 479         def check_deprecated(param, option, suggestion):
 480             if self.params.get(param) is not None:
 481                 self.report_warning(
 482                     '%s is deprecated. Use %s instead.' % (option, suggestion))
 483                 return True
 484             return False
 485
 486         if self.params.get('verbose'):
 487             self.to_stdout('[debug] Loading archive file %r' % self.params.get('download_archive'))
 488
 489         preload_download_archive(self)
 490
 491         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 492             if self.params.get('geo_verification_proxy') is None:
 493                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 494
 495         if self.params.get('final_ext'):
 496             if self.params.get('merge_output_format'):
 497                 self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
 498             self.params['merge_output_format'] = self.params['final_ext']
 499
 500         if 'overwrites' in self.params and self.params['overwrites'] is None:
 501             del self.params['overwrites']
 502
 503         check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
 504         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 505         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 506
 507         if params.get('bidi_workaround', False):
 508             try:
 509                 import pty
 510                 master, slave = pty.openpty()
 511                 width = compat_get_terminal_size().columns
 512                 if width is None:
 513                     width_args = []
 514                 else:
 515                     width_args = ['-w', str(width)]
 516                 sp_kwargs = dict(
 517                     stdin=subprocess.PIPE,
 518                     stdout=slave,
 519                     stderr=self._err_file)
 520                 try:
 521                     self._output_process = subprocess.Popen(
 522                         ['bidiv'] + width_args, **sp_kwargs
 523                     )
 524                 except OSError:
 525                     self._output_process = subprocess.Popen(
 526                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 527                 self._output_channel = os.fdopen(master, 'rb')
 528             except OSError as ose:
 529                 if ose.errno == errno.ENOENT:
 530                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 531                 else:
 532                     raise
 533
 534         if (sys.platform != 'win32'
 535                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 536                 and not params.get('restrictfilenames', False)):
 537             # Unicode filesystem API will throw errors (#1474, #13027)
 538             self.report_warning(
 539                 'Assuming --restrict-filenames since file system encoding '
 540                 'cannot encode all characters. '
 541                 'Set the LC_ALL environment variable to fix this.')
 542             self.params['restrictfilenames'] = True
 543
 544         self.outtmpl_dict = self.parse_outtmpl()
 545
 546         self._setup_opener()
 547
 548         if auto_init:
 549             self.print_debug_header()
 550             self.add_default_info_extractors()
 551
 552         for pp_def_raw in self.params.get('postprocessors', []):
 553             pp_class = get_postprocessor(pp_def_raw['key'])
 554             pp_def = dict(pp_def_raw)
 555             del pp_def['key']
 556             if 'when' in pp_def:
 557                 when = pp_def['when']
 558                 del pp_def['when']
 559             else:
 560                 when = 'post_process'
 561             pp = pp_class(self, **compat_kwargs(pp_def))
 562             self.add_post_processor(pp, when=when)
 563
 564         for ph in self.params.get('post_hooks', []):
 565             self.add_post_hook(ph)
 566
 567         for ph in self.params.get('progress_hooks', []):
 568             self.add_progress_hook(ph)
 569
 570         register_socks_protocols()
 571
 572     def warn_if_short_id(self, argv):
 573         # short YouTube ID starting with dash?
 574         idxs = [
 575             i for i, a in enumerate(argv)
 576             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 577         if idxs:
 578             correct_argv = (
 579                 ['yt-dlp']
 580                 + [a for i, a in enumerate(argv) if i not in idxs]
 581                 + ['--'] + [argv[i] for i in idxs]
 582             )
 583             self.report_warning(
 584                 'Long argument string detected. '
 585                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 586                 args_to_str(correct_argv))
 587
 588     def add_info_extractor(self, ie):
 589         """Add an InfoExtractor object to the end of the list."""
 590         self._ies.append(ie)
 591         if not isinstance(ie, type):
 592             self._ies_instances[ie.ie_key()] = ie
 593             ie.set_downloader(self)
 594
 595     def get_info_extractor(self, ie_key):
 596         """
 597         Get an instance of an IE with name ie_key, it will try to get one from
 598         the _ies list, if there's no instance it will create a new one and add
 599         it to the extractor list.
 600         """
 601         ie = self._ies_instances.get(ie_key)
 602         if ie is None:
 603             ie = get_info_extractor(ie_key)()
 604             self.add_info_extractor(ie)
 605         return ie
 606
    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractor_classes to the end
        of the list
        """
        # Each item is passed on unchanged; add_info_extractor() only binds
        # instances (not classes) to this downloader
        for ie in gen_extractor_classes():
            self.add_info_extractor(ie)
 613
 614     def add_post_processor(self, pp, when='post_process'):
 615         """Add a PostProcessor object to the end of the chain."""
 616         self._pps[when].append(pp)
 617         pp.set_downloader(self)
 618
    def add_post_hook(self, ph):
        """Add the post hook to the end of the list of post hooks"""
        # Hooks are kept in registration order
        self._post_hooks.append(ph)
 622
    def add_progress_hook(self, ph):
        """Add the progress hook (currently only for the file downloader)"""
        # Hooks are kept in registration order
        self._progress_hooks.append(ph)
 626
 627     def _bidi_workaround(self, message):
 628         if not hasattr(self, '_output_channel'):
 629             return message
 630
 631         assert hasattr(self, '_output_process')
 632         assert isinstance(message, compat_str)
 633         line_count = message.count('\n') + 1
 634         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 635         self._output_process.stdin.flush()
 636         res = ''.join(self._output_channel.readline().decode('utf-8')
 637                       for _ in range(line_count))
 638         return res[:-len('\n')]
 639
 640     def to_screen(self, message, skip_eol=False):
 641         """Print message to stdout if not in quiet mode."""
 642         return self.to_stdout(message, skip_eol, check_quiet=True)
 643
 644     def _write_string(self, s, out=None):
 645         write_string(s, out=out, encoding=self.params.get('encoding'))
 646
 647     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 648         """Print message to stdout if not in quiet mode."""
 649         if self.params.get('logger'):
 650             self.params['logger'].debug(message)
 651         elif not check_quiet or not self.params.get('quiet', False):
 652             message = self._bidi_workaround(message)
 653             terminator = ['\n', ''][skip_eol]
 654             output = message + terminator
 655
 656             self._write_string(output, self._screen_file)
 657
 658     def to_stderr(self, message):
 659         """Print message to stderr."""
 660         assert isinstance(message, compat_str)
 661         if self.params.get('logger'):
 662             self.params['logger'].error(message)
 663         else:
 664             message = self._bidi_workaround(message)
 665             output = message + '\n'
 666             self._write_string(output, self._err_file)
 667
 668     def to_console_title(self, message):
 669         if not self.params.get('consoletitle', False):
 670             return
 671         if compat_os_name == 'nt':
 672             if ctypes.windll.kernel32.GetConsoleWindow():
 673                 # c_wchar_p() might not be necessary if `message` is
 674                 # already of type unicode()
 675                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 676         elif 'TERM' in os.environ:
 677             self._write_string('\033]0;%s\007' % message, self._screen_file)
 678
 679     def save_console_title(self):
 680         if not self.params.get('consoletitle', False):
 681             return
 682         if self.params.get('simulate', False):
 683             return
 684         if compat_os_name != 'nt' and 'TERM' in os.environ:
 685             # Save the title on stack
 686             self._write_string('\033[22;0t', self._screen_file)
 687
 688     def restore_console_title(self):
 689         if not self.params.get('consoletitle', False):
 690             return
 691         if self.params.get('simulate', False):
 692             return
 693         if compat_os_name != 'nt' and 'TERM' in os.environ:
 694             # Restore the title from stack
 695             self._write_string('\033[23;0t', self._screen_file)
 696
 697     def __enter__(self):
 698         self.save_console_title()
 699         return self
 700
 701     def __exit__(self, *args):
 702         self.restore_console_title()
 703
 704         if self.params.get('cookiefile') is not None:
 705             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 706
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.

        When the 'ignoreerrors' param is not set, DownloadError is raised
        with the message and the selected exc_info. Otherwise nothing is
        raised and self._download_retcode is set to 1.
        """
        if message is not None:
            self.to_stderr(message)
        # Traceback details are only printed in verbose mode
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # An active exception that exposes a non-empty exc_info
                    # attribute contributes that traceback first
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    # No active exception: fall back to the current call stack
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Prefer the exc_info carried by the active exception (when it has
            # one) over the exc_info of the active exception itself
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        # Errors are ignored: only remember that something went wrong
        self._download_retcode = 1
 736
 737     def report_warning(self, message):
 738         '''
 739         Print the message to stderr, it will be prefixed with 'WARNING:'
 740         If stderr is a tty file the 'WARNING:' will be colored
 741         '''
 742         if self.params.get('logger') is not None:
 743             self.params['logger'].warning(message)
 744         else:
 745             if self.params.get('no_warnings'):
 746                 return
 747             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 748                 _msg_header = '\033[0;33mWARNING:\033[0m'
 749             else:
 750                 _msg_header = 'WARNING:'
 751             warning_message = '%s %s' % (_msg_header, message)
 752             self.to_stderr(warning_message)
 753
 754     def report_error(self, message, tb=None):
 755         '''
 756         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 757         in red if stderr is a tty file.
 758         '''
 759         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 760             _msg_header = '\033[0;31mERROR:\033[0m'
 761         else:
 762             _msg_header = 'ERROR:'
 763         error_message = '%s %s' % (_msg_header, message)
 764         self.trouble(error_message, tb)
 765
 766     def report_file_already_downloaded(self, file_name):
 767         """Report file has already been fully downloaded."""
 768         try:
 769             self.to_screen('[download] %s has already been downloaded' % file_name)
 770         except UnicodeEncodeError:
 771             self.to_screen('[download] The file has already been downloaded')
 772
 773     def report_file_delete(self, file_name):
 774         """Report that existing file will be deleted."""
 775         try:
 776             self.to_screen('Deleting existing file %s' % file_name)
 777         except UnicodeEncodeError:
 778             self.to_screen('Deleting existing file')
 779
 780     def parse_outtmpl(self):
 781         outtmpl_dict = self.params.get('outtmpl', {})
 782         if not isinstance(outtmpl_dict, dict):
 783             outtmpl_dict = {'default': outtmpl_dict}
 784         outtmpl_dict.update({
 785             k: v for k, v in DEFAULT_OUTTMPL.items()
 786             if not outtmpl_dict.get(k)})
 787         for key, val in outtmpl_dict.items():
 788             if isinstance(val, bytes):
 789                 self.report_warning(
 790                     'Parameter outtmpl is bytes, but should be a unicode string. '
 791                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 792         return outtmpl_dict
 793
 794     def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
 795         """ Make the template and info_dict suitable for substitution (outtmpl % info_dict)"""
 796         template_dict = dict(info_dict)
 797
 798         # duration_string
 799         template_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
 800             formatSeconds(info_dict['duration'], '-')
 801             if info_dict.get('duration', None) is not None
 802             else None)
 803
 804         # epoch
 805         template_dict['epoch'] = int(time.time())
 806
 807         # autonumber
 808         autonumber_size = self.params.get('autonumber_size')
 809         if autonumber_size is None:
 810             autonumber_size = 5
 811         template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
 812
 813         # resolution if not defined
 814         if template_dict.get('resolution') is None:
 815             if template_dict.get('width') and template_dict.get('height'):
 816                 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 817             elif template_dict.get('height'):
 818                 template_dict['resolution'] = '%sp' % template_dict['height']
 819             elif template_dict.get('width'):
 820                 template_dict['resolution'] = '%dx?' % template_dict['width']
 821
 822         if sanitize is None:
 823             sanitize = lambda k, v: v
 824         template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
 825                              for k, v in template_dict.items()
 826                              if v is not None and not isinstance(v, (list, tuple, dict)))
 827         na = self.params.get('outtmpl_na_placeholder', 'NA')
 828         template_dict = collections.defaultdict(lambda: na, template_dict)
 829
 830         # For fields playlist_index and autonumber convert all occurrences
 831         # of %(field)s to %(field)0Nd for backward compatibility
 832         field_size_compat_map = {
 833             'playlist_index': len(str(template_dict['n_entries'])),
 834             'autonumber': autonumber_size,
 835         }
 836         FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
 837         mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
 838         if mobj:
 839             outtmpl = re.sub(
 840                 FIELD_SIZE_COMPAT_RE,
 841                 r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
 842                 outtmpl)
 843
 844         numeric_fields = list(self._NUMERIC_FIELDS)
 845
 846         # Format date
 847         FORMAT_DATE_RE = FORMAT_RE.format(r'(?P<key>(?P<field>\w+)>(?P<format>.+?))')
 848         for mobj in re.finditer(FORMAT_DATE_RE, outtmpl):
 849             conv_type, field, frmt, key = mobj.group('type', 'field', 'format', 'key')
 850             if key in template_dict:
 851                 continue
 852             value = strftime_or_none(template_dict.get(field), frmt, na)
 853             if conv_type in 'crs':  # string
 854                 value = sanitize(field, value)
 855             else:  # number
 856                 numeric_fields.append(key)
 857                 value = float_or_none(value, default=None)
 858             if value is not None:
 859                 template_dict[key] = value
 860
 861         # Missing numeric fields used together with integer presentation types
 862         # in format specification will break the argument substitution since
 863         # string NA placeholder is returned for missing fields. We will patch
 864         # output template for missing fields to meet string presentation type.
 865         for numeric_field in numeric_fields:
 866             if numeric_field not in template_dict:
 867                 outtmpl = re.sub(
 868                     FORMAT_RE.format(re.escape(numeric_field)),
 869                     r'%({0})s'.format(numeric_field), outtmpl)
 870
 871         return outtmpl, template_dict
 872
    def _prepare_filename(self, info_dict, tmpl_type='default'):
        """Expand the output template of type tmpl_type against info_dict.

        The template is taken from self.outtmpl_dict, falling back to the
        'default' entry when tmpl_type has none. Returns the resulting
        filename. When the expansion raises ValueError the problem is passed
        to report_error() and None is returned.
        """
        try:
            # Every substituted value is turned into a filename-safe string;
            # keys named 'id' or ending in '_id' are sanitized as ids
            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id' or k.endswith('_id')))
            outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
            outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)

            # expand_path translates '%%' into '%' and '$$' into '$'
            # correspondingly that is not what we want since we need to keep
            # '%%' intact for template dict substitution step. Working around
            # with boundary-alike separator hack.
            sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
            outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

            # outtmpl should be expand_path'ed before template dict substitution
            # because meta fields may contain env variables we don't want to
            # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
            # title "Hello $PATH", we don't want `$PATH` to be expanded.
            filename = expand_path(outtmpl).replace(sep, '') % template_dict

            # Template types listed in OUTTMPL_TYPES with a non-None value
            # get their extension replaced by that value
            force_ext = OUTTMPL_TYPES.get(tmpl_type)
            if force_ext is not None:
                filename = replace_extension(filename, force_ext, template_dict.get('ext'))

            # https://github.com/blackjack4494/youtube-dlc/issues/85
            trim_file_name = self.params.get('trim_file_name', False)
            if trim_file_name:
                # NOTE(review): rsplit('.') without maxsplit splits on every
                # dot, so only the first, the second-to-last and the last
                # dot-separated pieces survive below - confirm this is
                # intended for names containing more than two dots
                fn_groups = filename.rsplit('.')
                ext = fn_groups[-1]
                sub_ext = ''
                if len(fn_groups) > 2:
                    sub_ext = fn_groups[-2]
                # Only the first piece is cut to trim_file_name characters
                filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))

            return filename
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None
 913
 914     def prepare_filename(self, info_dict, dir_type='', warn=False):
 915         """Generate the output filename."""
 916         paths = self.params.get('paths', {})
 917         assert isinstance(paths, dict)
 918         filename = self._prepare_filename(info_dict, dir_type or 'default')
 919
 920         if warn and not self.__prepare_filename_warned:
 921             if not paths:
 922                 pass
 923             elif filename == '-':
 924                 self.report_warning('--paths is ignored when an outputting to stdout')
 925             elif os.path.isabs(filename):
 926                 self.report_warning('--paths is ignored since an absolute path is given in output template')
 927             self.__prepare_filename_warned = True
 928         if filename == '-' or not filename:
 929             return filename
 930
 931         homepath = expand_path(paths.get('home', '').strip())
 932         assert isinstance(homepath, compat_str)
 933         subdir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
 934         assert isinstance(subdir, compat_str)
 935         path = os.path.join(homepath, subdir, filename)
 936
 937         # Temporary fix for #4787
 938         # 'Treat' all problem characters by passing filename through preferredencoding
 939         # to workaround encoding issues with subprocess on python2 @ Windows
 940         if sys.version_info < (3, 0) and sys.platform == 'win32':
 941             path = encodeFilename(path, True).decode(preferredencoding())
 942         return sanitize_path(path, force=self.params.get('windowsfilenames'))
 943
 944     def _match_entry(self, info_dict, incomplete):
 945         """ Returns None if the file should be downloaded """
 946
 947         def check_filter():
 948             video_title = info_dict.get('title', info_dict.get('id', 'video'))
 949             if 'title' in info_dict:
 950                 # This can happen when we're just evaluating the playlist
 951                 title = info_dict['title']
 952                 matchtitle = self.params.get('matchtitle', False)
 953                 if matchtitle:
 954                     if not re.search(matchtitle, title, re.IGNORECASE):
 955                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 956                 rejecttitle = self.params.get('rejecttitle', False)
 957                 if rejecttitle:
 958                     if re.search(rejecttitle, title, re.IGNORECASE):
 959                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 960             date = info_dict.get('upload_date')
 961             if date is not None:
 962                 dateRange = self.params.get('daterange', DateRange())
 963                 if date not in dateRange:
 964                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 965             view_count = info_dict.get('view_count')
 966             if view_count is not None:
 967                 min_views = self.params.get('min_views')
 968                 if min_views is not None and view_count < min_views:
 969                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 970                 max_views = self.params.get('max_views')
 971                 if max_views is not None and view_count > max_views:
 972                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 973             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
 974                 return 'Skipping "%s" because it is age restricted' % video_title
 975             if self.in_download_archive(info_dict):
 976                 return '%s has already been recorded in archive' % video_title
 977
 978             if not incomplete:
 979                 match_filter = self.params.get('match_filter')
 980                 if match_filter is not None:
 981                     ret = match_filter(info_dict)
 982                     if ret is not None:
 983                         return ret
 984             return None
 985
 986         reason = check_filter()
 987         if reason is not None:
 988             self.to_screen('[download] ' + reason)
 989             if reason.endswith('has already been recorded in the archive') and self.params.get('break_on_existing', False):
 990                 raise ExistingVideoReached()
 991             elif self.params.get('break_on_reject', False):
 992                 raise RejectedVideoReached()
 993         return reason
 994
 995     @staticmethod
 996     def add_extra_info(info_dict, extra_info):
 997         '''Set the keys from extra_info in info dict if they are missing'''
 998         for key, value in extra_info.items():
 999             info_dict.setdefault(key, value)
1000
1001     def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info={},
1002                      process=True, force_generic_extractor=False):
1003         '''
1004         Returns a list with a dictionary for each video we find.
1005         If 'download', also downloads the videos.
1006         extra_info is a dict containing the extra values to add to each result
1007         '''
1008
1009         if not ie_key and force_generic_extractor:
1010             ie_key = 'Generic'
1011
1012         if ie_key:
1013             ies = [self.get_info_extractor(ie_key)]
1014         else:
1015             ies = self._ies
1016
1017         for ie in ies:
1018             if not ie.suitable(url):
1019                 continue
1020
1021             ie_key = ie.ie_key()
1022             ie = self.get_info_extractor(ie_key)
1023             if not ie.working():
1024                 self.report_warning('The program functionality for this site has been marked as broken, '
1025                                     'and will probably not work.')
1026
1027             try:
1028                 temp_id = str_or_none(
1029                     ie.extract_id(url) if callable(getattr(ie, 'extract_id', None))
1030                     else ie._match_id(url))
1031             except (AssertionError, IndexError, AttributeError):
1032                 temp_id = None
1033             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1034                 self.to_screen("[%s] %s: has already been recorded in archive" % (
1035                                ie_key, temp_id))
1036                 break
1037             return self.__extract_info(url, ie, download, extra_info, process, info_dict)
1038         else:
1039             self.report_error('no suitable InfoExtractor for URL %s' % url)
1040
1041     def __handle_extraction_exceptions(func):
1042         def wrapper(self, *args, **kwargs):
1043             try:
1044                 return func(self, *args, **kwargs)
1045             except GeoRestrictedError as e:
1046                 msg = e.msg
1047                 if e.countries:
1048                     msg += '\nThis video is available in %s.' % ', '.join(
1049                         map(ISO3166Utils.short2full, e.countries))
1050                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1051                 self.report_error(msg)
1052             except ExtractorError as e:  # An error we somewhat expected
1053                 self.report_error(compat_str(e), e.format_traceback())
1054             except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
1055                 raise
1056             except Exception as e:
1057                 if self.params.get('ignoreerrors', False):
1058                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
1059                 else:
1060                     raise
1061         return wrapper
1062
1063     @__handle_extraction_exceptions
1064     def __extract_info(self, url, ie, download, extra_info, process, info_dict):
1065         ie_result = ie.extract(url)
1066         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1067             return
1068         if isinstance(ie_result, list):
1069             # Backwards compatibility: old IE result format
1070             ie_result = {
1071                 '_type': 'compat_list',
1072                 'entries': ie_result,
1073             }
1074         if info_dict:
1075             if info_dict.get('id'):
1076                 ie_result['id'] = info_dict['id']
1077             if info_dict.get('title'):
1078                 ie_result['title'] = info_dict['title']
1079         self.add_default_extra_info(ie_result, ie, url)
1080         if process:
1081             return self.process_ie_result(ie_result, download, extra_info)
1082         else:
1083             return ie_result
1084
1085     def add_default_extra_info(self, ie_result, ie, url):
1086         self.add_extra_info(ie_result, {
1087             'extractor': ie.IE_NAME,
1088             'webpage_url': url,
1089             'webpage_url_basename': url_basename(url),
1090             'extractor_key': ie.ie_key(),
1091         })
1092
1093     def process_ie_result(self, ie_result, download=True, extra_info={}):
1094         """
1095         Take the result of the ie(may be modified) and resolve all unresolved
1096         references (URLs, playlist items).
1097
1098         It will also download the videos if 'download'.
1099         Returns the resolved ie_result.
1100         """
1101         result_type = ie_result.get('_type', 'video')
1102
1103         if result_type in ('url', 'url_transparent'):
1104             ie_result['url'] = sanitize_url(ie_result['url'])
1105             extract_flat = self.params.get('extract_flat', False)
1106             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1107                     or extract_flat is True):
1108                 self.__forced_printings(ie_result, self.prepare_filename(ie_result), incomplete=True)
1109                 return ie_result
1110
1111         if result_type == 'video':
1112             self.add_extra_info(ie_result, extra_info)
1113             return self.process_video_result(ie_result, download=download)
1114         elif result_type == 'url':
1115             # We have to add extra_info to the results because it may be
1116             # contained in a playlist
1117             return self.extract_info(ie_result['url'],
1118                                      download, info_dict=ie_result,
1119                                      ie_key=ie_result.get('ie_key'),
1120                                      extra_info=extra_info)
1121         elif result_type == 'url_transparent':
1122             # Use the information from the embedding page
1123             info = self.extract_info(
1124                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1125                 extra_info=extra_info, download=False, process=False)
1126
1127             # extract_info may return None when ignoreerrors is enabled and
1128             # extraction failed with an error, don't crash and return early
1129             # in this case
1130             if not info:
1131                 return info
1132
1133             force_properties = dict(
1134                 (k, v) for k, v in ie_result.items() if v is not None)
1135             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1136                 if f in force_properties:
1137                     del force_properties[f]
1138             new_result = info.copy()
1139             new_result.update(force_properties)
1140
1141             # Extracted info may not be a video result (i.e.
1142             # info.get('_type', 'video') != video) but rather an url or
1143             # url_transparent. In such cases outer metadata (from ie_result)
1144             # should be propagated to inner one (info). For this to happen
1145             # _type of info should be overridden with url_transparent. This
1146             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1147             if new_result.get('_type') == 'url':
1148                 new_result['_type'] = 'url_transparent'
1149
1150             return self.process_ie_result(
1151                 new_result, download=download, extra_info=extra_info)
1152         elif result_type in ('playlist', 'multi_video'):
1153             # Protect from infinite recursion due to recursively nested playlists
1154             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1155             webpage_url = ie_result['webpage_url']
1156             if webpage_url in self._playlist_urls:
1157                 self.to_screen(
1158                     '[download] Skipping already downloaded playlist: %s'
1159                     % ie_result.get('title') or ie_result.get('id'))
1160                 return
1161
1162             self._playlist_level += 1
1163             self._playlist_urls.add(webpage_url)
1164             try:
1165                 return self.__process_playlist(ie_result, download)
1166             finally:
1167                 self._playlist_level -= 1
1168                 if not self._playlist_level:
1169                     self._playlist_urls.clear()
1170         elif result_type == 'compat_list':
1171             self.report_warning(
1172                 'Extractor %s returned a compat_list result. '
1173                 'It needs to be updated.' % ie_result.get('extractor'))
1174
1175             def _fixup(r):
1176                 self.add_extra_info(
1177                     r,
1178                     {
1179                         'extractor': ie_result['extractor'],
1180                         'webpage_url': ie_result['webpage_url'],
1181                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
1182                         'extractor_key': ie_result['extractor_key'],
1183                     }
1184                 )
1185                 return r
1186             ie_result['entries'] = [
1187                 self.process_ie_result(_fixup(r), download, extra_info)
1188                 for r in ie_result['entries']
1189             ]
1190             return ie_result
1191         else:
1192             raise Exception('Invalid result type: %s' % result_type)
1193
1194     def _ensure_dir_exists(self, path):
1195         return make_dir(path, self.report_error)
1196
1197     def __process_playlist(self, ie_result, download):
1198         # We process each entry in the playlist
1199         playlist = ie_result.get('title') or ie_result.get('id')
1200         self.to_screen('[download] Downloading playlist: %s' % playlist)
1201
1202         if 'entries' not in ie_result:
1203             raise EntryNotInPlaylist()
1204         incomplete_entries = bool(ie_result.get('requested_entries'))
1205         if incomplete_entries:
1206             def fill_missing_entries(entries, indexes):
1207                 ret = [None] * max(*indexes)
1208                 for i, entry in zip(indexes, entries):
1209                     ret[i - 1] = entry
1210                 return ret
1211             ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1212
1213         playlist_results = []
1214
1215         playliststart = self.params.get('playliststart', 1) - 1
1216         playlistend = self.params.get('playlistend')
1217         # For backwards compatibility, interpret -1 as whole list
1218         if playlistend == -1:
1219             playlistend = None
1220
1221         playlistitems_str = self.params.get('playlist_items')
1222         playlistitems = None
1223         if playlistitems_str is not None:
1224             def iter_playlistitems(format):
1225                 for string_segment in format.split(','):
1226                     if '-' in string_segment:
1227                         start, end = string_segment.split('-')
1228                         for item in range(int(start), int(end) + 1):
1229                             yield int(item)
1230                     else:
1231                         yield int(string_segment)
1232             playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1233
1234         ie_entries = ie_result['entries']
1235
1236         def make_playlistitems_entries(list_ie_entries):
1237             num_entries = len(list_ie_entries)
1238             for i in playlistitems:
1239                 if -num_entries < i <= num_entries:
1240                     yield list_ie_entries[i - 1]
1241                 elif incomplete_entries:
1242                     raise EntryNotInPlaylist()
1243
1244         if isinstance(ie_entries, list):
1245             n_all_entries = len(ie_entries)
1246             if playlistitems:
1247                 entries = list(make_playlistitems_entries(ie_entries))
1248             else:
1249                 entries = ie_entries[playliststart:playlistend]
1250             n_entries = len(entries)
1251             msg = 'Collected %d videos; downloading %d of them' % (n_all_entries, n_entries)
1252         elif isinstance(ie_entries, PagedList):
1253             if playlistitems:
1254                 entries = []
1255                 for item in playlistitems:
1256                     entries.extend(ie_entries.getslice(
1257                         item - 1, item
1258                     ))
1259             else:
1260                 entries = ie_entries.getslice(
1261                     playliststart, playlistend)
1262             n_entries = len(entries)
1263             msg = 'Downloading %d videos' % n_entries
1264         else:  # iterable
1265             if playlistitems:
1266                 entries = list(make_playlistitems_entries(list(itertools.islice(
1267                     ie_entries, 0, max(playlistitems)))))
1268             else:
1269                 entries = list(itertools.islice(
1270                     ie_entries, playliststart, playlistend))
1271             n_entries = len(entries)
1272             msg = 'Downloading %d videos' % n_entries
1273
1274         if any((entry is None for entry in entries)):
1275             raise EntryNotInPlaylist()
1276         if not playlistitems and (playliststart or playlistend):
1277             playlistitems = list(range(1 + playliststart, 1 + playliststart + len(entries)))
1278         ie_result['entries'] = entries
1279         ie_result['requested_entries'] = playlistitems
1280
1281         if self.params.get('allow_playlist_files', True):
1282             ie_copy = {
1283                 'playlist': playlist,
1284                 'playlist_id': ie_result.get('id'),
1285                 'playlist_title': ie_result.get('title'),
1286                 'playlist_uploader': ie_result.get('uploader'),
1287                 'playlist_uploader_id': ie_result.get('uploader_id'),
1288                 'playlist_index': 0
1289             }
1290             ie_copy.update(dict(ie_result))
1291
1292             if self.params.get('writeinfojson', False):
1293                 infofn = self.prepare_filename(ie_copy, 'pl_infojson')
1294                 if not self._ensure_dir_exists(encodeFilename(infofn)):
1295                     return
1296                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
1297                     self.to_screen('[info] Playlist metadata is already present')
1298                 else:
1299                     self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
1300                     try:
1301                         write_json_file(self.filter_requested_info(ie_result, self.params.get('clean_infojson', True)), infofn)
1302                     except (OSError, IOError):
1303                         self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
1304
1305             if self.params.get('writedescription', False):
1306                 descfn = self.prepare_filename(ie_copy, 'pl_description')
1307                 if not self._ensure_dir_exists(encodeFilename(descfn)):
1308                     return
1309                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
1310                     self.to_screen('[info] Playlist description is already present')
1311                 elif ie_result.get('description') is None:
1312                     self.report_warning('There\'s no playlist description to write.')
1313                 else:
1314                     try:
1315                         self.to_screen('[info] Writing playlist description to: ' + descfn)
1316                         with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1317                             descfile.write(ie_result['description'])
1318                     except (OSError, IOError):
1319                         self.report_error('Cannot write playlist description file ' + descfn)
1320                         return
1321
1322         if self.params.get('playlistreverse', False):
1323             entries = entries[::-1]
1324         if self.params.get('playlistrandom', False):
1325             random.shuffle(entries)
1326
1327         x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1328
1329         self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg))
1330         for i, entry in enumerate(entries, 1):
1331             self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1332             # This __x_forwarded_for_ip thing is a bit ugly but requires
1333             # minimal changes
1334             if x_forwarded_for:
1335                 entry['__x_forwarded_for_ip'] = x_forwarded_for
1336             extra = {
1337                 'n_entries': n_entries,
1338                 'playlist': playlist,
1339                 'playlist_id': ie_result.get('id'),
1340                 'playlist_title': ie_result.get('title'),
1341                 'playlist_uploader': ie_result.get('uploader'),
1342                 'playlist_uploader_id': ie_result.get('uploader_id'),
1343                 'playlist_index': playlistitems[i - 1] if playlistitems else i,
1344                 'extractor': ie_result['extractor'],
1345                 'webpage_url': ie_result['webpage_url'],
1346                 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1347                 'extractor_key': ie_result['extractor_key'],
1348             }
1349
1350             if self._match_entry(entry, incomplete=True) is not None:
1351                 continue
1352
1353             entry_result = self.__process_iterable_entry(entry, download, extra)
1354             # TODO: skip failed (empty) entries?
1355             playlist_results.append(entry_result)
1356         ie_result['entries'] = playlist_results
1357         self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1358         return ie_result
1359
1360     @__handle_extraction_exceptions
1361     def __process_iterable_entry(self, entry, download, extra_info):
1362         return self.process_ie_result(
1363             entry, download=download, extra_info=extra_info)
1364
1365     def _build_format_filter(self, filter_spec):
1366         " Returns a function to filter the formats according to the filter_spec "
1367
1368         OPERATORS = {
1369             '<': operator.lt,
1370             '<=': operator.le,
1371             '>': operator.gt,
1372             '>=': operator.ge,
1373             '=': operator.eq,
1374             '!=': operator.ne,
1375         }
1376         operator_rex = re.compile(r'''(?x)\s*
1377             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
1378             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1379             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
1380             $
1381             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1382         m = operator_rex.search(filter_spec)
1383         if m:
1384             try:
1385                 comparison_value = int(m.group('value'))
1386             except ValueError:
1387                 comparison_value = parse_filesize(m.group('value'))
1388                 if comparison_value is None:
1389                     comparison_value = parse_filesize(m.group('value') + 'B')
1390                 if comparison_value is None:
1391                     raise ValueError(
1392                         'Invalid value %r in format specification %r' % (
1393                             m.group('value'), filter_spec))
1394             op = OPERATORS[m.group('op')]
1395
1396         if not m:
1397             STR_OPERATORS = {
1398                 '=': operator.eq,
1399                 '^=': lambda attr, value: attr.startswith(value),
1400                 '$=': lambda attr, value: attr.endswith(value),
1401                 '*=': lambda attr, value: value in attr,
1402             }
1403             str_operator_rex = re.compile(r'''(?x)
1404                 \s*(?P<key>[a-zA-Z0-9._-]+)
1405                 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
1406                 \s*(?P<value>[a-zA-Z0-9._-]+)
1407                 \s*$
1408                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1409             m = str_operator_rex.search(filter_spec)
1410             if m:
1411                 comparison_value = m.group('value')
1412                 str_op = STR_OPERATORS[m.group('op')]
1413                 if m.group('negation'):
1414                     op = lambda attr, value: not str_op(attr, value)
1415                 else:
1416                     op = str_op
1417
1418         if not m:
1419             raise ValueError('Invalid filter specification %r' % filter_spec)
1420
1421         def _filter(f):
1422             actual_value = f.get(m.group('key'))
1423             if actual_value is None:
1424                 return m.group('none_inclusive')
1425             return op(actual_value, comparison_value)
1426         return _filter
1427
1428     def _default_format_spec(self, info_dict, download=True):
1429
1430         def can_merge():
1431             merger = FFmpegMergerPP(self)
1432             return merger.available and merger.can_merge()
1433
1434         prefer_best = (
1435             not self.params.get('simulate', False)
1436             and download
1437             and (
1438                 not can_merge()
1439                 or info_dict.get('is_live', False)
1440                 or self.outtmpl_dict['default'] == '-'))
1441
1442         return (
1443             'best/bestvideo+bestaudio'
1444             if prefer_best
1445             else 'bestvideo*+bestaudio/best'
1446             if not self.params.get('allow_multiple_audio_streams', False)
1447             else 'bestvideo+bestaudio/best')
1448
1449     def build_format_selector(self, format_spec):
1450         def syntax_error(note, start):
1451             message = (
1452                 'Invalid format specification: '
1453                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1454             return SyntaxError(message)
1455
1456         PICKFIRST = 'PICKFIRST'
1457         MERGE = 'MERGE'
1458         SINGLE = 'SINGLE'
1459         GROUP = 'GROUP'
1460         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1461
1462         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1463                                   'video': self.params.get('allow_multiple_video_streams', False)}
1464
1465         def _parse_filter(tokens):
1466             filter_parts = []
1467             for type, string, start, _, _ in tokens:
1468                 if type == tokenize.OP and string == ']':
1469                     return ''.join(filter_parts)
1470                 else:
1471                     filter_parts.append(string)
1472
1473         def _remove_unused_ops(tokens):
1474             # Remove operators that we don't use and join them with the surrounding strings
1475             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1476             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1477             last_string, last_start, last_end, last_line = None, None, None, None
1478             for type, string, start, end, line in tokens:
1479                 if type == tokenize.OP and string == '[':
1480                     if last_string:
1481                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1482                         last_string = None
1483                     yield type, string, start, end, line
1484                     # everything inside brackets will be handled by _parse_filter
1485                     for type, string, start, end, line in tokens:
1486                         yield type, string, start, end, line
1487                         if type == tokenize.OP and string == ']':
1488                             break
1489                 elif type == tokenize.OP and string in ALLOWED_OPS:
1490                     if last_string:
1491                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1492                         last_string = None
1493                     yield type, string, start, end, line
1494                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1495                     if not last_string:
1496                         last_string = string
1497                         last_start = start
1498                         last_end = end
1499                     else:
1500                         last_string += string
1501             if last_string:
1502                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1503
1504         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1505             selectors = []
1506             current_selector = None
1507             for type, string, start, _, _ in tokens:
1508                 # ENCODING is only defined in python 3.x
1509                 if type == getattr(tokenize, 'ENCODING', None):
1510                     continue
1511                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1512                     current_selector = FormatSelector(SINGLE, string, [])
1513                 elif type == tokenize.OP:
1514                     if string == ')':
1515                         if not inside_group:
1516                             # ')' will be handled by the parentheses group
1517                             tokens.restore_last_token()
1518                         break
1519                     elif inside_merge and string in ['/', ',']:
1520                         tokens.restore_last_token()
1521                         break
1522                     elif inside_choice and string == ',':
1523                         tokens.restore_last_token()
1524                         break
1525                     elif string == ',':
1526                         if not current_selector:
1527                             raise syntax_error('"," must follow a format selector', start)
1528                         selectors.append(current_selector)
1529                         current_selector = None
1530                     elif string == '/':
1531                         if not current_selector:
1532                             raise syntax_error('"/" must follow a format selector', start)
1533                         first_choice = current_selector
1534                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1535                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1536                     elif string == '[':
1537                         if not current_selector:
1538                             current_selector = FormatSelector(SINGLE, 'best', [])
1539                         format_filter = _parse_filter(tokens)
1540                         current_selector.filters.append(format_filter)
1541                     elif string == '(':
1542                         if current_selector:
1543                             raise syntax_error('Unexpected "("', start)
1544                         group = _parse_format_selection(tokens, inside_group=True)
1545                         current_selector = FormatSelector(GROUP, group, [])
1546                     elif string == '+':
1547                         if not current_selector:
1548                             raise syntax_error('Unexpected "+"', start)
1549                         selector_1 = current_selector
1550                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
1551                         if not selector_2:
1552                             raise syntax_error('Expected a selector', start)
1553                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1554                     else:
1555                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1556                 elif type == tokenize.ENDMARKER:
1557                     break
1558             if current_selector:
1559                 selectors.append(current_selector)
1560             return selectors
1561
1562         def _merge(formats_pair):
1563             format_1, format_2 = formats_pair
1564
1565             formats_info = []
1566             formats_info.extend(format_1.get('requested_formats', (format_1,)))
1567             formats_info.extend(format_2.get('requested_formats', (format_2,)))
1568
1569             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1570                 get_no_more = {"video": False, "audio": False}
1571                 for (i, fmt_info) in enumerate(formats_info):
1572                     for aud_vid in ["audio", "video"]:
1573                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1574                             if get_no_more[aud_vid]:
1575                                 formats_info.pop(i)
1576                             get_no_more[aud_vid] = True
1577
1578             if len(formats_info) == 1:
1579                 return formats_info[0]
1580
1581             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1582             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1583
1584             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1585             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1586
1587             output_ext = self.params.get('merge_output_format')
1588             if not output_ext:
1589                 if the_only_video:
1590                     output_ext = the_only_video['ext']
1591                 elif the_only_audio and not video_fmts:
1592                     output_ext = the_only_audio['ext']
1593                 else:
1594                     output_ext = 'mkv'
1595
1596             new_dict = {
1597                 'requested_formats': formats_info,
1598                 'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
1599                 'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
1600                 'ext': output_ext,
1601             }
1602
1603             if the_only_video:
1604                 new_dict.update({
1605                     'width': the_only_video.get('width'),
1606                     'height': the_only_video.get('height'),
1607                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
1608                     'fps': the_only_video.get('fps'),
1609                     'vcodec': the_only_video.get('vcodec'),
1610                     'vbr': the_only_video.get('vbr'),
1611                     'stretched_ratio': the_only_video.get('stretched_ratio'),
1612                 })
1613
1614             if the_only_audio:
1615                 new_dict.update({
1616                     'acodec': the_only_audio.get('acodec'),
1617                     'abr': the_only_audio.get('abr'),
1618                 })
1619
1620             return new_dict
1621
1622         def _build_selector_function(selector):
1623             if isinstance(selector, list):  # ,
1624                 fs = [_build_selector_function(s) for s in selector]
1625
1626                 def selector_function(ctx):
1627                     for f in fs:
1628                         for format in f(ctx):
1629                             yield format
1630                 return selector_function
1631
1632             elif selector.type == GROUP:  # ()
1633                 selector_function = _build_selector_function(selector.selector)
1634
1635             elif selector.type == PICKFIRST:  # /
1636                 fs = [_build_selector_function(s) for s in selector.selector]
1637
1638                 def selector_function(ctx):
1639                     for f in fs:
1640                         picked_formats = list(f(ctx))
1641                         if picked_formats:
1642                             return picked_formats
1643                     return []
1644
1645             elif selector.type == SINGLE:  # atom
1646                 format_spec = (selector.selector or 'best').lower()
1647
1648                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
1649                 if format_spec == 'all':
1650                     def selector_function(ctx):
1651                         formats = list(ctx['formats'])
1652                         if formats:
1653                             for f in formats:
1654                                 yield f
1655                 elif format_spec == 'mergeall':
1656                     def selector_function(ctx):
1657                         formats = list(ctx['formats'])
1658                         if not formats:
1659                             return
1660                         merged_format = formats[-1]
1661                         for f in formats[-2::-1]:
1662                             merged_format = _merge((merged_format, f))
1663                         yield merged_format
1664
1665                 else:
1666                     format_fallback = False
1667                     mobj = re.match(
1668                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
1669                         format_spec)
1670                     if mobj is not None:
1671                         format_idx = int_or_none(mobj.group('n'), default=1)
1672                         format_idx = format_idx - 1 if mobj.group('bw')[0] == 'w' else -format_idx
1673                         format_type = (mobj.group('type') or [None])[0]
1674                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
1675                         format_modified = mobj.group('mod') is not None
1676
1677                         format_fallback = not format_type and not format_modified  # for b, w
1678                         filter_f = (
1679                             (lambda f: f.get('%scodec' % format_type) != 'none')
1680                             if format_type and format_modified  # bv*, ba*, wv*, wa*
1681                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
1682                             if format_type  # bv, ba, wv, wa
1683                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
1684                             if not format_modified  # b, w
1685                             else None)  # b*, w*
1686                     else:
1687                         format_idx = -1
1688                         filter_f = ((lambda f: f.get('ext') == format_spec)
1689                                     if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
1690                                     else (lambda f: f.get('format_id') == format_spec))  # id
1691
1692                     def selector_function(ctx):
1693                         formats = list(ctx['formats'])
1694                         if not formats:
1695                             return
1696                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
1697                         n = len(matches)
1698                         if -n <= format_idx < n:
1699                             yield matches[format_idx]
1700                         elif format_fallback and ctx['incomplete_formats']:
1701                             # for extractors with incomplete formats (audio only (soundcloud)
1702                             # or video only (imgur)) best/worst will fallback to
1703                             # best/worst {video,audio}-only format
1704                             n = len(formats)
1705                             if -n <= format_idx < n:
1706                                 yield formats[format_idx]
1707
1708             elif selector.type == MERGE:        # +
1709                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
1710
1711                 def selector_function(ctx):
1712                     for pair in itertools.product(
1713                             selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
1714                         yield _merge(pair)
1715
1716             filters = [self._build_format_filter(f) for f in selector.filters]
1717
1718             def final_selector(ctx):
1719                 ctx_copy = copy.deepcopy(ctx)
1720                 for _filter in filters:
1721                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1722                 return selector_function(ctx_copy)
1723             return final_selector
1724
1725         stream = io.BytesIO(format_spec.encode('utf-8'))
1726         try:
1727             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1728         except tokenize.TokenError:
1729             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1730
1731         class TokenIterator(object):
1732             def __init__(self, tokens):
1733                 self.tokens = tokens
1734                 self.counter = 0
1735
1736             def __iter__(self):
1737                 return self
1738
1739             def __next__(self):
1740                 if self.counter >= len(self.tokens):
1741                     raise StopIteration()
1742                 value = self.tokens[self.counter]
1743                 self.counter += 1
1744                 return value
1745
1746             next = __next__
1747
1748             def restore_last_token(self):
1749                 self.counter -= 1
1750
1751         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1752         return _build_selector_function(parsed_selector)
1753
1754     def _calc_headers(self, info_dict):
1755         res = std_headers.copy()
1756
1757         add_headers = info_dict.get('http_headers')
1758         if add_headers:
1759             res.update(add_headers)
1760
1761         cookies = self._calc_cookies(info_dict)
1762         if cookies:
1763             res['Cookie'] = cookies
1764
1765         if 'X-Forwarded-For' not in res:
1766             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1767             if x_forwarded_for_ip:
1768                 res['X-Forwarded-For'] = x_forwarded_for_ip
1769
1770         return res
1771
1772     def _calc_cookies(self, info_dict):
1773         pr = sanitized_Request(info_dict['url'])
1774         self.cookiejar.add_cookie_header(pr)
1775         return pr.get_header('Cookie')
1776
1777     def process_video_result(self, info_dict, download=True):
1778         assert info_dict.get('_type', 'video') == 'video'
1779
1780         if 'id' not in info_dict:
1781             raise ExtractorError('Missing "id" field in extractor result')
1782         if 'title' not in info_dict:
1783             raise ExtractorError('Missing "title" field in extractor result')
1784
1785         def report_force_conversion(field, field_not, conversion):
1786             self.report_warning(
1787                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
1788                 % (field, field_not, conversion))
1789
1790         def sanitize_string_field(info, string_field):
1791             field = info.get(string_field)
1792             if field is None or isinstance(field, compat_str):
1793                 return
1794             report_force_conversion(string_field, 'a string', 'string')
1795             info[string_field] = compat_str(field)
1796
1797         def sanitize_numeric_fields(info):
1798             for numeric_field in self._NUMERIC_FIELDS:
1799                 field = info.get(numeric_field)
1800                 if field is None or isinstance(field, compat_numeric_types):
1801                     continue
1802                 report_force_conversion(numeric_field, 'numeric', 'int')
1803                 info[numeric_field] = int_or_none(field)
1804
1805         sanitize_string_field(info_dict, 'id')
1806         sanitize_numeric_fields(info_dict)
1807
1808         if 'playlist' not in info_dict:
1809             # It isn't part of a playlist
1810             info_dict['playlist'] = None
1811             info_dict['playlist_index'] = None
1812
1813         thumbnails = info_dict.get('thumbnails')
1814         if thumbnails is None:
1815             thumbnail = info_dict.get('thumbnail')
1816             if thumbnail:
1817                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1818         if thumbnails:
1819             thumbnails.sort(key=lambda t: (
1820                 t.get('preference') if t.get('preference') is not None else -1,
1821                 t.get('width') if t.get('width') is not None else -1,
1822                 t.get('height') if t.get('height') is not None else -1,
1823                 t.get('id') if t.get('id') is not None else '', t.get('url')))
1824             for i, t in enumerate(thumbnails):
1825                 t['url'] = sanitize_url(t['url'])
1826                 if t.get('width') and t.get('height'):
1827                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1828                 if t.get('id') is None:
1829                     t['id'] = '%d' % i
1830
1831         if self.params.get('list_thumbnails'):
1832             self.list_thumbnails(info_dict)
1833             return
1834
1835         thumbnail = info_dict.get('thumbnail')
1836         if thumbnail:
1837             info_dict['thumbnail'] = sanitize_url(thumbnail)
1838         elif thumbnails:
1839             info_dict['thumbnail'] = thumbnails[-1]['url']
1840
1841         if 'display_id' not in info_dict and 'id' in info_dict:
1842             info_dict['display_id'] = info_dict['id']
1843
1844         for ts_key, date_key in (
1845                 ('timestamp', 'upload_date'),
1846                 ('release_timestamp', 'release_date'),
1847         ):
1848             if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
1849                 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1850                 # see http://bugs.python.org/issue1646728)
1851                 try:
1852                     upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
1853                     info_dict[date_key] = upload_date.strftime('%Y%m%d')
1854                 except (ValueError, OverflowError, OSError):
1855                     pass
1856
1857         # Auto generate title fields corresponding to the *_number fields when missing
1858         # in order to always have clean titles. This is very common for TV series.
1859         for field in ('chapter', 'season', 'episode'):
1860             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1861                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
1862
1863         for cc_kind in ('subtitles', 'automatic_captions'):
1864             cc = info_dict.get(cc_kind)
1865             if cc:
1866                 for _, subtitle in cc.items():
1867                     for subtitle_format in subtitle:
1868                         if subtitle_format.get('url'):
1869                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
1870                         if subtitle_format.get('ext') is None:
1871                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1872
1873         automatic_captions = info_dict.get('automatic_captions')
1874         subtitles = info_dict.get('subtitles')
1875
1876         if self.params.get('listsubtitles', False):
1877             if 'automatic_captions' in info_dict:
1878                 self.list_subtitles(
1879                     info_dict['id'], automatic_captions, 'automatic captions')
1880             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1881             return
1882
1883         info_dict['requested_subtitles'] = self.process_subtitles(
1884             info_dict['id'], subtitles, automatic_captions)
1885
1886         # We now pick which formats have to be downloaded
1887         if info_dict.get('formats') is None:
1888             # There's only one format available
1889             formats = [info_dict]
1890         else:
1891             formats = info_dict['formats']
1892
1893         if not formats:
1894             if not self.params.get('ignore_no_formats_error'):
1895                 raise ExtractorError('No video formats found!')
1896             else:
1897                 self.report_warning('No video formats found!')
1898
1899         def is_wellformed(f):
1900             url = f.get('url')
1901             if not url:
1902                 self.report_warning(
1903                     '"url" field is missing or empty - skipping format, '
1904                     'there is an error in extractor')
1905                 return False
1906             if isinstance(url, bytes):
1907                 sanitize_string_field(f, 'url')
1908             return True
1909
1910         # Filter out malformed formats for better extraction robustness
1911         formats = list(filter(is_wellformed, formats))
1912
1913         formats_dict = {}
1914
1915         # We check that all the formats have the format and format_id fields
1916         for i, format in enumerate(formats):
1917             sanitize_string_field(format, 'format_id')
1918             sanitize_numeric_fields(format)
1919             format['url'] = sanitize_url(format['url'])
1920             if not format.get('format_id'):
1921                 format['format_id'] = compat_str(i)
1922             else:
1923                 # Sanitize format_id from characters used in format selector expression
1924                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
1925             format_id = format['format_id']
1926             if format_id not in formats_dict:
1927                 formats_dict[format_id] = []
1928             formats_dict[format_id].append(format)
1929
1930         # Make sure all formats have unique format_id
1931         for format_id, ambiguous_formats in formats_dict.items():
1932             if len(ambiguous_formats) > 1:
1933                 for i, format in enumerate(ambiguous_formats):
1934                     format['format_id'] = '%s-%d' % (format_id, i)
1935
1936         for i, format in enumerate(formats):
1937             if format.get('format') is None:
1938                 format['format'] = '{id} - {res}{note}'.format(
1939                     id=format['format_id'],
1940                     res=self.format_resolution(format),
1941                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1942                 )
1943             # Automatically determine file extension if missing
1944             if format.get('ext') is None:
1945                 format['ext'] = determine_ext(format['url']).lower()
1946             # Automatically determine protocol if missing (useful for format
1947             # selection purposes)
1948             if format.get('protocol') is None:
1949                 format['protocol'] = determine_protocol(format)
1950             # Add HTTP headers, so that external programs can use them from the
1951             # json output
1952             full_format_info = info_dict.copy()
1953             full_format_info.update(format)
1954             format['http_headers'] = self._calc_headers(full_format_info)
1955         # Remove private housekeeping stuff
1956         if '__x_forwarded_for_ip' in info_dict:
1957             del info_dict['__x_forwarded_for_ip']
1958
1959         # TODO Central sorting goes here
1960
1961         if formats and formats[0] is not info_dict:
1962             # only set the 'formats' fields if the original info_dict list them
1963             # otherwise we end up with a circular reference, the first (and unique)
1964             # element in the 'formats' field in info_dict is info_dict itself,
1965             # which can't be exported to json
1966             info_dict['formats'] = formats
1967         if self.params.get('listformats'):
1968             if not info_dict.get('formats'):
1969                 raise ExtractorError('No video formats found', expected=True)
1970             self.list_formats(info_dict)
1971             return
1972
1973         req_format = self.params.get('format')
1974         if req_format is None:
1975             req_format = self._default_format_spec(info_dict, download=download)
1976             if self.params.get('verbose'):
1977                 self.to_screen('[debug] Default format spec: %s' % req_format)
1978
1979         format_selector = self.build_format_selector(req_format)
1980
1981         # While in format selection we may need to have an access to the original
1982         # format set in order to calculate some metrics or do some processing.
1983         # For now we need to be able to guess whether original formats provided
1984         # by extractor are incomplete or not (i.e. whether extractor provides only
1985         # video-only or audio-only formats) for proper formats selection for
1986         # extractors with such incomplete formats (see
1987         # https://github.com/ytdl-org/youtube-dl/pull/5556).
1988         # Since formats may be filtered during format selection and may not match
1989         # the original formats the results may be incorrect. Thus original formats
1990         # or pre-calculated metrics should be passed to format selection routines
1991         # as well.
1992         # We will pass a context object containing all necessary additional data
1993         # instead of just formats.
1994         # This fixes incorrect format selection issue (see
1995         # https://github.com/ytdl-org/youtube-dl/issues/10083).
1996         incomplete_formats = (
1997             # All formats are video-only or
1998             all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
1999             # all formats are audio-only
2000             or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
2001
2002         ctx = {
2003             'formats': formats,
2004             'incomplete_formats': incomplete_formats,
2005         }
2006
2007         formats_to_download = list(format_selector(ctx))
2008         if not formats_to_download:
2009             if not self.params.get('ignore_no_formats_error'):
2010                 raise ExtractorError('Requested format is not available', expected=True)
2011             else:
2012                 self.report_warning('Requested format is not available')
2013         elif download:
2014             self.to_screen(
2015                 '[info] %s: Downloading format(s) %s'
2016                 % (info_dict['id'], ", ".join([f['format_id'] for f in formats_to_download])))
2017             if len(formats_to_download) > 1:
2018                 self.to_screen(
2019                     '[info] %s: Downloading video in %s formats'
2020                     % (info_dict['id'], len(formats_to_download)))
2021             for fmt in formats_to_download:
2022                 new_info = dict(info_dict)
2023                 new_info.update(fmt)
2024                 self.process_info(new_info)
2025         # We update the info dict with the best quality format (backwards compatibility)
2026         if formats_to_download:
2027             info_dict.update(formats_to_download[-1])
2028         return info_dict
2029
2030     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2031         """Select the requested subtitles and their format"""
2032         available_subs = {}
2033         if normal_subtitles and self.params.get('writesubtitles'):
2034             available_subs.update(normal_subtitles)
2035         if automatic_captions and self.params.get('writeautomaticsub'):
2036             for lang, cap_info in automatic_captions.items():
2037                 if lang not in available_subs:
2038                     available_subs[lang] = cap_info
2039
2040         if (not self.params.get('writesubtitles') and not
2041                 self.params.get('writeautomaticsub') or not
2042                 available_subs):
2043             return None
2044
2045         all_sub_langs = available_subs.keys()
2046         if self.params.get('allsubtitles', False):
2047             requested_langs = all_sub_langs
2048         elif self.params.get('subtitleslangs', False):
2049             requested_langs = set()
2050             for lang in self.params.get('subtitleslangs'):
2051                 if lang == 'all':
2052                     requested_langs.update(all_sub_langs)
2053                     continue
2054                 discard = lang[0] == '-'
2055                 if discard:
2056                     lang = lang[1:]
2057                 current_langs = filter(re.compile(lang + '$').match, all_sub_langs)
2058                 if discard:
2059                     for lang in current_langs:
2060                         requested_langs.discard(lang)
2061                 else:
2062                     requested_langs.update(current_langs)
2063         elif 'en' in available_subs:
2064             requested_langs = ['en']
2065         else:
2066             requested_langs = [list(all_sub_langs)[0]]
2067
2068         formats_query = self.params.get('subtitlesformat', 'best')
2069         formats_preference = formats_query.split('/') if formats_query else []
2070         subs = {}
2071         for lang in requested_langs:
2072             formats = available_subs.get(lang)
2073             if formats is None:
2074                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2075                 continue
2076             for ext in formats_preference:
2077                 if ext == 'best':
2078                     f = formats[-1]
2079                     break
2080                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2081                 if matches:
2082                     f = matches[-1]
2083                     break
2084             else:
2085                 f = formats[-1]
2086                 self.report_warning(
2087                     'No subtitle format found matching "%s" for language %s, '
2088                     'using %s' % (formats_query, lang, f['ext']))
2089             subs[lang] = f
2090         return subs
2091
2092     def __forced_printings(self, info_dict, filename, incomplete):
2093         def print_mandatory(field):
2094             if (self.params.get('force%s' % field, False)
2095                     and (not incomplete or info_dict.get(field) is not None)):
2096                 self.to_stdout(info_dict[field])
2097
2098         def print_optional(field):
2099             if (self.params.get('force%s' % field, False)
2100                     and info_dict.get(field) is not None):
2101                 self.to_stdout(info_dict[field])
2102
2103         print_mandatory('title')
2104         print_mandatory('id')
2105         if self.params.get('forceurl', False) and not incomplete:
2106             if info_dict.get('requested_formats') is not None:
2107                 for f in info_dict['requested_formats']:
2108                     self.to_stdout(f['url'] + f.get('play_path', ''))
2109             else:
2110                 # For RTMP URLs, also include the playpath
2111                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
2112         print_optional('thumbnail')
2113         print_optional('description')
2114         if self.params.get('forcefilename', False) and filename is not None:
2115             self.to_stdout(filename)
2116         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
2117             self.to_stdout(formatSeconds(info_dict['duration']))
2118         print_mandatory('format')
2119         if self.params.get('forcejson', False):
2120             self.post_extract(info_dict)
2121             self.to_stdout(json.dumps(info_dict, default=repr))
2122
2123     def process_info(self, info_dict):
2124         """Process a single resolved IE result."""
2125
2126         assert info_dict.get('_type', 'video') == 'video'
2127
2128         info_dict.setdefault('__postprocessors', [])
2129
2130         max_downloads = self.params.get('max_downloads')
2131         if max_downloads is not None:
2132             if self._num_downloads >= int(max_downloads):
2133                 raise MaxDownloadsReached()
2134
2135         # TODO: backward compatibility, to be removed
2136         info_dict['fulltitle'] = info_dict['title']
2137
2138         if 'format' not in info_dict:
2139             info_dict['format'] = info_dict['ext']
2140
2141         if self._match_entry(info_dict, incomplete=False) is not None:
2142             return
2143
2144         self.post_extract(info_dict)
2145         self._num_downloads += 1
2146
2147         info_dict, _ = self.pre_process(info_dict)
2148
2149         # info_dict['_filename'] needs to be set for backward compatibility
2150         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2151         temp_filename = self.prepare_filename(info_dict, 'temp')
2152         files_to_move = {}
2153
2154         # Forced printings
2155         self.__forced_printings(info_dict, full_filename, incomplete=False)
2156
2157         if self.params.get('simulate', False):
2158             if self.params.get('force_write_download_archive', False):
2159                 self.record_download_archive(info_dict)
2160
2161             # Do nothing else if in simulate mode
2162             return
2163
2164         if full_filename is None:
2165             return
2166
2167         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2168             return
2169         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2170             return
2171
2172         if self.params.get('writedescription', False):
2173             descfn = self.prepare_filename(info_dict, 'description')
2174             if not self._ensure_dir_exists(encodeFilename(descfn)):
2175                 return
2176             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
2177                 self.to_screen('[info] Video description is already present')
2178             elif info_dict.get('description') is None:
2179                 self.report_warning('There\'s no description to write.')
2180             else:
2181                 try:
2182                     self.to_screen('[info] Writing video description to: ' + descfn)
2183                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
2184                         descfile.write(info_dict['description'])
2185                 except (OSError, IOError):
2186                     self.report_error('Cannot write description file ' + descfn)
2187                     return
2188
2189         if self.params.get('writeannotations', False):
2190             annofn = self.prepare_filename(info_dict, 'annotation')
2191             if not self._ensure_dir_exists(encodeFilename(annofn)):
2192                 return
2193             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2194                 self.to_screen('[info] Video annotations are already present')
2195             elif not info_dict.get('annotations'):
2196                 self.report_warning('There are no annotations to write.')
2197             else:
2198                 try:
2199                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2200                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2201                         annofile.write(info_dict['annotations'])
2202                 except (KeyError, TypeError):
2203                     self.report_warning('There are no annotations to write.')
2204                 except (OSError, IOError):
2205                     self.report_error('Cannot write annotations file: ' + annofn)
2206                     return
2207
2208         def dl(name, info, subtitle=False):
2209             fd = get_suitable_downloader(info, self.params)(self, self.params)
2210             for ph in self._progress_hooks:
2211                 fd.add_progress_hook(ph)
2212             if self.params.get('verbose'):
2213                 self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
2214             new_info = dict(info)
2215             if new_info.get('http_headers') is None:
2216                 new_info['http_headers'] = self._calc_headers(new_info)
2217             return fd.download(name, new_info, subtitle)
2218
2219         subtitles_are_requested = any([self.params.get('writesubtitles', False),
2220                                        self.params.get('writeautomaticsub')])
2221
2222         if subtitles_are_requested and info_dict.get('requested_subtitles'):
2223             # subtitles download errors are already managed as troubles in relevant IE
2224             # that way it will silently go on when used with unsupporting IE
2225             subtitles = info_dict['requested_subtitles']
2226             # ie = self.get_info_extractor(info_dict['extractor_key'])
2227             for sub_lang, sub_info in subtitles.items():
2228                 sub_format = sub_info['ext']
2229                 sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
2230                 sub_filename_final = subtitles_filename(
2231                     self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))
2232                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
2233                     self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
2234                     sub_info['filepath'] = sub_filename
2235                     files_to_move[sub_filename] = sub_filename_final
2236                 else:
2237                     self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
2238                     if sub_info.get('data') is not None:
2239                         try:
2240                             # Use newline='' to prevent conversion of newline characters
2241                             # See https://github.com/ytdl-org/youtube-dl/issues/10268
2242                             with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
2243                                 subfile.write(sub_info['data'])
2244                             sub_info['filepath'] = sub_filename
2245                             files_to_move[sub_filename] = sub_filename_final
2246                         except (OSError, IOError):
2247                             self.report_error('Cannot write subtitles file ' + sub_filename)
2248                             return
2249                     else:
2250                         try:
2251                             dl(sub_filename, sub_info.copy(), subtitle=True)
2252                             sub_info['filepath'] = sub_filename
2253                             files_to_move[sub_filename] = sub_filename_final
2254                         except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2255                             self.report_warning('Unable to download subtitle for "%s": %s' %
2256                                                 (sub_lang, error_to_compat_str(err)))
2257                             continue
2258
2259         if self.params.get('writeinfojson', False):
2260             infofn = self.prepare_filename(info_dict, 'infojson')
2261             if not self._ensure_dir_exists(encodeFilename(infofn)):
2262                 return
2263             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
2264                 self.to_screen('[info] Video metadata is already present')
2265             else:
2266                 self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
2267                 try:
2268                     write_json_file(self.filter_requested_info(info_dict, self.params.get('clean_infojson', True)), infofn)
2269                 except (OSError, IOError):
2270                     self.report_error('Cannot write video metadata to JSON file ' + infofn)
2271                     return
2272             info_dict['__infojson_filename'] = infofn
2273
2274         for thumb_ext in self._write_thumbnails(info_dict, temp_filename):
2275             thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))
2276             thumb_filename = replace_extension(
2277                 self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))
2278             files_to_move[thumb_filename_temp] = thumb_filename
2279
2280         # Write internet shortcut files
2281         url_link = webloc_link = desktop_link = False
2282         if self.params.get('writelink', False):
2283             if sys.platform == "darwin":  # macOS.
2284                 webloc_link = True
2285             elif sys.platform.startswith("linux"):
2286                 desktop_link = True
2287             else:  # if sys.platform in ['win32', 'cygwin']:
2288                 url_link = True
2289         if self.params.get('writeurllink', False):
2290             url_link = True
2291         if self.params.get('writewebloclink', False):
2292             webloc_link = True
2293         if self.params.get('writedesktoplink', False):
2294             desktop_link = True
2295
2296         if url_link or webloc_link or desktop_link:
2297             if 'webpage_url' not in info_dict:
2298                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2299                 return
2300             ascii_url = iri_to_uri(info_dict['webpage_url'])
2301
2302         def _write_link_file(extension, template, newline, embed_filename):
2303             linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
2304             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2305                 self.to_screen('[info] Internet shortcut is already present')
2306             else:
2307                 try:
2308                     self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2309                     with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2310                         template_vars = {'url': ascii_url}
2311                         if embed_filename:
2312                             template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2313                         linkfile.write(template % template_vars)
2314                 except (OSError, IOError):
2315                     self.report_error('Cannot write internet shortcut ' + linkfn)
2316                     return False
2317             return True
2318
2319         if url_link:
2320             if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2321                 return
2322         if webloc_link:
2323             if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2324                 return
2325         if desktop_link:
2326             if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2327                 return
2328
2329         try:
2330             info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2331         except PostProcessingError as err:
2332             self.report_error('Preprocessing: %s' % str(err))
2333             return
2334
2335         must_record_download_archive = False
2336         if self.params.get('skip_download', False):
2337             info_dict['filepath'] = temp_filename
2338             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2339             info_dict['__files_to_move'] = files_to_move
2340             info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2341         else:
2342             # Download
2343             try:
2344
2345                 def existing_file(*filepaths):
2346                     ext = info_dict.get('ext')
2347                     final_ext = self.params.get('final_ext', ext)
2348                     existing_files = []
2349                     for file in orderedSet(filepaths):
2350                         if final_ext != ext:
2351                             converted = replace_extension(file, final_ext, ext)
2352                             if os.path.exists(encodeFilename(converted)):
2353                                 existing_files.append(converted)
2354                         if os.path.exists(encodeFilename(file)):
2355                             existing_files.append(file)
2356
2357                     if not existing_files or self.params.get('overwrites', False):
2358                         for file in orderedSet(existing_files):
2359                             self.report_file_delete(file)
2360                             os.remove(encodeFilename(file))
2361                         return None
2362
2363                     self.report_file_already_downloaded(existing_files[0])
2364                     info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2365                     return existing_files[0]
2366
2367                 success = True
2368                 if info_dict.get('requested_formats') is not None:
2369                     downloaded = []
2370                     merger = FFmpegMergerPP(self)
2371                     if self.params.get('allow_unplayable_formats'):
2372                         self.report_warning(
2373                             'You have requested merging of multiple formats '
2374                             'while also allowing unplayable formats to be downloaded. '
2375                             'The formats won\'t be merged to prevent data corruption.')
2376                     elif not merger.available:
2377                         self.report_warning(
2378                             'You have requested merging of multiple formats but ffmpeg is not installed. '
2379                             'The formats won\'t be merged.')
2380
2381                     def compatible_formats(formats):
2382                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2383                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2384                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2385                         if len(video_formats) > 2 or len(audio_formats) > 2:
2386                             return False
2387
2388                         # Check extension
2389                         exts = set(format.get('ext') for format in formats)
2390                         COMPATIBLE_EXTS = (
2391                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2392                             set(('webm',)),
2393                         )
2394                         for ext_sets in COMPATIBLE_EXTS:
2395                             if ext_sets.issuperset(exts):
2396                                 return True
2397                         # TODO: Check acodec/vcodec
2398                         return False
2399
2400                     requested_formats = info_dict['requested_formats']
2401                     old_ext = info_dict['ext']
2402                     if self.params.get('merge_output_format') is None:
2403                         if not compatible_formats(requested_formats):
2404                             info_dict['ext'] = 'mkv'
2405                             self.report_warning(
2406                                 'Requested formats are incompatible for merge and will be merged into mkv.')
2407                         if (info_dict['ext'] == 'webm'
2408                                 and self.params.get('writethumbnail', False)
2409                                 and info_dict.get('thumbnails')):
2410                             info_dict['ext'] = 'mkv'
2411                             self.report_warning(
2412                                 'webm doesn\'t support embedding a thumbnail, mkv will be used.')
2413
2414                     def correct_ext(filename):
2415                         filename_real_ext = os.path.splitext(filename)[1][1:]
2416                         filename_wo_ext = (
2417                             os.path.splitext(filename)[0]
2418                             if filename_real_ext == old_ext
2419                             else filename)
2420                         return '%s.%s' % (filename_wo_ext, info_dict['ext'])
2421
2422                     # Ensure filename always has a correct extension for successful merge
2423                     full_filename = correct_ext(full_filename)
2424                     temp_filename = correct_ext(temp_filename)
2425                     dl_filename = existing_file(full_filename, temp_filename)
2426                     info_dict['__real_download'] = False
2427                     if dl_filename is None:
2428                         for f in requested_formats:
2429                             new_info = dict(info_dict)
2430                             new_info.update(f)
2431                             fname = prepend_extension(
2432                                 self.prepare_filename(new_info, 'temp'),
2433                                 'f%s' % f['format_id'], new_info['ext'])
2434                             if not self._ensure_dir_exists(fname):
2435                                 return
2436                             downloaded.append(fname)
2437                             partial_success, real_download = dl(fname, new_info)
2438                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
2439                             success = success and partial_success
2440                         if merger.available and not self.params.get('allow_unplayable_formats'):
2441                             info_dict['__postprocessors'].append(merger)
2442                             info_dict['__files_to_merge'] = downloaded
2443                             # Even if there were no downloads, it is being merged only now
2444                             info_dict['__real_download'] = True
2445                         else:
2446                             for file in downloaded:
2447                                 files_to_move[file] = None
2448                 else:
2449                     # Just a single file
2450                     dl_filename = existing_file(full_filename, temp_filename)
2451                     if dl_filename is None:
2452                         success, real_download = dl(temp_filename, info_dict)
2453                         info_dict['__real_download'] = real_download
2454
2455                 dl_filename = dl_filename or temp_filename
2456                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2457
2458             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2459                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2460                 return
2461             except (OSError, IOError) as err:
2462                 raise UnavailableVideoError(err)
2463             except (ContentTooShortError, ) as err:
2464                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2465                 return
2466
2467             if success and full_filename != '-':
2468                 # Fixup content
2469                 fixup_policy = self.params.get('fixup')
2470                 if fixup_policy is None:
2471                     fixup_policy = 'detect_or_warn'
2472
2473                 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg to fix this automatically.'
2474
2475                 stretched_ratio = info_dict.get('stretched_ratio')
2476                 if stretched_ratio is not None and stretched_ratio != 1:
2477                     if fixup_policy == 'warn':
2478                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2479                             info_dict['id'], stretched_ratio))
2480                     elif fixup_policy == 'detect_or_warn':
2481                         stretched_pp = FFmpegFixupStretchedPP(self)
2482                         if stretched_pp.available:
2483                             info_dict['__postprocessors'].append(stretched_pp)
2484                         else:
2485                             self.report_warning(
2486                                 '%s: Non-uniform pixel ratio (%s). %s'
2487                                 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
2488                     else:
2489                         assert fixup_policy in ('ignore', 'never')
2490
2491                 if (info_dict.get('requested_formats') is None
2492                         and info_dict.get('container') == 'm4a_dash'
2493                         and info_dict.get('ext') == 'm4a'):
2494                     if fixup_policy == 'warn':
2495                         self.report_warning(
2496                             '%s: writing DASH m4a. '
2497                             'Only some players support this container.'
2498                             % info_dict['id'])
2499                     elif fixup_policy == 'detect_or_warn':
2500                         fixup_pp = FFmpegFixupM4aPP(self)
2501                         if fixup_pp.available:
2502                             info_dict['__postprocessors'].append(fixup_pp)
2503                         else:
2504                             self.report_warning(
2505                                 '%s: writing DASH m4a. '
2506                                 'Only some players support this container. %s'
2507                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2508                     else:
2509                         assert fixup_policy in ('ignore', 'never')
2510
2511                 if ('protocol' in info_dict
2512                         and get_suitable_downloader(info_dict, self.params).__name__ == 'HlsFD'):
2513                     if fixup_policy == 'warn':
2514                         self.report_warning('%s: malformed AAC bitstream detected.' % (
2515                             info_dict['id']))
2516                     elif fixup_policy == 'detect_or_warn':
2517                         fixup_pp = FFmpegFixupM3u8PP(self)
2518                         if fixup_pp.available:
2519                             info_dict['__postprocessors'].append(fixup_pp)
2520                         else:
2521                             self.report_warning(
2522                                 '%s: malformed AAC bitstream detected. %s'
2523                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2524                     else:
2525                         assert fixup_policy in ('ignore', 'never')
2526
2527                 try:
2528                     info_dict = self.post_process(dl_filename, info_dict, files_to_move)
2529                 except PostProcessingError as err:
2530                     self.report_error('Postprocessing: %s' % str(err))
2531                     return
2532                 try:
2533                     for ph in self._post_hooks:
2534                         ph(info_dict['filepath'])
2535                 except Exception as err:
2536                     self.report_error('post hooks: %s' % str(err))
2537                     return
2538                 must_record_download_archive = True
2539
2540         if must_record_download_archive or self.params.get('force_write_download_archive', False):
2541             self.record_download_archive(info_dict)
2542         max_downloads = self.params.get('max_downloads')
2543         if max_downloads is not None and self._num_downloads >= int(max_downloads):
2544             raise MaxDownloadsReached()
2545
2546     def download(self, url_list):
2547         """Download a given list of URLs."""
2548         outtmpl = self.outtmpl_dict['default']
2549         if (len(url_list) > 1
2550                 and outtmpl != '-'
2551                 and '%' not in outtmpl
2552                 and self.params.get('max_downloads') != 1):
2553             raise SameFileError(outtmpl)
2554
2555         for url in url_list:
2556             try:
2557                 # It also downloads the videos
2558                 res = self.extract_info(
2559                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2560             except UnavailableVideoError:
2561                 self.report_error('unable to download video')
2562             except MaxDownloadsReached:
2563                 self.to_screen('[info] Maximum number of downloaded files reached')
2564                 raise
2565             except ExistingVideoReached:
2566                 self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
2567                 raise
2568             except RejectedVideoReached:
2569                 self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
2570                 raise
2571             else:
2572                 if self.params.get('dump_single_json', False):
2573                     self.post_extract(res)
2574                     self.to_stdout(json.dumps(res, default=repr))
2575
2576         return self._download_retcode
2577
2578     def download_with_info_file(self, info_filename):
2579         with contextlib.closing(fileinput.FileInput(
2580                 [info_filename], mode='r',
2581                 openhook=fileinput.hook_encoded('utf-8'))) as f:
2582             # FileInput doesn't have a read method, we can't call json.load
2583             info = self.filter_requested_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
2584         try:
2585             self.process_ie_result(info, download=True)
2586         except (DownloadError, EntryNotInPlaylist):
2587             webpage_url = info.get('webpage_url')
2588             if webpage_url is not None:
2589                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2590                 return self.download([webpage_url])
2591             else:
2592                 raise
2593         return self._download_retcode
2594
2595     @staticmethod
2596     def filter_requested_info(info_dict, actually_filter=True):
2597         if not actually_filter:
2598             info_dict['epoch'] = int(time.time())
2599             return info_dict
2600         exceptions = {
2601             'remove': ['requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries'],
2602             'keep': ['_type'],
2603         }
2604         keep_key = lambda k: k in exceptions['keep'] or not (k.startswith('_') or k in exceptions['remove'])
2605         filter_fn = lambda obj: (
2606             list(map(filter_fn, obj)) if isinstance(obj, (list, tuple))
2607             else obj if not isinstance(obj, dict)
2608             else dict((k, filter_fn(v)) for k, v in obj.items() if keep_key(k)))
2609         return filter_fn(info_dict)
2610
2611     def run_pp(self, pp, infodict):
2612         files_to_delete = []
2613         if '__files_to_move' not in infodict:
2614             infodict['__files_to_move'] = {}
2615         files_to_delete, infodict = pp.run(infodict)
2616         if not files_to_delete:
2617             return infodict
2618
2619         if self.params.get('keepvideo', False):
2620             for f in files_to_delete:
2621                 infodict['__files_to_move'].setdefault(f, '')
2622         else:
2623             for old_filename in set(files_to_delete):
2624                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2625                 try:
2626                     os.remove(encodeFilename(old_filename))
2627                 except (IOError, OSError):
2628                     self.report_warning('Unable to remove downloaded original file')
2629                 if old_filename in infodict['__files_to_move']:
2630                     del infodict['__files_to_move'][old_filename]
2631         return infodict
2632
2633     @staticmethod
2634     def post_extract(info_dict):
2635         def actual_post_extract(info_dict):
2636             if info_dict.get('_type') in ('playlist', 'multi_video'):
2637                 for video_dict in info_dict.get('entries', {}):
2638                     actual_post_extract(video_dict or {})
2639                 return
2640
2641             if '__post_extractor' not in info_dict:
2642                 return
2643             post_extractor = info_dict['__post_extractor']
2644             if post_extractor:
2645                 info_dict.update(post_extractor().items())
2646             del info_dict['__post_extractor']
2647             return
2648
2649         actual_post_extract(info_dict or {})
2650
2651     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
2652         info = dict(ie_info)
2653         info['__files_to_move'] = files_to_move or {}
2654         for pp in self._pps[key]:
2655             info = self.run_pp(pp, info)
2656         return info, info.pop('__files_to_move', None)
2657
2658     def post_process(self, filename, ie_info, files_to_move=None):
2659         """Run all the postprocessors on the given file."""
2660         info = dict(ie_info)
2661         info['filepath'] = filename
2662         info['__files_to_move'] = files_to_move or {}
2663
2664         for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
2665             info = self.run_pp(pp, info)
2666         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
2667         del info['__files_to_move']
2668         for pp in self._pps['after_move']:
2669             info = self.run_pp(pp, info)
2670         return info
2671
2672     def _make_archive_id(self, info_dict):
2673         video_id = info_dict.get('id')
2674         if not video_id:
2675             return
2676         # Future-proof against any change in case
2677         # and backwards compatibility with prior versions
2678         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
2679         if extractor is None:
2680             url = str_or_none(info_dict.get('url'))
2681             if not url:
2682                 return
2683             # Try to find matching extractor for the URL and take its ie_key
2684             for ie in self._ies:
2685                 if ie.suitable(url):
2686                     extractor = ie.ie_key()
2687                     break
2688             else:
2689                 return
2690         return '%s %s' % (extractor.lower(), video_id)
2691
2692     def in_download_archive(self, info_dict):
2693         fn = self.params.get('download_archive')
2694         if fn is None:
2695             return False
2696
2697         vid_id = self._make_archive_id(info_dict)
2698         if not vid_id:
2699             return False  # Incomplete video information
2700
2701         return vid_id in self.archive
2702
2703     def record_download_archive(self, info_dict):
2704         fn = self.params.get('download_archive')
2705         if fn is None:
2706             return
2707         vid_id = self._make_archive_id(info_dict)
2708         assert vid_id
2709         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2710             archive_file.write(vid_id + '\n')
2711         self.archive.add(vid_id)
2712
2713     @staticmethod
2714     def format_resolution(format, default='unknown'):
2715         if format.get('vcodec') == 'none':
2716             return 'audio only'
2717         if format.get('resolution') is not None:
2718             return format['resolution']
2719         if format.get('width') and format.get('height'):
2720             res = '%dx%d' % (format['width'], format['height'])
2721         elif format.get('height'):
2722             res = '%sp' % format['height']
2723         elif format.get('width'):
2724             res = '%dx?' % format['width']
2725         else:
2726             res = default
2727         return res
2728
2729     def _format_note(self, fdict):
2730         res = ''
2731         if fdict.get('ext') in ['f4f', 'f4m']:
2732             res += '(unsupported) '
2733         if fdict.get('language'):
2734             if res:
2735                 res += ' '
2736             res += '[%s] ' % fdict['language']
2737         if fdict.get('format_note') is not None:
2738             res += fdict['format_note'] + ' '
2739         if fdict.get('tbr') is not None:
2740             res += '%4dk ' % fdict['tbr']
2741         if fdict.get('container') is not None:
2742             if res:
2743                 res += ', '
2744             res += '%s container' % fdict['container']
2745         if (fdict.get('vcodec') is not None
2746                 and fdict.get('vcodec') != 'none'):
2747             if res:
2748                 res += ', '
2749             res += fdict['vcodec']
2750             if fdict.get('vbr') is not None:
2751                 res += '@'
2752         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2753             res += 'video@'
2754         if fdict.get('vbr') is not None:
2755             res += '%4dk' % fdict['vbr']
2756         if fdict.get('fps') is not None:
2757             if res:
2758                 res += ', '
2759             res += '%sfps' % fdict['fps']
2760         if fdict.get('acodec') is not None:
2761             if res:
2762                 res += ', '
2763             if fdict['acodec'] == 'none':
2764                 res += 'video only'
2765             else:
2766                 res += '%-5s' % fdict['acodec']
2767         elif fdict.get('abr') is not None:
2768             if res:
2769                 res += ', '
2770             res += 'audio'
2771         if fdict.get('abr') is not None:
2772             res += '@%3dk' % fdict['abr']
2773         if fdict.get('asr') is not None:
2774             res += ' (%5dHz)' % fdict['asr']
2775         if fdict.get('filesize') is not None:
2776             if res:
2777                 res += ', '
2778             res += format_bytes(fdict['filesize'])
2779         elif fdict.get('filesize_approx') is not None:
2780             if res:
2781                 res += ', '
2782             res += '~' + format_bytes(fdict['filesize_approx'])
2783         return res
2784
2785     def _format_note_table(self, f):
2786         def join_fields(*vargs):
2787             return ', '.join((val for val in vargs if val != ''))
2788
2789         return join_fields(
2790             'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
2791             format_field(f, 'language', '[%s]'),
2792             format_field(f, 'format_note'),
2793             format_field(f, 'container', ignore=(None, f.get('ext'))),
2794             format_field(f, 'asr', '%5dHz'))
2795
2796     def list_formats(self, info_dict):
2797         formats = info_dict.get('formats', [info_dict])
2798         new_format = self.params.get('listformats_table', False)
2799         if new_format:
2800             table = [
2801                 [
2802                     format_field(f, 'format_id'),
2803                     format_field(f, 'ext'),
2804                     self.format_resolution(f),
2805                     format_field(f, 'fps', '%d'),
2806                     '|',
2807                     format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
2808                     format_field(f, 'tbr', '%4dk'),
2809                     shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
2810                     '|',
2811                     format_field(f, 'vcodec', default='unknown').replace('none', ''),
2812                     format_field(f, 'vbr', '%4dk'),
2813                     format_field(f, 'acodec', default='unknown').replace('none', ''),
2814                     format_field(f, 'abr', '%3dk'),
2815                     format_field(f, 'asr', '%5dHz'),
2816                     self._format_note_table(f)]
2817                 for f in formats
2818                 if f.get('preference') is None or f['preference'] >= -1000]
2819             header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', '  TBR', 'PROTO',
2820                            '|', 'VCODEC', '  VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
2821         else:
2822             table = [
2823                 [
2824                     format_field(f, 'format_id'),
2825                     format_field(f, 'ext'),
2826                     self.format_resolution(f),
2827                     self._format_note(f)]
2828                 for f in formats
2829                 if f.get('preference') is None or f['preference'] >= -1000]
2830             header_line = ['format code', 'extension', 'resolution', 'note']
2831
2832         self.to_screen(
2833             '[info] Available formats for %s:\n%s' % (info_dict['id'], render_table(
2834                 header_line,
2835                 table,
2836                 delim=new_format,
2837                 extraGap=(0 if new_format else 1),
2838                 hideEmpty=new_format)))
2839
2840     def list_thumbnails(self, info_dict):
2841         thumbnails = info_dict.get('thumbnails')
2842         if not thumbnails:
2843             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
2844             return
2845
2846         self.to_screen(
2847             '[info] Thumbnails for %s:' % info_dict['id'])
2848         self.to_screen(render_table(
2849             ['ID', 'width', 'height', 'URL'],
2850             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
2851
2852     def list_subtitles(self, video_id, subtitles, name='subtitles'):
2853         if not subtitles:
2854             self.to_screen('%s has no %s' % (video_id, name))
2855             return
2856         self.to_screen(
2857             'Available %s for %s:' % (name, video_id))
2858         self.to_screen(render_table(
2859             ['Language', 'formats'],
2860             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
2861                 for lang, formats in subtitles.items()]))
2862
2863     def urlopen(self, req):
2864         """ Start an HTTP download """
2865         if isinstance(req, compat_basestring):
2866             req = sanitized_Request(req)
2867         return self._opener.open(req, timeout=self._socket_timeout)
2868
2869     def print_debug_header(self):
2870         if not self.params.get('verbose'):
2871             return
2872
2873         if type('') is not compat_str:
2874             # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
2875             self.report_warning(
2876                 'Your Python is broken! Update to a newer and supported version')
2877
2878         stdout_encoding = getattr(
2879             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
2880         encoding_str = (
2881             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
2882                 locale.getpreferredencoding(),
2883                 sys.getfilesystemencoding(),
2884                 stdout_encoding,
2885                 self.get_encoding()))
2886         write_string(encoding_str, encoding=None)
2887
2888         source = (
2889             '(exe)' if hasattr(sys, 'frozen')
2890             else '(zip)' if isinstance(globals().get('__loader__'), zipimporter)
2891             else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py'
2892             else '')
2893         self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))
2894         if _LAZY_LOADER:
2895             self._write_string('[debug] Lazy loading extractors enabled\n')
2896         if _PLUGIN_CLASSES:
2897             self._write_string(
2898                 '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
2899         try:
2900             sp = subprocess.Popen(
2901                 ['git', 'rev-parse', '--short', 'HEAD'],
2902                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
2903                 cwd=os.path.dirname(os.path.abspath(__file__)))
2904             out, err = process_communicate_or_kill(sp)
2905             out = out.decode().strip()
2906             if re.match('[0-9a-f]+', out):
2907                 self._write_string('[debug] Git HEAD: %s\n' % out)
2908         except Exception:
2909             try:
2910                 sys.exc_clear()
2911             except Exception:
2912                 pass
2913
2914         def python_implementation():
2915             impl_name = platform.python_implementation()
2916             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
2917                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
2918             return impl_name
2919
2920         self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
2921             platform.python_version(),
2922             python_implementation(),
2923             platform.architecture()[0],
2924             platform_name()))
2925
2926         exe_versions = FFmpegPostProcessor.get_versions(self)
2927         exe_versions['rtmpdump'] = rtmpdump_version()
2928         exe_versions['phantomjs'] = PhantomJSwrapper._version()
2929         exe_str = ', '.join(
2930             '%s %s' % (exe, v)
2931             for exe, v in sorted(exe_versions.items())
2932             if v
2933         )
2934         if not exe_str:
2935             exe_str = 'none'
2936         self._write_string('[debug] exe versions: %s\n' % exe_str)
2937
2938         proxy_map = {}
2939         for handler in self._opener.handlers:
2940             if hasattr(handler, 'proxies'):
2941                 proxy_map.update(handler.proxies)
2942         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
2943
2944         if self.params.get('call_home', False):
2945             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
2946             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
2947             return
2948             latest_version = self.urlopen(
2949                 'https://yt-dl.org/latest/version').read().decode('utf-8')
2950             if version_tuple(latest_version) > version_tuple(__version__):
2951                 self.report_warning(
2952                     'You are using an outdated version (newest version: %s)! '
2953                     'See https://yt-dl.org/update if you need help updating.' %
2954                     latest_version)
2955
2956     def _setup_opener(self):
2957         timeout_val = self.params.get('socket_timeout')
2958         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
2959
2960         opts_cookiefile = self.params.get('cookiefile')
2961         opts_proxy = self.params.get('proxy')
2962
2963         if opts_cookiefile is None:
2964             self.cookiejar = compat_cookiejar.CookieJar()
2965         else:
2966             opts_cookiefile = expand_path(opts_cookiefile)
2967             self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
2968             if os.access(opts_cookiefile, os.R_OK):
2969                 self.cookiejar.load(ignore_discard=True, ignore_expires=True)
2970
2971         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
2972         if opts_proxy is not None:
2973             if opts_proxy == '':
2974                 proxies = {}
2975             else:
2976                 proxies = {'http': opts_proxy, 'https': opts_proxy}
2977         else:
2978             proxies = compat_urllib_request.getproxies()
2979             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
2980             if 'http' in proxies and 'https' not in proxies:
2981                 proxies['https'] = proxies['http']
2982         proxy_handler = PerRequestProxyHandler(proxies)
2983
2984         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
2985         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
2986         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
2987         redirect_handler = YoutubeDLRedirectHandler()
2988         data_handler = compat_urllib_request_DataHandler()
2989
2990         # When passing our own FileHandler instance, build_opener won't add the
2991         # default FileHandler and allows us to disable the file protocol, which
2992         # can be used for malicious purposes (see
2993         # https://github.com/ytdl-org/youtube-dl/issues/8227)
2994         file_handler = compat_urllib_request.FileHandler()
2995
2996         def file_open(*args, **kwargs):
2997             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
2998         file_handler.file_open = file_open
2999
3000         opener = compat_urllib_request.build_opener(
3001             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3002
3003         # Delete the default user-agent header, which would otherwise apply in
3004         # cases where our custom HTTP handler doesn't come into play
3005         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3006         opener.addheaders = []
3007         self._opener = opener
3008
3009     def encode(self, s):
3010         if isinstance(s, bytes):
3011             return s  # Already encoded
3012
3013         try:
3014             return s.encode(self.get_encoding())
3015         except UnicodeEncodeError as err:
3016             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3017             raise
3018
3019     def get_encoding(self):
3020         encoding = self.params.get('encoding')
3021         if encoding is None:
3022             encoding = preferredencoding()
3023         return encoding
3024
3025     def _write_thumbnails(self, info_dict, filename):  # return the extensions
3026         write_all = self.params.get('write_all_thumbnails', False)
3027         thumbnails = []
3028         if write_all or self.params.get('writethumbnail', False):
3029             thumbnails = info_dict.get('thumbnails') or []
3030         multiple = write_all and len(thumbnails) > 1
3031
3032         ret = []
3033         for t in thumbnails[::1 if write_all else -1]:
3034             thumb_ext = determine_ext(t['url'], 'jpg')
3035             suffix = '%s.' % t['id'] if multiple else ''
3036             thumb_display_id = '%s ' % t['id'] if multiple else ''
3037             t['filepath'] = thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))
3038
3039             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
3040                 ret.append(suffix + thumb_ext)
3041                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
3042                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
3043             else:
3044                 self.to_screen('[%s] %s: Downloading thumbnail %s ...' %
3045                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
3046                 try:
3047                     uf = self.urlopen(t['url'])
3048                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3049                         shutil.copyfileobj(uf, thumbf)
3050                     ret.append(suffix + thumb_ext)
3051                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
3052                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
3053                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
3054                     self.report_warning('Unable to download thumbnail "%s": %s' %
3055                                         (t['url'], error_to_compat_str(err)))
3056             if ret and not write_all:
3057                 break
3058         return ret