yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import copy
   9 import datetime
  10 import errno
  11 import fileinput
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import socket
  23 import sys
  24 import time
  25 import tokenize
  26 import traceback
  27 import random
  28
  29 from string import ascii_letters
  30 from zipimport import zipimporter
  31
  32 from .compat import (
  33     compat_basestring,
  34     compat_cookiejar,
  35     compat_get_terminal_size,
  36     compat_http_client,
  37     compat_kwargs,
  38     compat_numeric_types,
  39     compat_os_name,
  40     compat_str,
  41     compat_tokenize_tokenize,
  42     compat_urllib_error,
  43     compat_urllib_request,
  44     compat_urllib_request_DataHandler,
  45 )
  46 from .utils import (
  47     age_restricted,
  48     args_to_str,
  49     ContentTooShortError,
  50     date_from_str,
  51     DateRange,
  52     DEFAULT_OUTTMPL,
  53     OUTTMPL_TYPES,
  54     determine_ext,
  55     determine_protocol,
  56     DOT_DESKTOP_LINK_TEMPLATE,
  57     DOT_URL_LINK_TEMPLATE,
  58     DOT_WEBLOC_LINK_TEMPLATE,
  59     DownloadError,
  60     encode_compat_str,
  61     encodeFilename,
  62     error_to_compat_str,
  63     EntryNotInPlaylist,
  64     ExistingVideoReached,
  65     expand_path,
  66     ExtractorError,
  67     float_or_none,
  68     format_bytes,
  69     format_field,
  70     FORMAT_RE,
  71     formatSeconds,
  72     GeoRestrictedError,
  73     int_or_none,
  74     iri_to_uri,
  75     ISO3166Utils,
  76     locked_file,
  77     make_dir,
  78     make_HTTPS_handler,
  79     MaxDownloadsReached,
  80     orderedSet,
  81     PagedList,
  82     parse_filesize,
  83     PerRequestProxyHandler,
  84     platform_name,
  85     PostProcessingError,
  86     preferredencoding,
  87     prepend_extension,
  88     register_socks_protocols,
  89     render_table,
  90     replace_extension,
  91     RejectedVideoReached,
  92     SameFileError,
  93     sanitize_filename,
  94     sanitize_path,
  95     sanitize_url,
  96     sanitized_Request,
  97     std_headers,
  98     str_or_none,
  99     strftime_or_none,
 100     subtitles_filename,
 101     to_high_limit_path,
 102     UnavailableVideoError,
 103     url_basename,
 104     version_tuple,
 105     write_json_file,
 106     write_string,
 107     YoutubeDLCookieJar,
 108     YoutubeDLCookieProcessor,
 109     YoutubeDLHandler,
 110     YoutubeDLRedirectHandler,
 111     process_communicate_or_kill,
 112 )
 113 from .cache import Cache
 114 from .extractor import (
 115     gen_extractor_classes,
 116     get_info_extractor,
 117     _LAZY_LOADER,
 118     _PLUGIN_CLASSES
 119 )
 120 from .extractor.openload import PhantomJSwrapper
 121 from .downloader import (
 122     get_suitable_downloader,
 123     shorten_protocol_name
 124 )
 125 from .downloader.rtmp import rtmpdump_version
 126 from .postprocessor import (
 127     FFmpegFixupM3u8PP,
 128     FFmpegFixupM4aPP,
 129     FFmpegFixupStretchedPP,
 130     FFmpegMergerPP,
 131     FFmpegPostProcessor,
 132     # FFmpegSubtitlesConvertorPP,
 133     get_postprocessor,
 134     MoveFilesAfterDownloadPP,
 135 )
 136 from .version import __version__
 137
 138 if compat_os_name == 'nt':
 139     import ctypes
 140
 141
 142 class YoutubeDL(object):
 143     """YoutubeDL class.
 144
    YoutubeDL objects are the ones responsible for downloading the
 146     actual video file and writing it to disk if the user has requested
 147     it, among some other tasks. In most cases there should be one per
 148     program. As, given a video URL, the downloader doesn't know how to
 149     extract all the needed information, task that InfoExtractors do, it
 150     has to pass the URL to one of them.
 151
 152     For this, YoutubeDL objects have a method that allows
 153     InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
 155     finds that reports being able to handle it. The InfoExtractor extracts
 156     all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
 158     Downloader to download the video.
 159
 160     YoutubeDL objects accept a lot of parameters. In order not to saturate
 161     the object constructor with arguments, it receives a dictionary of
 162     options instead. These options are available through the params
 163     attribute for the InfoExtractors to use. The YoutubeDL also
 164     registers itself as the downloader in charge for the InfoExtractors
 165     that are added to it, so this is a "mutual registration".
 166
 167     Available options:
 168
 169     username:          Username for authentication purposes.
 170     password:          Password for authentication purposes.
 171     videopassword:     Password for accessing a video.
 172     ap_mso:            Adobe Pass multiple-system operator identifier.
 173     ap_username:       Multiple-system operator account username.
 174     ap_password:       Multiple-system operator account password.
 175     usenetrc:          Use netrc for authentication instead.
 176     verbose:           Print additional info to stdout.
 177     quiet:             Do not print messages to stdout.
 178     no_warnings:       Do not print out anything for warnings.
 179     forceurl:          Force printing final URL.
 180     forcetitle:        Force printing title.
 181     forceid:           Force printing ID.
 182     forcethumbnail:    Force printing thumbnail URL.
 183     forcedescription:  Force printing description.
 184     forcefilename:     Force printing final filename.
 185     forceduration:     Force printing duration.
 186     forcejson:         Force printing info_dict as JSON.
 187     dump_single_json:  Force printing the info_dict of the whole playlist
 188                        (or video) as a single JSON line.
 189     force_write_download_archive: Force writing download archive regardless
 190                        of 'skip_download' or 'simulate'.
 191     simulate:          Do not download the video files.
 192     format:            Video format code. see "FORMAT SELECTION" for more details.
 193     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
    ignore_no_formats_error: Ignore "No video formats" error. Useful for
 195                        extracting metadata even if the video is not actually
 196                        available for download (experimental)
 197     format_sort:       How to sort the video formats. see "Sorting Formats"
 198                        for more details.
 199     format_sort_force: Force the given format_sort. see "Sorting Formats"
 200                        for more details.
 201     allow_multiple_video_streams:   Allow multiple video streams to be merged
 202                        into a single file
 203     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 204                        into a single file
 205     paths:             Dictionary of output paths. The allowed keys are 'home'
 206                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 207     outtmpl:           Dictionary of templates for output names. Allowed keys
 208                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
                       A string is also accepted for backward compatibility
 210     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 211     restrictfilenames: Do not allow "&" and spaces in file names
 212     trim_file_name:    Limit length of filename (extension excluded)
 213     windowsfilenames:  Force the filenames to be windows compatible
 214     ignoreerrors:      Do not stop on download errors
 215                        (Default True when running yt-dlp,
 216                        but False when directly accessing YoutubeDL class)
 217     force_generic_extractor: Force downloader to use the generic extractor
 218     overwrites:        Overwrite all video and metadata files if True,
 219                        overwrite only non-video files if None
 220                        and don't overwrite any file if False
 221     playliststart:     Playlist item to start at.
 222     playlistend:       Playlist item to end at.
 223     playlist_items:    Specific indices of playlist to download.
 224     playlistreverse:   Download playlist items in reverse order.
 225     playlistrandom:    Download playlist items in random order.
 226     matchtitle:        Download only matching titles.
 227     rejecttitle:       Reject downloads for matching titles.
 228     logger:            Log messages to a logging.Logger instance.
 229     logtostderr:       Log messages to stderr instead of stdout.
 230     writedescription:  Write the video description to a .description file
 231     writeinfojson:     Write the video description to a .info.json file
 232     clean_infojson:    Remove private fields from the infojson
 233     writecomments:     Extract video comments. This will not be written to disk
 234                        unless writeinfojson is also given
 235     writeannotations:  Write the video annotations to a .annotations.xml file
 236     writethumbnail:    Write the thumbnail image to a file
 237     allow_playlist_files: Whether to write playlists' description, infojson etc
 238                        also to disk when using the 'write*' options
 239     write_all_thumbnails:  Write all thumbnail formats to files
 240     writelink:         Write an internet shortcut file, depending on the
 241                        current platform (.url/.webloc/.desktop)
 242     writeurllink:      Write a Windows internet shortcut file (.url)
 243     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 244     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 245     writesubtitles:    Write the video subtitles to a file
 246     writeautomaticsub: Write the automatically generated subtitles to a file
 247     allsubtitles:      Downloads all the subtitles of the video
 248                        (requires writesubtitles or writeautomaticsub)
 249     listsubtitles:     Lists all available subtitles for the video
 250     subtitlesformat:   The format code for subtitles
 251     subtitleslangs:    List of languages of the subtitles to download
 252     keepvideo:         Keep the video file after post-processing
 253     daterange:         A DateRange object, download only if the upload_date is in the range.
 254     skip_download:     Skip the actual download of the video file
 255     cachedir:          Location of the cache files in the filesystem.
 256                        False to disable filesystem cache.
 257     noplaylist:        Download single video instead of a playlist if in doubt.
 258     age_limit:         An integer representing the user's age in years.
 259                        Unsuitable videos for the given age are skipped.
 260     min_views:         An integer representing the minimum view count the video
 261                        must have in order to not be skipped.
 262                        Videos without view count information are always
 263                        downloaded. None for no limit.
 264     max_views:         An integer representing the maximum view count.
 265                        Videos that are more popular than that are not
 266                        downloaded.
 267                        Videos without view count information are always
 268                        downloaded. None for no limit.
 269     download_archive:  File name of a file where all downloads are recorded.
 270                        Videos already present in the file are not downloaded
 271                        again.
 272     break_on_existing: Stop the download process after attempting to download a
 273                        file that is in the archive.
 274     break_on_reject:   Stop the download process when encountering a video that
 275                        has been filtered out.
 276     cookiefile:        File name where cookies should be read from and dumped to
 277     nocheckcertificate:Do not verify SSL certificates
 278     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 279                        At the moment, this is only supported by YouTube.
 280     proxy:             URL of the proxy server to use
 281     geo_verification_proxy:  URL of the proxy to use for IP address verification
 282                        on geo-restricted sites.
 283     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 284     bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
 286     debug_printtraffic:Print out sent and received HTTP traffic
 287     include_ads:       Download ads as well
 288     default_search:    Prepend this string if an input url is not valid.
 289                        'auto' for elaborate guessing
 290     encoding:          Use this encoding instead of the system-specified.
 291     extract_flat:      Do not resolve URLs, return the immediate result.
 292                        Pass in 'in_playlist' to only show this behavior for
 293                        playlist items.
 294     postprocessors:    A list of dictionaries, each with an entry
 295                        * key:  The name of the postprocessor. See
 296                                yt_dlp/postprocessor/__init__.py for a list.
 297                        * when: When to run the postprocessor. Can be one of
 298                                pre_process|before_dl|post_process|after_move.
 299                                Assumed to be 'post_process' if not given
 300     post_hooks:        A list of functions that get called as the final step
 301                        for each video file, after all postprocessors have been
 302                        called. The filename will be passed as the only argument.
 303     progress_hooks:    A list of functions that get called on download
 304                        progress, with a dictionary with the entries
 305                        * status: One of "downloading", "error", or "finished".
 306                                  Check this first and ignore unknown values.
 307
 308                        If status is one of "downloading", or "finished", the
 309                        following properties may also be present:
 310                        * filename: The final filename (always present)
 311                        * tmpfilename: The filename we're currently writing to
 312                        * downloaded_bytes: Bytes on disk
 313                        * total_bytes: Size of the whole file, None if unknown
 314                        * total_bytes_estimate: Guess of the eventual file size,
 315                                                None if unavailable.
 316                        * elapsed: The number of seconds since download started.
 317                        * eta: The estimated time in seconds, None if unknown
 318                        * speed: The download speed in bytes/second, None if
 319                                 unknown
 320                        * fragment_index: The counter of the currently
 321                                          downloaded video fragment.
 322                        * fragment_count: The number of fragments (= individual
 323                                          files that will be merged)
 324
 325                        Progress hooks are guaranteed to be called at least once
 326                        (with status "finished") if the download is successful.
 327     merge_output_format: Extension to use when merging formats.
 328     final_ext:         Expected final extension; used to detect when the file was
 329                        already downloaded and converted. "merge_output_format" is
 330                        replaced by this extension when given
 331     fixup:             Automatically correct known faults of the file.
 332                        One of:
 333                        - "never": do nothing
 334                        - "warn": only emit a warning
 335                        - "detect_or_warn": check whether we can do anything
 336                                            about it, warn otherwise (default)
 337     source_address:    Client-side IP address to bind to.
 338     call_home:         Boolean, true iff we are allowed to contact the
 339                        yt-dlp servers for debugging. (BROKEN)
 340     sleep_interval_requests: Number of seconds to sleep between requests
 341                        during extraction
 342     sleep_interval:    Number of seconds to sleep before each download when
 343                        used alone or a lower bound of a range for randomized
 344                        sleep before each download (minimum possible number
 345                        of seconds to sleep) when used along with
 346                        max_sleep_interval.
 347     max_sleep_interval:Upper bound of a range for randomized sleep before each
 348                        download (maximum possible number of seconds to sleep).
 349                        Must only be used along with sleep_interval.
 350                        Actual sleep time will be a random float from range
 351                        [sleep_interval; max_sleep_interval].
 352     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 353     listformats:       Print an overview of available video formats and exit.
 354     list_thumbnails:   Print a table of all thumbnails and exit.
 355     match_filter:      A function that gets called with the info_dict of
 356                        every video.
 357                        If it returns a message, the video is ignored.
 358                        If it returns None, the video is downloaded.
 359                        match_filter_func in utils.py is one example for this.
 360     no_color:          Do not emit color codes in output.
 361     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 362                        HTTP header
 363     geo_bypass_country:
 364                        Two-letter ISO 3166-2 country code that will be used for
 365                        explicit geographic restriction bypassing via faking
 366                        X-Forwarded-For HTTP header
 367     geo_bypass_ip_block:
 368                        IP range in CIDR notation that will be used similarly to
 369                        geo_bypass_country
 370
 371     The following options determine which downloader is picked:
 372     external_downloader: A dictionary of protocol keys and the executable of the
 373                        external downloader to use for it. The allowed protocols
 374                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 375                        Set the value to 'native' to use the native downloader
 376     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 377                        or {'m3u8': 'ffmpeg'} instead.
 378                        Use the native HLS downloader instead of ffmpeg/avconv
 379                        if True, otherwise use ffmpeg/avconv if False, otherwise
 380                        use downloader suggested by extractor if None.
 381
 382     The following parameters are not used by YoutubeDL itself, they are used by
 383     the downloader (see yt_dlp/downloader/common.py):
 384     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 385     noresizebuffer, retries, continuedl, noprogress, consoletitle,
 386     xattr_set_filesize, external_downloader_args, hls_use_mpegts,
 387     http_chunk_size.
 388
 389     The following options are used by the post processors:
 390     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 391                        otherwise prefer ffmpeg. (avconv support is deprecated)
 392     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 393                        to the binary or its containing directory.
 394     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 395                         and a list of additional command-line arguments for the
 396                         postprocessor/executable. The dict can also have "PP+EXE" keys
 397                         which are used when the given exe is used by the given PP.
                        Use 'default' as the name for arguments to be passed to all PP
 399
 400     The following options are used by the extractors:
 401     extractor_retries: Number of times to retry for known errors
 402     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 403     hls_split_discontinuity: Split HLS playlists to different formats at
 404                        discontinuities such as ad breaks (default: False)
 405     youtube_include_dash_manifest: If True (default), DASH manifests and related
 406                        data will be downloaded and processed by extractor.
 407                        You can reduce network I/O by disabling it if you don't
 408                        care about DASH. (only for youtube)
 409     youtube_include_hls_manifest: If True (default), HLS manifests and related
 410                        data will be downloaded and processed by extractor.
 411                        You can reduce network I/O by disabling it if you don't
 412                        care about HLS. (only for youtube)
 413     """
 414
    # Set of field names grouped together as "numeric" fields.
    # NOTE(review): the code that consults this set is outside this section;
    # presumably it is used when formatting output templates -- confirm.
    _NUMERIC_FIELDS = set((
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'upload_year', 'upload_month', 'upload_day',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
        'playlist_index',
    ))

    # Class-level defaults. __init__ rebinds params, _ies, _pps,
    # __prepare_filename_warned, _first_webpage_request, _download_retcode,
    # _num_downloads and _screen_file on every instance.
    params = None
    _ies = []
    _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
    __prepare_filename_warned = False
    _first_webpage_request = True
    _download_retcode = None
    _num_downloads = None
    # NOTE(review): _playlist_level and _playlist_urls are not rebound in
    # __init__, so the set below is shared between instances unless it is
    # reassigned elsewhere -- confirm.
    _playlist_level = 0
    _playlist_urls = set()
    _screen_file = None
 436
 437     def __init__(self, params=None, auto_init=True):
 438         """Create a FileDownloader object with the given options."""
 439         if params is None:
 440             params = {}
 441         self._ies = []
 442         self._ies_instances = {}
 443         self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 444         self.__prepare_filename_warned = False
 445         self._first_webpage_request = True
 446         self._post_hooks = []
 447         self._progress_hooks = []
 448         self._download_retcode = 0
 449         self._num_downloads = 0
 450         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 451         self._err_file = sys.stderr
 452         self.params = {
 453             # Default parameters
 454             'nocheckcertificate': False,
 455         }
 456         self.params.update(params)
 457         self.cache = Cache(self)
 458         self.archive = set()
 459
 460         """Preload the archive, if any is specified"""
 461         def preload_download_archive(self):
 462             fn = self.params.get('download_archive')
 463             if fn is None:
 464                 return False
 465             try:
 466                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 467                     for line in archive_file:
 468                         self.archive.add(line.strip())
 469             except IOError as ioe:
 470                 if ioe.errno != errno.ENOENT:
 471                     raise
 472                 return False
 473             return True
 474
 475         def check_deprecated(param, option, suggestion):
 476             if self.params.get(param) is not None:
 477                 self.report_warning(
 478                     '%s is deprecated. Use %s instead.' % (option, suggestion))
 479                 return True
 480             return False
 481
 482         if self.params.get('verbose'):
 483             self.to_stdout('[debug] Loading archive file %r' % self.params.get('download_archive'))
 484
 485         preload_download_archive(self)
 486
 487         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 488             if self.params.get('geo_verification_proxy') is None:
 489                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 490
 491         if self.params.get('final_ext'):
 492             if self.params.get('merge_output_format'):
 493                 self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
 494             self.params['merge_output_format'] = self.params['final_ext']
 495
 496         if 'overwrites' in self.params and self.params['overwrites'] is None:
 497             del self.params['overwrites']
 498
 499         check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
 500         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 501         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 502
 503         if params.get('bidi_workaround', False):
 504             try:
 505                 import pty
 506                 master, slave = pty.openpty()
 507                 width = compat_get_terminal_size().columns
 508                 if width is None:
 509                     width_args = []
 510                 else:
 511                     width_args = ['-w', str(width)]
 512                 sp_kwargs = dict(
 513                     stdin=subprocess.PIPE,
 514                     stdout=slave,
 515                     stderr=self._err_file)
 516                 try:
 517                     self._output_process = subprocess.Popen(
 518                         ['bidiv'] + width_args, **sp_kwargs
 519                     )
 520                 except OSError:
 521                     self._output_process = subprocess.Popen(
 522                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 523                 self._output_channel = os.fdopen(master, 'rb')
 524             except OSError as ose:
 525                 if ose.errno == errno.ENOENT:
 526                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 527                 else:
 528                     raise
 529
 530         if (sys.platform != 'win32'
 531                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 532                 and not params.get('restrictfilenames', False)):
 533             # Unicode filesystem API will throw errors (#1474, #13027)
 534             self.report_warning(
 535                 'Assuming --restrict-filenames since file system encoding '
 536                 'cannot encode all characters. '
 537                 'Set the LC_ALL environment variable to fix this.')
 538             self.params['restrictfilenames'] = True
 539
 540         self.outtmpl_dict = self.parse_outtmpl()
 541
 542         self._setup_opener()
 543
 544         if auto_init:
 545             self.print_debug_header()
 546             self.add_default_info_extractors()
 547
 548         for pp_def_raw in self.params.get('postprocessors', []):
 549             pp_class = get_postprocessor(pp_def_raw['key'])
 550             pp_def = dict(pp_def_raw)
 551             del pp_def['key']
 552             if 'when' in pp_def:
 553                 when = pp_def['when']
 554                 del pp_def['when']
 555             else:
 556                 when = 'post_process'
 557             pp = pp_class(self, **compat_kwargs(pp_def))
 558             self.add_post_processor(pp, when=when)
 559
 560         for ph in self.params.get('post_hooks', []):
 561             self.add_post_hook(ph)
 562
 563         for ph in self.params.get('progress_hooks', []):
 564             self.add_progress_hook(ph)
 565
 566         register_socks_protocols()
 567
 568     def warn_if_short_id(self, argv):
 569         # short YouTube ID starting with dash?
 570         idxs = [
 571             i for i, a in enumerate(argv)
 572             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 573         if idxs:
 574             correct_argv = (
 575                 ['yt-dlp']
 576                 + [a for i, a in enumerate(argv) if i not in idxs]
 577                 + ['--'] + [argv[i] for i in idxs]
 578             )
 579             self.report_warning(
 580                 'Long argument string detected. '
 581                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 582                 args_to_str(correct_argv))
 583
 584     def add_info_extractor(self, ie):
 585         """Add an InfoExtractor object to the end of the list."""
 586         self._ies.append(ie)
 587         if not isinstance(ie, type):
 588             self._ies_instances[ie.ie_key()] = ie
 589             ie.set_downloader(self)
 590
 591     def get_info_extractor(self, ie_key):
 592         """
 593         Get an instance of an IE with name ie_key, it will try to get one from
 594         the _ies list, if there's no instance it will create a new one and add
 595         it to the extractor list.
 596         """
 597         ie = self._ies_instances.get(ie_key)
 598         if ie is None:
 599             ie = get_info_extractor(ie_key)()
 600             self.add_info_extractor(ie)
 601         return ie
 602
 603     def add_default_info_extractors(self):
 604         """
 605         Add the InfoExtractors returned by gen_extractors to the end of the list
 606         """
 607         for ie in gen_extractor_classes():
 608             self.add_info_extractor(ie)
 609
 610     def add_post_processor(self, pp, when='post_process'):
 611         """Add a PostProcessor object to the end of the chain."""
 612         self._pps[when].append(pp)
 613         pp.set_downloader(self)
 614
 615     def add_post_hook(self, ph):
 616         """Add the post hook"""
 617         self._post_hooks.append(ph)
 618
 619     def add_progress_hook(self, ph):
 620         """Add the progress hook (currently only for the file downloader)"""
 621         self._progress_hooks.append(ph)
 622
 623     def _bidi_workaround(self, message):
 624         if not hasattr(self, '_output_channel'):
 625             return message
 626
 627         assert hasattr(self, '_output_process')
 628         assert isinstance(message, compat_str)
 629         line_count = message.count('\n') + 1
 630         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 631         self._output_process.stdin.flush()
 632         res = ''.join(self._output_channel.readline().decode('utf-8')
 633                       for _ in range(line_count))
 634         return res[:-len('\n')]
 635
    def to_screen(self, message, skip_eol=False):
        """Print message to stdout if not in quiet mode.

        Thin wrapper around to_stdout() that always passes check_quiet=True;
        skip_eol is forwarded unchanged and to_stdout()'s result is returned.
        """
        return self.to_stdout(message, skip_eol, check_quiet=True)
 639
 640     def _write_string(self, s, out=None):
 641         write_string(s, out=out, encoding=self.params.get('encoding'))
 642
 643     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 644         """Print message to stdout if not in quiet mode."""
 645         if self.params.get('logger'):
 646             self.params['logger'].debug(message)
 647         elif not check_quiet or not self.params.get('quiet', False):
 648             message = self._bidi_workaround(message)
 649             terminator = ['\n', ''][skip_eol]
 650             output = message + terminator
 651
 652             self._write_string(output, self._screen_file)
 653
 654     def to_stderr(self, message):
 655         """Print message to stderr."""
 656         assert isinstance(message, compat_str)
 657         if self.params.get('logger'):
 658             self.params['logger'].error(message)
 659         else:
 660             message = self._bidi_workaround(message)
 661             output = message + '\n'
 662             self._write_string(output, self._err_file)
 663
 664     def to_console_title(self, message):
 665         if not self.params.get('consoletitle', False):
 666             return
 667         if compat_os_name == 'nt':
 668             if ctypes.windll.kernel32.GetConsoleWindow():
 669                 # c_wchar_p() might not be necessary if `message` is
 670                 # already of type unicode()
 671                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 672         elif 'TERM' in os.environ:
 673             self._write_string('\033]0;%s\007' % message, self._screen_file)
 674
 675     def save_console_title(self):
 676         if not self.params.get('consoletitle', False):
 677             return
 678         if self.params.get('simulate', False):
 679             return
 680         if compat_os_name != 'nt' and 'TERM' in os.environ:
 681             # Save the title on stack
 682             self._write_string('\033[22;0t', self._screen_file)
 683
 684     def restore_console_title(self):
 685         if not self.params.get('consoletitle', False):
 686             return
 687         if self.params.get('simulate', False):
 688             return
 689         if compat_os_name != 'nt' and 'TERM' in os.environ:
 690             # Restore the title from stack
 691             self._write_string('\033[23;0t', self._screen_file)
 692
    def __enter__(self):
        """Enter the context manager: save the console title and return self."""
        self.save_console_title()
        return self
 696
 697     def __exit__(self, *args):
 698         self.restore_console_title()
 699
 700         if self.params.get('cookiefile') is not None:
 701             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 702
 703     def trouble(self, message=None, tb=None):
 704         """Determine action to take when a download problem appears.
 705
 706         Depending on if the downloader has been configured to ignore
 707         download errors or not, this method may throw an exception or
 708         not when errors are found, after printing the message.
 709
 710         tb, if given, is additional traceback information.
 711         """
 712         if message is not None:
 713             self.to_stderr(message)
 714         if self.params.get('verbose'):
 715             if tb is None:
 716                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 717                     tb = ''
 718                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 719                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 720                     tb += encode_compat_str(traceback.format_exc())
 721                 else:
 722                     tb_data = traceback.format_list(traceback.extract_stack())
 723                     tb = ''.join(tb_data)
 724             self.to_stderr(tb)
 725         if not self.params.get('ignoreerrors', False):
 726             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 727                 exc_info = sys.exc_info()[1].exc_info
 728             else:
 729                 exc_info = sys.exc_info()
 730             raise DownloadError(message, exc_info)
 731         self._download_retcode = 1
 732
 733     def report_warning(self, message):
 734         '''
 735         Print the message to stderr, it will be prefixed with 'WARNING:'
 736         If stderr is a tty file the 'WARNING:' will be colored
 737         '''
 738         if self.params.get('logger') is not None:
 739             self.params['logger'].warning(message)
 740         else:
 741             if self.params.get('no_warnings'):
 742                 return
 743             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 744                 _msg_header = '\033[0;33mWARNING:\033[0m'
 745             else:
 746                 _msg_header = 'WARNING:'
 747             warning_message = '%s %s' % (_msg_header, message)
 748             self.to_stderr(warning_message)
 749
 750     def report_error(self, message, tb=None):
 751         '''
 752         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 753         in red if stderr is a tty file.
 754         '''
 755         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 756             _msg_header = '\033[0;31mERROR:\033[0m'
 757         else:
 758             _msg_header = 'ERROR:'
 759         error_message = '%s %s' % (_msg_header, message)
 760         self.trouble(error_message, tb)
 761
 762     def report_file_already_downloaded(self, file_name):
 763         """Report file has already been fully downloaded."""
 764         try:
 765             self.to_screen('[download] %s has already been downloaded' % file_name)
 766         except UnicodeEncodeError:
 767             self.to_screen('[download] The file has already been downloaded')
 768
 769     def report_file_delete(self, file_name):
 770         """Report that existing file will be deleted."""
 771         try:
 772             self.to_screen('Deleting existing file %s' % file_name)
 773         except UnicodeEncodeError:
 774             self.to_screen('Deleting existing file')
 775
 776     def parse_outtmpl(self):
 777         outtmpl_dict = self.params.get('outtmpl', {})
 778         if not isinstance(outtmpl_dict, dict):
 779             outtmpl_dict = {'default': outtmpl_dict}
 780         outtmpl_dict.update({
 781             k: v for k, v in DEFAULT_OUTTMPL.items()
 782             if not outtmpl_dict.get(k)})
 783         for key, val in outtmpl_dict.items():
 784             if isinstance(val, bytes):
 785                 self.report_warning(
 786                     'Parameter outtmpl is bytes, but should be a unicode string. '
 787                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 788         return outtmpl_dict
 789
 790     def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
 791         """ Make the template and info_dict suitable for substitution (outtmpl % info_dict)"""
 792         template_dict = dict(info_dict)
 793
 794         # duration_string
 795         template_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
 796             formatSeconds(info_dict['duration'], '-')
 797             if info_dict.get('duration', None) is not None
 798             else None)
 799
 800         # epoch
 801         template_dict['epoch'] = int(time.time())
 802
 803         # autonumber
 804         autonumber_size = self.params.get('autonumber_size')
 805         if autonumber_size is None:
 806             autonumber_size = 5
 807         template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
 808
 809         # resolution if not defined
 810         if template_dict.get('resolution') is None:
 811             if template_dict.get('width') and template_dict.get('height'):
 812                 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 813             elif template_dict.get('height'):
 814                 template_dict['resolution'] = '%sp' % template_dict['height']
 815             elif template_dict.get('width'):
 816                 template_dict['resolution'] = '%dx?' % template_dict['width']
 817
 818         if sanitize is None:
 819             sanitize = lambda k, v: v
 820         template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
 821                              for k, v in template_dict.items()
 822                              if v is not None and not isinstance(v, (list, tuple, dict)))
 823         na = self.params.get('outtmpl_na_placeholder', 'NA')
 824         template_dict = collections.defaultdict(lambda: na, template_dict)
 825
 826         # For fields playlist_index and autonumber convert all occurrences
 827         # of %(field)s to %(field)0Nd for backward compatibility
 828         field_size_compat_map = {
 829             'playlist_index': len(str(template_dict['n_entries'])),
 830             'autonumber': autonumber_size,
 831         }
 832         FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
 833         mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
 834         if mobj:
 835             outtmpl = re.sub(
 836                 FIELD_SIZE_COMPAT_RE,
 837                 r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
 838                 outtmpl)
 839
 840         numeric_fields = list(self._NUMERIC_FIELDS)
 841
 842         # Format date
 843         FORMAT_DATE_RE = FORMAT_RE.format(r'(?P<key>(?P<field>\w+)>(?P<format>.+?))')
 844         for mobj in re.finditer(FORMAT_DATE_RE, outtmpl):
 845             conv_type, field, frmt, key = mobj.group('type', 'field', 'format', 'key')
 846             if key in template_dict:
 847                 continue
 848             value = strftime_or_none(template_dict.get(field), frmt, na)
 849             if conv_type in 'crs':  # string
 850                 value = sanitize(field, value)
 851             else:  # number
 852                 numeric_fields.append(key)
 853                 value = float_or_none(value, default=None)
 854             if value is not None:
 855                 template_dict[key] = value
 856
 857         # Missing numeric fields used together with integer presentation types
 858         # in format specification will break the argument substitution since
 859         # string NA placeholder is returned for missing fields. We will patch
 860         # output template for missing fields to meet string presentation type.
 861         for numeric_field in numeric_fields:
 862             if numeric_field not in template_dict:
 863                 outtmpl = re.sub(
 864                     FORMAT_RE.format(re.escape(numeric_field)),
 865                     r'%({0})s'.format(numeric_field), outtmpl)
 866
 867         return outtmpl, template_dict
 868
    def _prepare_filename(self, info_dict, tmpl_type='default'):
        """
        Build a filename from the output template of the given type.

        The template is taken from self.outtmpl_dict (falling back to the
        'default' entry), prepared with prepare_outtmpl(), path-expanded and
        finally substituted with the sanitized info_dict fields.

        Returns the filename, or None after reporting an error when the
        template substitution raises ValueError.
        """
        try:
            # Values are sanitized for use in filenames; 'id' and '*_id'
            # fields are flagged as ids for sanitize_filename
            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id' or k.endswith('_id')))
            outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
            outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)

            # expand_path translates '%%' into '%' and '$$' into '$'
            # correspondingly that is not what we want since we need to keep
            # '%%' intact for template dict substitution step. Working around
            # with boundary-alike separator hack.
            sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
            outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

            # outtmpl should be expand_path'ed before template dict substitution
            # because meta fields may contain env variables we don't want to
            # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
            # title "Hello $PATH", we don't want `$PATH` to be expanded.
            filename = expand_path(outtmpl).replace(sep, '') % template_dict

            # Template types listed in OUTTMPL_TYPES get their extension replaced
            force_ext = OUTTMPL_TYPES.get(tmpl_type)
            if force_ext is not None:
                filename = replace_extension(filename, force_ext, template_dict.get('ext'))

            # https://github.com/blackjack4494/youtube-dlc/issues/85
            trim_file_name = self.params.get('trim_file_name', False)
            if trim_file_name:
                # NOTE(review): only the first dot-separated group (trimmed),
                # the second-to-last group and the last group are kept; any
                # groups in between are dropped
                fn_groups = filename.rsplit('.')
                ext = fn_groups[-1]
                sub_ext = ''
                if len(fn_groups) > 2:
                    sub_ext = fn_groups[-2]
                filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))

            return filename
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None
 909
 910     def prepare_filename(self, info_dict, dir_type='', warn=False):
 911         """Generate the output filename."""
 912         paths = self.params.get('paths', {})
 913         assert isinstance(paths, dict)
 914         filename = self._prepare_filename(info_dict, dir_type or 'default')
 915
 916         if warn and not self.__prepare_filename_warned:
 917             if not paths:
 918                 pass
 919             elif filename == '-':
 920                 self.report_warning('--paths is ignored when an outputting to stdout')
 921             elif os.path.isabs(filename):
 922                 self.report_warning('--paths is ignored since an absolute path is given in output template')
 923             self.__prepare_filename_warned = True
 924         if filename == '-' or not filename:
 925             return filename
 926
 927         homepath = expand_path(paths.get('home', '').strip())
 928         assert isinstance(homepath, compat_str)
 929         subdir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
 930         assert isinstance(subdir, compat_str)
 931         path = os.path.join(homepath, subdir, filename)
 932
 933         # Temporary fix for #4787
 934         # 'Treat' all problem characters by passing filename through preferredencoding
 935         # to workaround encoding issues with subprocess on python2 @ Windows
 936         if sys.version_info < (3, 0) and sys.platform == 'win32':
 937             path = encodeFilename(path, True).decode(preferredencoding())
 938         return sanitize_path(path, force=self.params.get('windowsfilenames'))
 939
 940     def _match_entry(self, info_dict, incomplete):
 941         """ Returns None if the file should be downloaded """
 942
 943         def check_filter():
 944             video_title = info_dict.get('title', info_dict.get('id', 'video'))
 945             if 'title' in info_dict:
 946                 # This can happen when we're just evaluating the playlist
 947                 title = info_dict['title']
 948                 matchtitle = self.params.get('matchtitle', False)
 949                 if matchtitle:
 950                     if not re.search(matchtitle, title, re.IGNORECASE):
 951                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 952                 rejecttitle = self.params.get('rejecttitle', False)
 953                 if rejecttitle:
 954                     if re.search(rejecttitle, title, re.IGNORECASE):
 955                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 956             date = info_dict.get('upload_date')
 957             if date is not None:
 958                 dateRange = self.params.get('daterange', DateRange())
 959                 if date not in dateRange:
 960                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 961             view_count = info_dict.get('view_count')
 962             if view_count is not None:
 963                 min_views = self.params.get('min_views')
 964                 if min_views is not None and view_count < min_views:
 965                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 966                 max_views = self.params.get('max_views')
 967                 if max_views is not None and view_count > max_views:
 968                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 969             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
 970                 return 'Skipping "%s" because it is age restricted' % video_title
 971             if self.in_download_archive(info_dict):
 972                 return '%s has already been recorded in archive' % video_title
 973
 974             if not incomplete:
 975                 match_filter = self.params.get('match_filter')
 976                 if match_filter is not None:
 977                     ret = match_filter(info_dict)
 978                     if ret is not None:
 979                         return ret
 980             return None
 981
 982         reason = check_filter()
 983         if reason is not None:
 984             self.to_screen('[download] ' + reason)
 985             if reason.endswith('has already been recorded in the archive') and self.params.get('break_on_existing', False):
 986                 raise ExistingVideoReached()
 987             elif self.params.get('break_on_reject', False):
 988                 raise RejectedVideoReached()
 989         return reason
 990
 991     @staticmethod
 992     def add_extra_info(info_dict, extra_info):
 993         '''Set the keys from extra_info in info dict if they are missing'''
 994         for key, value in extra_info.items():
 995             info_dict.setdefault(key, value)
 996
    def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info={},
                     process=True, force_generic_extractor=False):
        '''
        Extract information for url with the first suitable InfoExtractor.

        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result
        ie_key restricts the search to that single extractor;
        force_generic_extractor selects 'Generic' when no ie_key is given.
        info_dict, process and download are passed on to __extract_info.

        Returns the result of __extract_info for the selected extractor.
        Returns None when the id derived from the url is already in the
        download archive, or (after report_error) when no extractor is
        suitable for the url.
        '''

        if not ie_key and force_generic_extractor:
            ie_key = 'Generic'

        if ie_key:
            ies = [self.get_info_extractor(ie_key)]
        else:
            ies = self._ies

        for ie in ies:
            if not ie.suitable(url):
                continue

            # self._ies may hold entries that are not yet instances;
            # get_info_extractor returns an instance for the key
            ie_key = ie.ie_key()
            ie = self.get_info_extractor(ie_key)
            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            # Try to obtain the video id from the url alone so that the
            # archive can be checked before any extraction is done
            try:
                temp_id = str_or_none(
                    ie.extract_id(url) if callable(getattr(ie, 'extract_id', None))
                    else ie._match_id(url))
            except (AssertionError, IndexError, AttributeError):
                temp_id = None
            if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
                self.to_screen("[%s] %s: has already been recorded in archive" % (
                               ie_key, temp_id))
                # break (rather than continue) so that the for-else error
                # below is not reported
                break
            return self.__extract_info(url, ie, download, extra_info, process, info_dict)
        else:
            # Only reached when the loop finished without break/return
            self.report_error('no suitable InfoExtractor for URL %s' % url)
1036
1037     def __handle_extraction_exceptions(func):
1038         def wrapper(self, *args, **kwargs):
1039             try:
1040                 return func(self, *args, **kwargs)
1041             except GeoRestrictedError as e:
1042                 msg = e.msg
1043                 if e.countries:
1044                     msg += '\nThis video is available in %s.' % ', '.join(
1045                         map(ISO3166Utils.short2full, e.countries))
1046                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1047                 self.report_error(msg)
1048             except ExtractorError as e:  # An error we somewhat expected
1049                 self.report_error(compat_str(e), e.format_traceback())
1050             except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
1051                 raise
1052             except Exception as e:
1053                 if self.params.get('ignoreerrors', False):
1054                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
1055                 else:
1056                     raise
1057         return wrapper
1058
1059     @__handle_extraction_exceptions
1060     def __extract_info(self, url, ie, download, extra_info, process, info_dict):
1061         ie_result = ie.extract(url)
1062         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1063             return
1064         if isinstance(ie_result, list):
1065             # Backwards compatibility: old IE result format
1066             ie_result = {
1067                 '_type': 'compat_list',
1068                 'entries': ie_result,
1069             }
1070         if info_dict:
1071             if info_dict.get('id'):
1072                 ie_result['id'] = info_dict['id']
1073             if info_dict.get('title'):
1074                 ie_result['title'] = info_dict['title']
1075         self.add_default_extra_info(ie_result, ie, url)
1076         if process:
1077             return self.process_ie_result(ie_result, download, extra_info)
1078         else:
1079             return ie_result
1080
1081     def add_default_extra_info(self, ie_result, ie, url):
1082         self.add_extra_info(ie_result, {
1083             'extractor': ie.IE_NAME,
1084             'webpage_url': url,
1085             'webpage_url_basename': url_basename(url),
1086             'extractor_key': ie.ie_key(),
1087         })
1088
1089     def process_ie_result(self, ie_result, download=True, extra_info={}):
1090         """
1091         Take the result of the ie(may be modified) and resolve all unresolved
1092         references (URLs, playlist items).
1093
1094         It will also download the videos if 'download'.
1095         Returns the resolved ie_result.
1096         """
1097         result_type = ie_result.get('_type', 'video')
1098
1099         if result_type in ('url', 'url_transparent'):
1100             ie_result['url'] = sanitize_url(ie_result['url'])
1101             extract_flat = self.params.get('extract_flat', False)
1102             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1103                     or extract_flat is True):
1104                 self.__forced_printings(ie_result, self.prepare_filename(ie_result), incomplete=True)
1105                 return ie_result
1106
1107         if result_type == 'video':
1108             self.add_extra_info(ie_result, extra_info)
1109             return self.process_video_result(ie_result, download=download)
1110         elif result_type == 'url':
1111             # We have to add extra_info to the results because it may be
1112             # contained in a playlist
1113             return self.extract_info(ie_result['url'],
1114                                      download, info_dict=ie_result,
1115                                      ie_key=ie_result.get('ie_key'),
1116                                      extra_info=extra_info)
1117         elif result_type == 'url_transparent':
1118             # Use the information from the embedding page
1119             info = self.extract_info(
1120                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1121                 extra_info=extra_info, download=False, process=False)
1122
1123             # extract_info may return None when ignoreerrors is enabled and
1124             # extraction failed with an error, don't crash and return early
1125             # in this case
1126             if not info:
1127                 return info
1128
1129             force_properties = dict(
1130                 (k, v) for k, v in ie_result.items() if v is not None)
1131             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1132                 if f in force_properties:
1133                     del force_properties[f]
1134             new_result = info.copy()
1135             new_result.update(force_properties)
1136
1137             # Extracted info may not be a video result (i.e.
1138             # info.get('_type', 'video') != video) but rather an url or
1139             # url_transparent. In such cases outer metadata (from ie_result)
1140             # should be propagated to inner one (info). For this to happen
1141             # _type of info should be overridden with url_transparent. This
1142             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1143             if new_result.get('_type') == 'url':
1144                 new_result['_type'] = 'url_transparent'
1145
1146             return self.process_ie_result(
1147                 new_result, download=download, extra_info=extra_info)
1148         elif result_type in ('playlist', 'multi_video'):
1149             # Protect from infinite recursion due to recursively nested playlists
1150             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1151             webpage_url = ie_result['webpage_url']
1152             if webpage_url in self._playlist_urls:
1153                 self.to_screen(
1154                     '[download] Skipping already downloaded playlist: %s'
1155                     % ie_result.get('title') or ie_result.get('id'))
1156                 return
1157
1158             self._playlist_level += 1
1159             self._playlist_urls.add(webpage_url)
1160             try:
1161                 return self.__process_playlist(ie_result, download)
1162             finally:
1163                 self._playlist_level -= 1
1164                 if not self._playlist_level:
1165                     self._playlist_urls.clear()
1166         elif result_type == 'compat_list':
1167             self.report_warning(
1168                 'Extractor %s returned a compat_list result. '
1169                 'It needs to be updated.' % ie_result.get('extractor'))
1170
1171             def _fixup(r):
1172                 self.add_extra_info(
1173                     r,
1174                     {
1175                         'extractor': ie_result['extractor'],
1176                         'webpage_url': ie_result['webpage_url'],
1177                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
1178                         'extractor_key': ie_result['extractor_key'],
1179                     }
1180                 )
1181                 return r
1182             ie_result['entries'] = [
1183                 self.process_ie_result(_fixup(r), download, extra_info)
1184                 for r in ie_result['entries']
1185             ]
1186             return ie_result
1187         else:
1188             raise Exception('Invalid result type: %s' % result_type)
1189
    def _ensure_dir_exists(self, path):
        """Delegate to make_dir(path, self.report_error) and return its result."""
        return make_dir(path, self.report_error)
1192
1193     def __process_playlist(self, ie_result, download):
1194         # We process each entry in the playlist
1195         playlist = ie_result.get('title') or ie_result.get('id')
1196         self.to_screen('[download] Downloading playlist: %s' % playlist)
1197
1198         if 'entries' not in ie_result:
1199             raise EntryNotInPlaylist()
1200         incomplete_entries = bool(ie_result.get('requested_entries'))
1201         if incomplete_entries:
1202             def fill_missing_entries(entries, indexes):
1203                 ret = [None] * max(*indexes)
1204                 for i, entry in zip(indexes, entries):
1205                     ret[i - 1] = entry
1206                 return ret
1207             ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1208
1209         playlist_results = []
1210
1211         playliststart = self.params.get('playliststart', 1) - 1
1212         playlistend = self.params.get('playlistend')
1213         # For backwards compatibility, interpret -1 as whole list
1214         if playlistend == -1:
1215             playlistend = None
1216
1217         playlistitems_str = self.params.get('playlist_items')
1218         playlistitems = None
1219         if playlistitems_str is not None:
1220             def iter_playlistitems(format):
1221                 for string_segment in format.split(','):
1222                     if '-' in string_segment:
1223                         start, end = string_segment.split('-')
1224                         for item in range(int(start), int(end) + 1):
1225                             yield int(item)
1226                     else:
1227                         yield int(string_segment)
1228             playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1229
1230         ie_entries = ie_result['entries']
1231
1232         def make_playlistitems_entries(list_ie_entries):
1233             num_entries = len(list_ie_entries)
1234             for i in playlistitems:
1235                 if -num_entries < i <= num_entries:
1236                     yield list_ie_entries[i - 1]
1237                 elif incomplete_entries:
1238                     raise EntryNotInPlaylist()
1239
1240         if isinstance(ie_entries, list):
1241             n_all_entries = len(ie_entries)
1242             if playlistitems:
1243                 entries = list(make_playlistitems_entries(ie_entries))
1244             else:
1245                 entries = ie_entries[playliststart:playlistend]
1246             n_entries = len(entries)
1247             msg = 'Collected %d videos; downloading %d of them' % (n_all_entries, n_entries)
1248         elif isinstance(ie_entries, PagedList):
1249             if playlistitems:
1250                 entries = []
1251                 for item in playlistitems:
1252                     entries.extend(ie_entries.getslice(
1253                         item - 1, item
1254                     ))
1255             else:
1256                 entries = ie_entries.getslice(
1257                     playliststart, playlistend)
1258             n_entries = len(entries)
1259             msg = 'Downloading %d videos' % n_entries
1260         else:  # iterable
1261             if playlistitems:
1262                 entries = list(make_playlistitems_entries(list(itertools.islice(
1263                     ie_entries, 0, max(playlistitems)))))
1264             else:
1265                 entries = list(itertools.islice(
1266                     ie_entries, playliststart, playlistend))
1267             n_entries = len(entries)
1268             msg = 'Downloading %d videos' % n_entries
1269
1270         if any((entry is None for entry in entries)):
1271             raise EntryNotInPlaylist()
1272         if not playlistitems and (playliststart or playlistend):
1273             playlistitems = list(range(1 + playliststart, 1 + playliststart + len(entries)))
1274         ie_result['entries'] = entries
1275         ie_result['requested_entries'] = playlistitems
1276
1277         if self.params.get('allow_playlist_files', True):
1278             ie_copy = {
1279                 'playlist': playlist,
1280                 'playlist_id': ie_result.get('id'),
1281                 'playlist_title': ie_result.get('title'),
1282                 'playlist_uploader': ie_result.get('uploader'),
1283                 'playlist_uploader_id': ie_result.get('uploader_id'),
1284                 'playlist_index': 0
1285             }
1286             ie_copy.update(dict(ie_result))
1287
1288             if self.params.get('writeinfojson', False):
1289                 infofn = self.prepare_filename(ie_copy, 'pl_infojson')
1290                 if not self._ensure_dir_exists(encodeFilename(infofn)):
1291                     return
1292                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
1293                     self.to_screen('[info] Playlist metadata is already present')
1294                 else:
1295                     self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
1296                     try:
1297                         write_json_file(self.filter_requested_info(ie_result, self.params.get('clean_infojson', True)), infofn)
1298                     except (OSError, IOError):
1299                         self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
1300
1301             if self.params.get('writedescription', False):
1302                 descfn = self.prepare_filename(ie_copy, 'pl_description')
1303                 if not self._ensure_dir_exists(encodeFilename(descfn)):
1304                     return
1305                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
1306                     self.to_screen('[info] Playlist description is already present')
1307                 elif ie_result.get('description') is None:
1308                     self.report_warning('There\'s no playlist description to write.')
1309                 else:
1310                     try:
1311                         self.to_screen('[info] Writing playlist description to: ' + descfn)
1312                         with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1313                             descfile.write(ie_result['description'])
1314                     except (OSError, IOError):
1315                         self.report_error('Cannot write playlist description file ' + descfn)
1316                         return
1317
1318         if self.params.get('playlistreverse', False):
1319             entries = entries[::-1]
1320         if self.params.get('playlistrandom', False):
1321             random.shuffle(entries)
1322
1323         x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1324
1325         self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg))
1326         for i, entry in enumerate(entries, 1):
1327             self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1328             # This __x_forwarded_for_ip thing is a bit ugly but requires
1329             # minimal changes
1330             if x_forwarded_for:
1331                 entry['__x_forwarded_for_ip'] = x_forwarded_for
1332             extra = {
1333                 'n_entries': n_entries,
1334                 'playlist': playlist,
1335                 'playlist_id': ie_result.get('id'),
1336                 'playlist_title': ie_result.get('title'),
1337                 'playlist_uploader': ie_result.get('uploader'),
1338                 'playlist_uploader_id': ie_result.get('uploader_id'),
1339                 'playlist_index': playlistitems[i - 1] if playlistitems else i,
1340                 'extractor': ie_result['extractor'],
1341                 'webpage_url': ie_result['webpage_url'],
1342                 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1343                 'extractor_key': ie_result['extractor_key'],
1344             }
1345
1346             if self._match_entry(entry, incomplete=True) is not None:
1347                 continue
1348
1349             entry_result = self.__process_iterable_entry(entry, download, extra)
1350             # TODO: skip failed (empty) entries?
1351             playlist_results.append(entry_result)
1352         ie_result['entries'] = playlist_results
1353         self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1354         return ie_result
1355
1356     @__handle_extraction_exceptions
1357     def __process_iterable_entry(self, entry, download, extra_info):
1358         return self.process_ie_result(
1359             entry, download=download, extra_info=extra_info)
1360
1361     def _build_format_filter(self, filter_spec):
1362         " Returns a function to filter the formats according to the filter_spec "
1363
1364         OPERATORS = {
1365             '<': operator.lt,
1366             '<=': operator.le,
1367             '>': operator.gt,
1368             '>=': operator.ge,
1369             '=': operator.eq,
1370             '!=': operator.ne,
1371         }
1372         operator_rex = re.compile(r'''(?x)\s*
1373             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
1374             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1375             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
1376             $
1377             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1378         m = operator_rex.search(filter_spec)
1379         if m:
1380             try:
1381                 comparison_value = int(m.group('value'))
1382             except ValueError:
1383                 comparison_value = parse_filesize(m.group('value'))
1384                 if comparison_value is None:
1385                     comparison_value = parse_filesize(m.group('value') + 'B')
1386                 if comparison_value is None:
1387                     raise ValueError(
1388                         'Invalid value %r in format specification %r' % (
1389                             m.group('value'), filter_spec))
1390             op = OPERATORS[m.group('op')]
1391
1392         if not m:
1393             STR_OPERATORS = {
1394                 '=': operator.eq,
1395                 '^=': lambda attr, value: attr.startswith(value),
1396                 '$=': lambda attr, value: attr.endswith(value),
1397                 '*=': lambda attr, value: value in attr,
1398             }
1399             str_operator_rex = re.compile(r'''(?x)
1400                 \s*(?P<key>[a-zA-Z0-9._-]+)
1401                 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
1402                 \s*(?P<value>[a-zA-Z0-9._-]+)
1403                 \s*$
1404                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1405             m = str_operator_rex.search(filter_spec)
1406             if m:
1407                 comparison_value = m.group('value')
1408                 str_op = STR_OPERATORS[m.group('op')]
1409                 if m.group('negation'):
1410                     op = lambda attr, value: not str_op(attr, value)
1411                 else:
1412                     op = str_op
1413
1414         if not m:
1415             raise ValueError('Invalid filter specification %r' % filter_spec)
1416
1417         def _filter(f):
1418             actual_value = f.get(m.group('key'))
1419             if actual_value is None:
1420                 return m.group('none_inclusive')
1421             return op(actual_value, comparison_value)
1422         return _filter
1423
1424     def _default_format_spec(self, info_dict, download=True):
1425
1426         def can_merge():
1427             merger = FFmpegMergerPP(self)
1428             return merger.available and merger.can_merge()
1429
1430         prefer_best = (
1431             not self.params.get('simulate', False)
1432             and download
1433             and (
1434                 not can_merge()
1435                 or info_dict.get('is_live', False)
1436                 or self.outtmpl_dict['default'] == '-'))
1437
1438         return (
1439             'best/bestvideo+bestaudio'
1440             if prefer_best
1441             else 'bestvideo*+bestaudio/best'
1442             if not self.params.get('allow_multiple_audio_streams', False)
1443             else 'bestvideo+bestaudio/best')
1444
1445     def build_format_selector(self, format_spec):
1446         def syntax_error(note, start):
1447             message = (
1448                 'Invalid format specification: '
1449                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1450             return SyntaxError(message)
1451
1452         PICKFIRST = 'PICKFIRST'
1453         MERGE = 'MERGE'
1454         SINGLE = 'SINGLE'
1455         GROUP = 'GROUP'
1456         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1457
1458         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1459                                   'video': self.params.get('allow_multiple_video_streams', False)}
1460
1461         def _parse_filter(tokens):
1462             filter_parts = []
1463             for type, string, start, _, _ in tokens:
1464                 if type == tokenize.OP and string == ']':
1465                     return ''.join(filter_parts)
1466                 else:
1467                     filter_parts.append(string)
1468
1469         def _remove_unused_ops(tokens):
1470             # Remove operators that we don't use and join them with the surrounding strings
1471             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1472             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1473             last_string, last_start, last_end, last_line = None, None, None, None
1474             for type, string, start, end, line in tokens:
1475                 if type == tokenize.OP and string == '[':
1476                     if last_string:
1477                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1478                         last_string = None
1479                     yield type, string, start, end, line
1480                     # everything inside brackets will be handled by _parse_filter
1481                     for type, string, start, end, line in tokens:
1482                         yield type, string, start, end, line
1483                         if type == tokenize.OP and string == ']':
1484                             break
1485                 elif type == tokenize.OP and string in ALLOWED_OPS:
1486                     if last_string:
1487                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1488                         last_string = None
1489                     yield type, string, start, end, line
1490                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1491                     if not last_string:
1492                         last_string = string
1493                         last_start = start
1494                         last_end = end
1495                     else:
1496                         last_string += string
1497             if last_string:
1498                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1499
1500         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1501             selectors = []
1502             current_selector = None
1503             for type, string, start, _, _ in tokens:
1504                 # ENCODING is only defined in python 3.x
1505                 if type == getattr(tokenize, 'ENCODING', None):
1506                     continue
1507                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1508                     current_selector = FormatSelector(SINGLE, string, [])
1509                 elif type == tokenize.OP:
1510                     if string == ')':
1511                         if not inside_group:
1512                             # ')' will be handled by the parentheses group
1513                             tokens.restore_last_token()
1514                         break
1515                     elif inside_merge and string in ['/', ',']:
1516                         tokens.restore_last_token()
1517                         break
1518                     elif inside_choice and string == ',':
1519                         tokens.restore_last_token()
1520                         break
1521                     elif string == ',':
1522                         if not current_selector:
1523                             raise syntax_error('"," must follow a format selector', start)
1524                         selectors.append(current_selector)
1525                         current_selector = None
1526                     elif string == '/':
1527                         if not current_selector:
1528                             raise syntax_error('"/" must follow a format selector', start)
1529                         first_choice = current_selector
1530                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1531                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1532                     elif string == '[':
1533                         if not current_selector:
1534                             current_selector = FormatSelector(SINGLE, 'best', [])
1535                         format_filter = _parse_filter(tokens)
1536                         current_selector.filters.append(format_filter)
1537                     elif string == '(':
1538                         if current_selector:
1539                             raise syntax_error('Unexpected "("', start)
1540                         group = _parse_format_selection(tokens, inside_group=True)
1541                         current_selector = FormatSelector(GROUP, group, [])
1542                     elif string == '+':
1543                         if not current_selector:
1544                             raise syntax_error('Unexpected "+"', start)
1545                         selector_1 = current_selector
1546                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
1547                         if not selector_2:
1548                             raise syntax_error('Expected a selector', start)
1549                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1550                     else:
1551                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1552                 elif type == tokenize.ENDMARKER:
1553                     break
1554             if current_selector:
1555                 selectors.append(current_selector)
1556             return selectors
1557
1558         def _merge(formats_pair):
1559             format_1, format_2 = formats_pair
1560
1561             formats_info = []
1562             formats_info.extend(format_1.get('requested_formats', (format_1,)))
1563             formats_info.extend(format_2.get('requested_formats', (format_2,)))
1564
1565             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1566                 get_no_more = {"video": False, "audio": False}
1567                 for (i, fmt_info) in enumerate(formats_info):
1568                     for aud_vid in ["audio", "video"]:
1569                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1570                             if get_no_more[aud_vid]:
1571                                 formats_info.pop(i)
1572                             get_no_more[aud_vid] = True
1573
1574             if len(formats_info) == 1:
1575                 return formats_info[0]
1576
1577             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1578             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1579
1580             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1581             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1582
1583             output_ext = self.params.get('merge_output_format')
1584             if not output_ext:
1585                 if the_only_video:
1586                     output_ext = the_only_video['ext']
1587                 elif the_only_audio and not video_fmts:
1588                     output_ext = the_only_audio['ext']
1589                 else:
1590                     output_ext = 'mkv'
1591
1592             new_dict = {
1593                 'requested_formats': formats_info,
1594                 'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
1595                 'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
1596                 'ext': output_ext,
1597             }
1598
1599             if the_only_video:
1600                 new_dict.update({
1601                     'width': the_only_video.get('width'),
1602                     'height': the_only_video.get('height'),
1603                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
1604                     'fps': the_only_video.get('fps'),
1605                     'vcodec': the_only_video.get('vcodec'),
1606                     'vbr': the_only_video.get('vbr'),
1607                     'stretched_ratio': the_only_video.get('stretched_ratio'),
1608                 })
1609
1610             if the_only_audio:
1611                 new_dict.update({
1612                     'acodec': the_only_audio.get('acodec'),
1613                     'abr': the_only_audio.get('abr'),
1614                 })
1615
1616             return new_dict
1617
1618         def _build_selector_function(selector):
1619             if isinstance(selector, list):  # ,
1620                 fs = [_build_selector_function(s) for s in selector]
1621
1622                 def selector_function(ctx):
1623                     for f in fs:
1624                         for format in f(ctx):
1625                             yield format
1626                 return selector_function
1627
1628             elif selector.type == GROUP:  # ()
1629                 selector_function = _build_selector_function(selector.selector)
1630
1631             elif selector.type == PICKFIRST:  # /
1632                 fs = [_build_selector_function(s) for s in selector.selector]
1633
1634                 def selector_function(ctx):
1635                     for f in fs:
1636                         picked_formats = list(f(ctx))
1637                         if picked_formats:
1638                             return picked_formats
1639                     return []
1640
1641             elif selector.type == SINGLE:  # atom
1642                 format_spec = (selector.selector or 'best').lower()
1643
1644                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
1645                 if format_spec == 'all':
1646                     def selector_function(ctx):
1647                         formats = list(ctx['formats'])
1648                         if formats:
1649                             for f in formats:
1650                                 yield f
1651                 elif format_spec == 'mergeall':
1652                     def selector_function(ctx):
1653                         formats = list(ctx['formats'])
1654                         if not formats:
1655                             return
1656                         merged_format = formats[-1]
1657                         for f in formats[-2::-1]:
1658                             merged_format = _merge((merged_format, f))
1659                         yield merged_format
1660
1661                 else:
1662                     format_fallback = False
1663                     mobj = re.match(
1664                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
1665                         format_spec)
1666                     if mobj is not None:
1667                         format_idx = int_or_none(mobj.group('n'), default=1)
1668                         format_idx = format_idx - 1 if mobj.group('bw')[0] == 'w' else -format_idx
1669                         format_type = (mobj.group('type') or [None])[0]
1670                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
1671                         format_modified = mobj.group('mod') is not None
1672
1673                         format_fallback = not format_type and not format_modified  # for b, w
1674                         filter_f = (
1675                             (lambda f: f.get('%scodec' % format_type) != 'none')
1676                             if format_type and format_modified  # bv*, ba*, wv*, wa*
1677                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
1678                             if format_type  # bv, ba, wv, wa
1679                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
1680                             if not format_modified  # b, w
1681                             else None)  # b*, w*
1682                     else:
1683                         format_idx = -1
1684                         filter_f = ((lambda f: f.get('ext') == format_spec)
1685                                     if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
1686                                     else (lambda f: f.get('format_id') == format_spec))  # id
1687
1688                     def selector_function(ctx):
1689                         formats = list(ctx['formats'])
1690                         if not formats:
1691                             return
1692                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
1693                         n = len(matches)
1694                         if -n <= format_idx < n:
1695                             yield matches[format_idx]
1696                         elif format_fallback and ctx['incomplete_formats']:
1697                             # for extractors with incomplete formats (audio only (soundcloud)
1698                             # or video only (imgur)) best/worst will fallback to
1699                             # best/worst {video,audio}-only format
1700                             n = len(formats)
1701                             if -n <= format_idx < n:
1702                                 yield formats[format_idx]
1703
1704             elif selector.type == MERGE:        # +
1705                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
1706
1707                 def selector_function(ctx):
1708                     for pair in itertools.product(
1709                             selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
1710                         yield _merge(pair)
1711
1712             filters = [self._build_format_filter(f) for f in selector.filters]
1713
1714             def final_selector(ctx):
1715                 ctx_copy = copy.deepcopy(ctx)
1716                 for _filter in filters:
1717                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1718                 return selector_function(ctx_copy)
1719             return final_selector
1720
1721         stream = io.BytesIO(format_spec.encode('utf-8'))
1722         try:
1723             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1724         except tokenize.TokenError:
1725             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1726
1727         class TokenIterator(object):
1728             def __init__(self, tokens):
1729                 self.tokens = tokens
1730                 self.counter = 0
1731
1732             def __iter__(self):
1733                 return self
1734
1735             def __next__(self):
1736                 if self.counter >= len(self.tokens):
1737                     raise StopIteration()
1738                 value = self.tokens[self.counter]
1739                 self.counter += 1
1740                 return value
1741
1742             next = __next__
1743
1744             def restore_last_token(self):
1745                 self.counter -= 1
1746
1747         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1748         return _build_selector_function(parsed_selector)
1749
1750     def _calc_headers(self, info_dict):
1751         res = std_headers.copy()
1752
1753         add_headers = info_dict.get('http_headers')
1754         if add_headers:
1755             res.update(add_headers)
1756
1757         cookies = self._calc_cookies(info_dict)
1758         if cookies:
1759             res['Cookie'] = cookies
1760
1761         if 'X-Forwarded-For' not in res:
1762             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1763             if x_forwarded_for_ip:
1764                 res['X-Forwarded-For'] = x_forwarded_for_ip
1765
1766         return res
1767
1768     def _calc_cookies(self, info_dict):
1769         pr = sanitized_Request(info_dict['url'])
1770         self.cookiejar.add_cookie_header(pr)
1771         return pr.get_header('Cookie')
1772
1773     def process_video_result(self, info_dict, download=True):
1774         assert info_dict.get('_type', 'video') == 'video'
1775
1776         if 'id' not in info_dict:
1777             raise ExtractorError('Missing "id" field in extractor result')
1778         if 'title' not in info_dict:
1779             raise ExtractorError('Missing "title" field in extractor result')
1780
1781         def report_force_conversion(field, field_not, conversion):
1782             self.report_warning(
1783                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
1784                 % (field, field_not, conversion))
1785
1786         def sanitize_string_field(info, string_field):
1787             field = info.get(string_field)
1788             if field is None or isinstance(field, compat_str):
1789                 return
1790             report_force_conversion(string_field, 'a string', 'string')
1791             info[string_field] = compat_str(field)
1792
1793         def sanitize_numeric_fields(info):
1794             for numeric_field in self._NUMERIC_FIELDS:
1795                 field = info.get(numeric_field)
1796                 if field is None or isinstance(field, compat_numeric_types):
1797                     continue
1798                 report_force_conversion(numeric_field, 'numeric', 'int')
1799                 info[numeric_field] = int_or_none(field)
1800
1801         sanitize_string_field(info_dict, 'id')
1802         sanitize_numeric_fields(info_dict)
1803
1804         if 'playlist' not in info_dict:
1805             # It isn't part of a playlist
1806             info_dict['playlist'] = None
1807             info_dict['playlist_index'] = None
1808
1809         thumbnails = info_dict.get('thumbnails')
1810         if thumbnails is None:
1811             thumbnail = info_dict.get('thumbnail')
1812             if thumbnail:
1813                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1814         if thumbnails:
1815             thumbnails.sort(key=lambda t: (
1816                 t.get('preference') if t.get('preference') is not None else -1,
1817                 t.get('width') if t.get('width') is not None else -1,
1818                 t.get('height') if t.get('height') is not None else -1,
1819                 t.get('id') if t.get('id') is not None else '', t.get('url')))
1820             for i, t in enumerate(thumbnails):
1821                 t['url'] = sanitize_url(t['url'])
1822                 if t.get('width') and t.get('height'):
1823                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1824                 if t.get('id') is None:
1825                     t['id'] = '%d' % i
1826
1827         if self.params.get('list_thumbnails'):
1828             self.list_thumbnails(info_dict)
1829             return
1830
1831         thumbnail = info_dict.get('thumbnail')
1832         if thumbnail:
1833             info_dict['thumbnail'] = sanitize_url(thumbnail)
1834         elif thumbnails:
1835             info_dict['thumbnail'] = thumbnails[-1]['url']
1836
1837         if 'display_id' not in info_dict and 'id' in info_dict:
1838             info_dict['display_id'] = info_dict['id']
1839
1840         for ts_key, date_key in (
1841                 ('timestamp', 'upload_date'),
1842                 ('release_timestamp', 'release_date'),
1843         ):
1844             if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
1845                 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1846                 # see http://bugs.python.org/issue1646728)
1847                 try:
1848                     upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
1849                     info_dict[date_key] = upload_date.strftime('%Y%m%d')
1850                 except (ValueError, OverflowError, OSError):
1851                     pass
1852
1853         # Auto generate title fields corresponding to the *_number fields when missing
1854         # in order to always have clean titles. This is very common for TV series.
1855         for field in ('chapter', 'season', 'episode'):
1856             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1857                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
1858
1859         for cc_kind in ('subtitles', 'automatic_captions'):
1860             cc = info_dict.get(cc_kind)
1861             if cc:
1862                 for _, subtitle in cc.items():
1863                     for subtitle_format in subtitle:
1864                         if subtitle_format.get('url'):
1865                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
1866                         if subtitle_format.get('ext') is None:
1867                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1868
1869         automatic_captions = info_dict.get('automatic_captions')
1870         subtitles = info_dict.get('subtitles')
1871
1872         if self.params.get('listsubtitles', False):
1873             if 'automatic_captions' in info_dict:
1874                 self.list_subtitles(
1875                     info_dict['id'], automatic_captions, 'automatic captions')
1876             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1877             return
1878
1879         info_dict['requested_subtitles'] = self.process_subtitles(
1880             info_dict['id'], subtitles, automatic_captions)
1881
1882         # We now pick which formats have to be downloaded
1883         if info_dict.get('formats') is None:
1884             # There's only one format available
1885             formats = [info_dict]
1886         else:
1887             formats = info_dict['formats']
1888
1889         if not formats:
1890             if not self.params.get('ignore_no_formats_error'):
1891                 raise ExtractorError('No video formats found!')
1892             else:
1893                 self.report_warning('No video formats found!')
1894
1895         def is_wellformed(f):
1896             url = f.get('url')
1897             if not url:
1898                 self.report_warning(
1899                     '"url" field is missing or empty - skipping format, '
1900                     'there is an error in extractor')
1901                 return False
1902             if isinstance(url, bytes):
1903                 sanitize_string_field(f, 'url')
1904             return True
1905
1906         # Filter out malformed formats for better extraction robustness
1907         formats = list(filter(is_wellformed, formats))
1908
1909         formats_dict = {}
1910
1911         # We check that all the formats have the format and format_id fields
1912         for i, format in enumerate(formats):
1913             sanitize_string_field(format, 'format_id')
1914             sanitize_numeric_fields(format)
1915             format['url'] = sanitize_url(format['url'])
1916             if not format.get('format_id'):
1917                 format['format_id'] = compat_str(i)
1918             else:
1919                 # Sanitize format_id from characters used in format selector expression
1920                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
1921             format_id = format['format_id']
1922             if format_id not in formats_dict:
1923                 formats_dict[format_id] = []
1924             formats_dict[format_id].append(format)
1925
1926         # Make sure all formats have unique format_id
1927         for format_id, ambiguous_formats in formats_dict.items():
1928             if len(ambiguous_formats) > 1:
1929                 for i, format in enumerate(ambiguous_formats):
1930                     format['format_id'] = '%s-%d' % (format_id, i)
1931
1932         for i, format in enumerate(formats):
1933             if format.get('format') is None:
1934                 format['format'] = '{id} - {res}{note}'.format(
1935                     id=format['format_id'],
1936                     res=self.format_resolution(format),
1937                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1938                 )
1939             # Automatically determine file extension if missing
1940             if format.get('ext') is None:
1941                 format['ext'] = determine_ext(format['url']).lower()
1942             # Automatically determine protocol if missing (useful for format
1943             # selection purposes)
1944             if format.get('protocol') is None:
1945                 format['protocol'] = determine_protocol(format)
1946             # Add HTTP headers, so that external programs can use them from the
1947             # json output
1948             full_format_info = info_dict.copy()
1949             full_format_info.update(format)
1950             format['http_headers'] = self._calc_headers(full_format_info)
1951         # Remove private housekeeping stuff
1952         if '__x_forwarded_for_ip' in info_dict:
1953             del info_dict['__x_forwarded_for_ip']
1954
1955         # TODO Central sorting goes here
1956
1957         if formats and formats[0] is not info_dict:
1958             # only set the 'formats' fields if the original info_dict list them
1959             # otherwise we end up with a circular reference, the first (and unique)
1960             # element in the 'formats' field in info_dict is info_dict itself,
1961             # which can't be exported to json
1962             info_dict['formats'] = formats
1963         if self.params.get('listformats'):
1964             if not info_dict.get('formats'):
1965                 raise ExtractorError('No video formats found', expected=True)
1966             self.list_formats(info_dict)
1967             return
1968
1969         req_format = self.params.get('format')
1970         if req_format is None:
1971             req_format = self._default_format_spec(info_dict, download=download)
1972             if self.params.get('verbose'):
1973                 self.to_screen('[debug] Default format spec: %s' % req_format)
1974
1975         format_selector = self.build_format_selector(req_format)
1976
1977         # While in format selection we may need to have an access to the original
1978         # format set in order to calculate some metrics or do some processing.
1979         # For now we need to be able to guess whether original formats provided
1980         # by extractor are incomplete or not (i.e. whether extractor provides only
1981         # video-only or audio-only formats) for proper formats selection for
1982         # extractors with such incomplete formats (see
1983         # https://github.com/ytdl-org/youtube-dl/pull/5556).
1984         # Since formats may be filtered during format selection and may not match
1985         # the original formats the results may be incorrect. Thus original formats
1986         # or pre-calculated metrics should be passed to format selection routines
1987         # as well.
1988         # We will pass a context object containing all necessary additional data
1989         # instead of just formats.
1990         # This fixes incorrect format selection issue (see
1991         # https://github.com/ytdl-org/youtube-dl/issues/10083).
1992         incomplete_formats = (
1993             # All formats are video-only or
1994             all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
1995             # all formats are audio-only
1996             or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
1997
1998         ctx = {
1999             'formats': formats,
2000             'incomplete_formats': incomplete_formats,
2001         }
2002
2003         formats_to_download = list(format_selector(ctx))
2004         if not formats_to_download:
2005             if not self.params.get('ignore_no_formats_error'):
2006                 raise ExtractorError('Requested format is not available', expected=True)
2007             else:
2008                 self.report_warning('Requested format is not available')
2009         elif download:
2010             self.to_screen(
2011                 '[info] %s: Downloading format(s) %s'
2012                 % (info_dict['id'], ", ".join([f['format_id'] for f in formats_to_download])))
2013             if len(formats_to_download) > 1:
2014                 self.to_screen(
2015                     '[info] %s: Downloading video in %s formats'
2016                     % (info_dict['id'], len(formats_to_download)))
2017             for fmt in formats_to_download:
2018                 new_info = dict(info_dict)
2019                 new_info.update(fmt)
2020                 self.process_info(new_info)
2021         # We update the info dict with the best quality format (backwards compatibility)
2022         if formats_to_download:
2023             info_dict.update(formats_to_download[-1])
2024         return info_dict
2025
2026     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2027         """Select the requested subtitles and their format"""
2028         available_subs = {}
2029         if normal_subtitles and self.params.get('writesubtitles'):
2030             available_subs.update(normal_subtitles)
2031         if automatic_captions and self.params.get('writeautomaticsub'):
2032             for lang, cap_info in automatic_captions.items():
2033                 if lang not in available_subs:
2034                     available_subs[lang] = cap_info
2035
2036         if (not self.params.get('writesubtitles') and not
2037                 self.params.get('writeautomaticsub') or not
2038                 available_subs):
2039             return None
2040
2041         if self.params.get('allsubtitles', False):
2042             requested_langs = available_subs.keys()
2043         else:
2044             if self.params.get('subtitleslangs', False):
2045                 requested_langs = self.params.get('subtitleslangs')
2046             elif 'en' in available_subs:
2047                 requested_langs = ['en']
2048             else:
2049                 requested_langs = [list(available_subs.keys())[0]]
2050
2051         formats_query = self.params.get('subtitlesformat', 'best')
2052         formats_preference = formats_query.split('/') if formats_query else []
2053         subs = {}
2054         for lang in requested_langs:
2055             formats = available_subs.get(lang)
2056             if formats is None:
2057                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2058                 continue
2059             for ext in formats_preference:
2060                 if ext == 'best':
2061                     f = formats[-1]
2062                     break
2063                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2064                 if matches:
2065                     f = matches[-1]
2066                     break
2067             else:
2068                 f = formats[-1]
2069                 self.report_warning(
2070                     'No subtitle format found matching "%s" for language %s, '
2071                     'using %s' % (formats_query, lang, f['ext']))
2072             subs[lang] = f
2073         return subs
2074
2075     def __forced_printings(self, info_dict, filename, incomplete):
2076         def print_mandatory(field):
2077             if (self.params.get('force%s' % field, False)
2078                     and (not incomplete or info_dict.get(field) is not None)):
2079                 self.to_stdout(info_dict[field])
2080
2081         def print_optional(field):
2082             if (self.params.get('force%s' % field, False)
2083                     and info_dict.get(field) is not None):
2084                 self.to_stdout(info_dict[field])
2085
2086         print_mandatory('title')
2087         print_mandatory('id')
2088         if self.params.get('forceurl', False) and not incomplete:
2089             if info_dict.get('requested_formats') is not None:
2090                 for f in info_dict['requested_formats']:
2091                     self.to_stdout(f['url'] + f.get('play_path', ''))
2092             else:
2093                 # For RTMP URLs, also include the playpath
2094                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
2095         print_optional('thumbnail')
2096         print_optional('description')
2097         if self.params.get('forcefilename', False) and filename is not None:
2098             self.to_stdout(filename)
2099         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
2100             self.to_stdout(formatSeconds(info_dict['duration']))
2101         print_mandatory('format')
2102         if self.params.get('forcejson', False):
2103             self.post_extract(info_dict)
2104             self.to_stdout(json.dumps(info_dict, default=repr))
2105
2106     def process_info(self, info_dict):
2107         """Process a single resolved IE result."""
2108
2109         assert info_dict.get('_type', 'video') == 'video'
2110
2111         info_dict.setdefault('__postprocessors', [])
2112
2113         max_downloads = self.params.get('max_downloads')
2114         if max_downloads is not None:
2115             if self._num_downloads >= int(max_downloads):
2116                 raise MaxDownloadsReached()
2117
2118         # TODO: backward compatibility, to be removed
2119         info_dict['fulltitle'] = info_dict['title']
2120
2121         if 'format' not in info_dict:
2122             info_dict['format'] = info_dict['ext']
2123
2124         if self._match_entry(info_dict, incomplete=False) is not None:
2125             return
2126
2127         self.post_extract(info_dict)
2128         self._num_downloads += 1
2129
2130         info_dict, _ = self.pre_process(info_dict)
2131
2132         # info_dict['_filename'] needs to be set for backward compatibility
2133         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2134         temp_filename = self.prepare_filename(info_dict, 'temp')
2135         files_to_move = {}
2136
2137         # Forced printings
2138         self.__forced_printings(info_dict, full_filename, incomplete=False)
2139
2140         if self.params.get('simulate', False):
2141             if self.params.get('force_write_download_archive', False):
2142                 self.record_download_archive(info_dict)
2143
2144             # Do nothing else if in simulate mode
2145             return
2146
2147         if full_filename is None:
2148             return
2149
2150         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2151             return
2152         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2153             return
2154
2155         if self.params.get('writedescription', False):
2156             descfn = self.prepare_filename(info_dict, 'description')
2157             if not self._ensure_dir_exists(encodeFilename(descfn)):
2158                 return
2159             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
2160                 self.to_screen('[info] Video description is already present')
2161             elif info_dict.get('description') is None:
2162                 self.report_warning('There\'s no description to write.')
2163             else:
2164                 try:
2165                     self.to_screen('[info] Writing video description to: ' + descfn)
2166                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
2167                         descfile.write(info_dict['description'])
2168                 except (OSError, IOError):
2169                     self.report_error('Cannot write description file ' + descfn)
2170                     return
2171
2172         if self.params.get('writeannotations', False):
2173             annofn = self.prepare_filename(info_dict, 'annotation')
2174             if not self._ensure_dir_exists(encodeFilename(annofn)):
2175                 return
2176             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2177                 self.to_screen('[info] Video annotations are already present')
2178             elif not info_dict.get('annotations'):
2179                 self.report_warning('There are no annotations to write.')
2180             else:
2181                 try:
2182                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2183                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2184                         annofile.write(info_dict['annotations'])
2185                 except (KeyError, TypeError):
2186                     self.report_warning('There are no annotations to write.')
2187                 except (OSError, IOError):
2188                     self.report_error('Cannot write annotations file: ' + annofn)
2189                     return
2190
2191         def dl(name, info, subtitle=False):
2192             fd = get_suitable_downloader(info, self.params)(self, self.params)
2193             for ph in self._progress_hooks:
2194                 fd.add_progress_hook(ph)
2195             if self.params.get('verbose'):
2196                 self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
2197             new_info = dict(info)
2198             if new_info.get('http_headers') is None:
2199                 new_info['http_headers'] = self._calc_headers(new_info)
2200             return fd.download(name, new_info, subtitle)
2201
2202         subtitles_are_requested = any([self.params.get('writesubtitles', False),
2203                                        self.params.get('writeautomaticsub')])
2204
2205         if subtitles_are_requested and info_dict.get('requested_subtitles'):
2206             # subtitles download errors are already managed as troubles in relevant IE
2207             # that way it will silently go on when used with unsupporting IE
2208             subtitles = info_dict['requested_subtitles']
2209             # ie = self.get_info_extractor(info_dict['extractor_key'])
2210             for sub_lang, sub_info in subtitles.items():
2211                 sub_format = sub_info['ext']
2212                 sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
2213                 sub_filename_final = subtitles_filename(
2214                     self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))
2215                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
2216                     self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
2217                     sub_info['filepath'] = sub_filename
2218                     files_to_move[sub_filename] = sub_filename_final
2219                 else:
2220                     self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
2221                     if sub_info.get('data') is not None:
2222                         try:
2223                             # Use newline='' to prevent conversion of newline characters
2224                             # See https://github.com/ytdl-org/youtube-dl/issues/10268
2225                             with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
2226                                 subfile.write(sub_info['data'])
2227                             sub_info['filepath'] = sub_filename
2228                             files_to_move[sub_filename] = sub_filename_final
2229                         except (OSError, IOError):
2230                             self.report_error('Cannot write subtitles file ' + sub_filename)
2231                             return
2232                     else:
2233                         try:
2234                             dl(sub_filename, sub_info.copy(), subtitle=True)
2235                             sub_info['filepath'] = sub_filename
2236                             files_to_move[sub_filename] = sub_filename_final
2237                         except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2238                             self.report_warning('Unable to download subtitle for "%s": %s' %
2239                                                 (sub_lang, error_to_compat_str(err)))
2240                             continue
2241
2242         if self.params.get('writeinfojson', False):
2243             infofn = self.prepare_filename(info_dict, 'infojson')
2244             if not self._ensure_dir_exists(encodeFilename(infofn)):
2245                 return
2246             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
2247                 self.to_screen('[info] Video metadata is already present')
2248             else:
2249                 self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
2250                 try:
2251                     write_json_file(self.filter_requested_info(info_dict, self.params.get('clean_infojson', True)), infofn)
2252                 except (OSError, IOError):
2253                     self.report_error('Cannot write video metadata to JSON file ' + infofn)
2254                     return
2255             info_dict['__infojson_filename'] = infofn
2256
2257         for thumb_ext in self._write_thumbnails(info_dict, temp_filename):
2258             thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))
2259             thumb_filename = replace_extension(
2260                 self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))
2261             files_to_move[thumb_filename_temp] = thumb_filename
2262
2263         # Write internet shortcut files
2264         url_link = webloc_link = desktop_link = False
2265         if self.params.get('writelink', False):
2266             if sys.platform == "darwin":  # macOS.
2267                 webloc_link = True
2268             elif sys.platform.startswith("linux"):
2269                 desktop_link = True
2270             else:  # if sys.platform in ['win32', 'cygwin']:
2271                 url_link = True
2272         if self.params.get('writeurllink', False):
2273             url_link = True
2274         if self.params.get('writewebloclink', False):
2275             webloc_link = True
2276         if self.params.get('writedesktoplink', False):
2277             desktop_link = True
2278
2279         if url_link or webloc_link or desktop_link:
2280             if 'webpage_url' not in info_dict:
2281                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2282                 return
2283             ascii_url = iri_to_uri(info_dict['webpage_url'])
2284
2285         def _write_link_file(extension, template, newline, embed_filename):
2286             linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
2287             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2288                 self.to_screen('[info] Internet shortcut is already present')
2289             else:
2290                 try:
2291                     self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2292                     with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2293                         template_vars = {'url': ascii_url}
2294                         if embed_filename:
2295                             template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2296                         linkfile.write(template % template_vars)
2297                 except (OSError, IOError):
2298                     self.report_error('Cannot write internet shortcut ' + linkfn)
2299                     return False
2300             return True
2301
2302         if url_link:
2303             if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2304                 return
2305         if webloc_link:
2306             if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2307                 return
2308         if desktop_link:
2309             if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2310                 return
2311
2312         try:
2313             info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2314         except PostProcessingError as err:
2315             self.report_error('Preprocessing: %s' % str(err))
2316             return
2317
2318         must_record_download_archive = False
2319         if self.params.get('skip_download', False):
2320             info_dict['filepath'] = temp_filename
2321             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2322             info_dict['__files_to_move'] = files_to_move
2323             info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2324         else:
2325             # Download
2326             try:
2327
2328                 def existing_file(*filepaths):
2329                     ext = info_dict.get('ext')
2330                     final_ext = self.params.get('final_ext', ext)
2331                     existing_files = []
2332                     for file in orderedSet(filepaths):
2333                         if final_ext != ext:
2334                             converted = replace_extension(file, final_ext, ext)
2335                             if os.path.exists(encodeFilename(converted)):
2336                                 existing_files.append(converted)
2337                         if os.path.exists(encodeFilename(file)):
2338                             existing_files.append(file)
2339
2340                     if not existing_files or self.params.get('overwrites', False):
2341                         for file in orderedSet(existing_files):
2342                             self.report_file_delete(file)
2343                             os.remove(encodeFilename(file))
2344                         return None
2345
2346                     self.report_file_already_downloaded(existing_files[0])
2347                     info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2348                     return existing_files[0]
2349
2350                 success = True
2351                 if info_dict.get('requested_formats') is not None:
2352                     downloaded = []
2353                     merger = FFmpegMergerPP(self)
2354                     if self.params.get('allow_unplayable_formats'):
2355                         self.report_warning(
2356                             'You have requested merging of multiple formats '
2357                             'while also allowing unplayable formats to be downloaded. '
2358                             'The formats won\'t be merged to prevent data corruption.')
2359                     elif not merger.available:
2360                         self.report_warning(
2361                             'You have requested merging of multiple formats but ffmpeg is not installed. '
2362                             'The formats won\'t be merged.')
2363
2364                     def compatible_formats(formats):
2365                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2366                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2367                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2368                         if len(video_formats) > 2 or len(audio_formats) > 2:
2369                             return False
2370
2371                         # Check extension
2372                         exts = set(format.get('ext') for format in formats)
2373                         COMPATIBLE_EXTS = (
2374                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2375                             set(('webm',)),
2376                         )
2377                         for ext_sets in COMPATIBLE_EXTS:
2378                             if ext_sets.issuperset(exts):
2379                                 return True
2380                         # TODO: Check acodec/vcodec
2381                         return False
2382
2383                     requested_formats = info_dict['requested_formats']
2384                     old_ext = info_dict['ext']
2385                     if self.params.get('merge_output_format') is None:
2386                         if not compatible_formats(requested_formats):
2387                             info_dict['ext'] = 'mkv'
2388                             self.report_warning(
2389                                 'Requested formats are incompatible for merge and will be merged into mkv.')
2390                         if (info_dict['ext'] == 'webm'
2391                                 and self.params.get('writethumbnail', False)
2392                                 and info_dict.get('thumbnails')):
2393                             info_dict['ext'] = 'mkv'
2394                             self.report_warning(
2395                                 'webm doesn\'t support embedding a thumbnail, mkv will be used.')
2396
2397                     def correct_ext(filename):
2398                         filename_real_ext = os.path.splitext(filename)[1][1:]
2399                         filename_wo_ext = (
2400                             os.path.splitext(filename)[0]
2401                             if filename_real_ext == old_ext
2402                             else filename)
2403                         return '%s.%s' % (filename_wo_ext, info_dict['ext'])
2404
2405                     # Ensure filename always has a correct extension for successful merge
2406                     full_filename = correct_ext(full_filename)
2407                     temp_filename = correct_ext(temp_filename)
2408                     dl_filename = existing_file(full_filename, temp_filename)
2409                     info_dict['__real_download'] = False
2410                     if dl_filename is None:
2411                         for f in requested_formats:
2412                             new_info = dict(info_dict)
2413                             new_info.update(f)
2414                             fname = prepend_extension(
2415                                 self.prepare_filename(new_info, 'temp'),
2416                                 'f%s' % f['format_id'], new_info['ext'])
2417                             if not self._ensure_dir_exists(fname):
2418                                 return
2419                             downloaded.append(fname)
2420                             partial_success, real_download = dl(fname, new_info)
2421                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
2422                             success = success and partial_success
2423                         if merger.available and not self.params.get('allow_unplayable_formats'):
2424                             info_dict['__postprocessors'].append(merger)
2425                             info_dict['__files_to_merge'] = downloaded
2426                             # Even if there were no downloads, it is being merged only now
2427                             info_dict['__real_download'] = True
2428                         else:
2429                             for file in downloaded:
2430                                 files_to_move[file] = None
2431                 else:
2432                     # Just a single file
2433                     dl_filename = existing_file(full_filename, temp_filename)
2434                     if dl_filename is None:
2435                         success, real_download = dl(temp_filename, info_dict)
2436                         info_dict['__real_download'] = real_download
2437
2438                 dl_filename = dl_filename or temp_filename
2439                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2440
2441             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2442                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2443                 return
2444             except (OSError, IOError) as err:
2445                 raise UnavailableVideoError(err)
2446             except (ContentTooShortError, ) as err:
2447                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2448                 return
2449
2450             if success and full_filename != '-':
2451                 # Fixup content
2452                 fixup_policy = self.params.get('fixup')
2453                 if fixup_policy is None:
2454                     fixup_policy = 'detect_or_warn'
2455
2456                 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg to fix this automatically.'
2457
2458                 stretched_ratio = info_dict.get('stretched_ratio')
2459                 if stretched_ratio is not None and stretched_ratio != 1:
2460                     if fixup_policy == 'warn':
2461                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2462                             info_dict['id'], stretched_ratio))
2463                     elif fixup_policy == 'detect_or_warn':
2464                         stretched_pp = FFmpegFixupStretchedPP(self)
2465                         if stretched_pp.available:
2466                             info_dict['__postprocessors'].append(stretched_pp)
2467                         else:
2468                             self.report_warning(
2469                                 '%s: Non-uniform pixel ratio (%s). %s'
2470                                 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
2471                     else:
2472                         assert fixup_policy in ('ignore', 'never')
2473
2474                 if (info_dict.get('requested_formats') is None
2475                         and info_dict.get('container') == 'm4a_dash'
2476                         and info_dict.get('ext') == 'm4a'):
2477                     if fixup_policy == 'warn':
2478                         self.report_warning(
2479                             '%s: writing DASH m4a. '
2480                             'Only some players support this container.'
2481                             % info_dict['id'])
2482                     elif fixup_policy == 'detect_or_warn':
2483                         fixup_pp = FFmpegFixupM4aPP(self)
2484                         if fixup_pp.available:
2485                             info_dict['__postprocessors'].append(fixup_pp)
2486                         else:
2487                             self.report_warning(
2488                                 '%s: writing DASH m4a. '
2489                                 'Only some players support this container. %s'
2490                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2491                     else:
2492                         assert fixup_policy in ('ignore', 'never')
2493
2494                 if ('protocol' in info_dict
2495                         and get_suitable_downloader(info_dict, self.params).__name__ == 'HlsFD'):
2496                     if fixup_policy == 'warn':
2497                         self.report_warning('%s: malformed AAC bitstream detected.' % (
2498                             info_dict['id']))
2499                     elif fixup_policy == 'detect_or_warn':
2500                         fixup_pp = FFmpegFixupM3u8PP(self)
2501                         if fixup_pp.available:
2502                             info_dict['__postprocessors'].append(fixup_pp)
2503                         else:
2504                             self.report_warning(
2505                                 '%s: malformed AAC bitstream detected. %s'
2506                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2507                     else:
2508                         assert fixup_policy in ('ignore', 'never')
2509
2510                 try:
2511                     info_dict = self.post_process(dl_filename, info_dict, files_to_move)
2512                 except PostProcessingError as err:
2513                     self.report_error('Postprocessing: %s' % str(err))
2514                     return
2515                 try:
2516                     for ph in self._post_hooks:
2517                         ph(info_dict['filepath'])
2518                 except Exception as err:
2519                     self.report_error('post hooks: %s' % str(err))
2520                     return
2521                 must_record_download_archive = True
2522
2523         if must_record_download_archive or self.params.get('force_write_download_archive', False):
2524             self.record_download_archive(info_dict)
2525         max_downloads = self.params.get('max_downloads')
2526         if max_downloads is not None and self._num_downloads >= int(max_downloads):
2527             raise MaxDownloadsReached()
2528
2529     def download(self, url_list):
2530         """Download a given list of URLs."""
2531         outtmpl = self.outtmpl_dict['default']
2532         if (len(url_list) > 1
2533                 and outtmpl != '-'
2534                 and '%' not in outtmpl
2535                 and self.params.get('max_downloads') != 1):
2536             raise SameFileError(outtmpl)
2537
2538         for url in url_list:
2539             try:
2540                 # It also downloads the videos
2541                 res = self.extract_info(
2542                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2543             except UnavailableVideoError:
2544                 self.report_error('unable to download video')
2545             except MaxDownloadsReached:
2546                 self.to_screen('[info] Maximum number of downloaded files reached')
2547                 raise
2548             except ExistingVideoReached:
2549                 self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
2550                 raise
2551             except RejectedVideoReached:
2552                 self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
2553                 raise
2554             else:
2555                 if self.params.get('dump_single_json', False):
2556                     self.post_extract(res)
2557                     self.to_stdout(json.dumps(res, default=repr))
2558
2559         return self._download_retcode
2560
2561     def download_with_info_file(self, info_filename):
2562         with contextlib.closing(fileinput.FileInput(
2563                 [info_filename], mode='r',
2564                 openhook=fileinput.hook_encoded('utf-8'))) as f:
2565             # FileInput doesn't have a read method, we can't call json.load
2566             info = self.filter_requested_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
2567         try:
2568             self.process_ie_result(info, download=True)
2569         except (DownloadError, EntryNotInPlaylist):
2570             webpage_url = info.get('webpage_url')
2571             if webpage_url is not None:
2572                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2573                 return self.download([webpage_url])
2574             else:
2575                 raise
2576         return self._download_retcode
2577
2578     @staticmethod
2579     def filter_requested_info(info_dict, actually_filter=True):
2580         if not actually_filter:
2581             info_dict['epoch'] = int(time.time())
2582             return info_dict
2583         exceptions = {
2584             'remove': ['requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries'],
2585             'keep': ['_type'],
2586         }
2587         keep_key = lambda k: k in exceptions['keep'] or not (k.startswith('_') or k in exceptions['remove'])
2588         filter_fn = lambda obj: (
2589             list(map(filter_fn, obj)) if isinstance(obj, (list, tuple))
2590             else obj if not isinstance(obj, dict)
2591             else dict((k, filter_fn(v)) for k, v in obj.items() if keep_key(k)))
2592         return filter_fn(info_dict)
2593
2594     def run_pp(self, pp, infodict):
2595         files_to_delete = []
2596         if '__files_to_move' not in infodict:
2597             infodict['__files_to_move'] = {}
2598         files_to_delete, infodict = pp.run(infodict)
2599         if not files_to_delete:
2600             return infodict
2601
2602         if self.params.get('keepvideo', False):
2603             for f in files_to_delete:
2604                 infodict['__files_to_move'].setdefault(f, '')
2605         else:
2606             for old_filename in set(files_to_delete):
2607                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2608                 try:
2609                     os.remove(encodeFilename(old_filename))
2610                 except (IOError, OSError):
2611                     self.report_warning('Unable to remove downloaded original file')
2612                 if old_filename in infodict['__files_to_move']:
2613                     del infodict['__files_to_move'][old_filename]
2614         return infodict
2615
2616     @staticmethod
2617     def post_extract(info_dict):
2618         def actual_post_extract(info_dict):
2619             if info_dict.get('_type') in ('playlist', 'multi_video'):
2620                 for video_dict in info_dict.get('entries', {}):
2621                     actual_post_extract(video_dict or {})
2622                 return
2623
2624             if '__post_extractor' not in info_dict:
2625                 return
2626             post_extractor = info_dict['__post_extractor']
2627             if post_extractor:
2628                 info_dict.update(post_extractor().items())
2629             del info_dict['__post_extractor']
2630             return
2631
2632         actual_post_extract(info_dict or {})
2633
2634     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
2635         info = dict(ie_info)
2636         info['__files_to_move'] = files_to_move or {}
2637         for pp in self._pps[key]:
2638             info = self.run_pp(pp, info)
2639         return info, info.pop('__files_to_move', None)
2640
2641     def post_process(self, filename, ie_info, files_to_move=None):
2642         """Run all the postprocessors on the given file."""
2643         info = dict(ie_info)
2644         info['filepath'] = filename
2645         info['__files_to_move'] = files_to_move or {}
2646
2647         for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
2648             info = self.run_pp(pp, info)
2649         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
2650         del info['__files_to_move']
2651         for pp in self._pps['after_move']:
2652             info = self.run_pp(pp, info)
2653         return info
2654
2655     def _make_archive_id(self, info_dict):
2656         video_id = info_dict.get('id')
2657         if not video_id:
2658             return
2659         # Future-proof against any change in case
2660         # and backwards compatibility with prior versions
2661         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
2662         if extractor is None:
2663             url = str_or_none(info_dict.get('url'))
2664             if not url:
2665                 return
2666             # Try to find matching extractor for the URL and take its ie_key
2667             for ie in self._ies:
2668                 if ie.suitable(url):
2669                     extractor = ie.ie_key()
2670                     break
2671             else:
2672                 return
2673         return '%s %s' % (extractor.lower(), video_id)
2674
2675     def in_download_archive(self, info_dict):
2676         fn = self.params.get('download_archive')
2677         if fn is None:
2678             return False
2679
2680         vid_id = self._make_archive_id(info_dict)
2681         if not vid_id:
2682             return False  # Incomplete video information
2683
2684         return vid_id in self.archive
2685
2686     def record_download_archive(self, info_dict):
2687         fn = self.params.get('download_archive')
2688         if fn is None:
2689             return
2690         vid_id = self._make_archive_id(info_dict)
2691         assert vid_id
2692         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2693             archive_file.write(vid_id + '\n')
2694         self.archive.add(vid_id)
2695
2696     @staticmethod
2697     def format_resolution(format, default='unknown'):
2698         if format.get('vcodec') == 'none':
2699             return 'audio only'
2700         if format.get('resolution') is not None:
2701             return format['resolution']
2702         if format.get('width') and format.get('height'):
2703             res = '%dx%d' % (format['width'], format['height'])
2704         elif format.get('height'):
2705             res = '%sp' % format['height']
2706         elif format.get('width'):
2707             res = '%dx?' % format['width']
2708         else:
2709             res = default
2710         return res
2711
2712     def _format_note(self, fdict):
2713         res = ''
2714         if fdict.get('ext') in ['f4f', 'f4m']:
2715             res += '(unsupported) '
2716         if fdict.get('language'):
2717             if res:
2718                 res += ' '
2719             res += '[%s] ' % fdict['language']
2720         if fdict.get('format_note') is not None:
2721             res += fdict['format_note'] + ' '
2722         if fdict.get('tbr') is not None:
2723             res += '%4dk ' % fdict['tbr']
2724         if fdict.get('container') is not None:
2725             if res:
2726                 res += ', '
2727             res += '%s container' % fdict['container']
2728         if (fdict.get('vcodec') is not None
2729                 and fdict.get('vcodec') != 'none'):
2730             if res:
2731                 res += ', '
2732             res += fdict['vcodec']
2733             if fdict.get('vbr') is not None:
2734                 res += '@'
2735         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2736             res += 'video@'
2737         if fdict.get('vbr') is not None:
2738             res += '%4dk' % fdict['vbr']
2739         if fdict.get('fps') is not None:
2740             if res:
2741                 res += ', '
2742             res += '%sfps' % fdict['fps']
2743         if fdict.get('acodec') is not None:
2744             if res:
2745                 res += ', '
2746             if fdict['acodec'] == 'none':
2747                 res += 'video only'
2748             else:
2749                 res += '%-5s' % fdict['acodec']
2750         elif fdict.get('abr') is not None:
2751             if res:
2752                 res += ', '
2753             res += 'audio'
2754         if fdict.get('abr') is not None:
2755             res += '@%3dk' % fdict['abr']
2756         if fdict.get('asr') is not None:
2757             res += ' (%5dHz)' % fdict['asr']
2758         if fdict.get('filesize') is not None:
2759             if res:
2760                 res += ', '
2761             res += format_bytes(fdict['filesize'])
2762         elif fdict.get('filesize_approx') is not None:
2763             if res:
2764                 res += ', '
2765             res += '~' + format_bytes(fdict['filesize_approx'])
2766         return res
2767
2768     def _format_note_table(self, f):
2769         def join_fields(*vargs):
2770             return ', '.join((val for val in vargs if val != ''))
2771
2772         return join_fields(
2773             'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
2774             format_field(f, 'language', '[%s]'),
2775             format_field(f, 'format_note'),
2776             format_field(f, 'container', ignore=(None, f.get('ext'))),
2777             format_field(f, 'asr', '%5dHz'))
2778
2779     def list_formats(self, info_dict):
2780         formats = info_dict.get('formats', [info_dict])
2781         new_format = self.params.get('listformats_table', False)
2782         if new_format:
2783             table = [
2784                 [
2785                     format_field(f, 'format_id'),
2786                     format_field(f, 'ext'),
2787                     self.format_resolution(f),
2788                     format_field(f, 'fps', '%d'),
2789                     '|',
2790                     format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
2791                     format_field(f, 'tbr', '%4dk'),
2792                     shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
2793                     '|',
2794                     format_field(f, 'vcodec', default='unknown').replace('none', ''),
2795                     format_field(f, 'vbr', '%4dk'),
2796                     format_field(f, 'acodec', default='unknown').replace('none', ''),
2797                     format_field(f, 'abr', '%3dk'),
2798                     format_field(f, 'asr', '%5dHz'),
2799                     self._format_note_table(f)]
2800                 for f in formats
2801                 if f.get('preference') is None or f['preference'] >= -1000]
2802             header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', '  TBR', 'PROTO',
2803                            '|', 'VCODEC', '  VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
2804         else:
2805             table = [
2806                 [
2807                     format_field(f, 'format_id'),
2808                     format_field(f, 'ext'),
2809                     self.format_resolution(f),
2810                     self._format_note(f)]
2811                 for f in formats
2812                 if f.get('preference') is None or f['preference'] >= -1000]
2813             header_line = ['format code', 'extension', 'resolution', 'note']
2814
2815         self.to_screen(
2816             '[info] Available formats for %s:\n%s' % (info_dict['id'], render_table(
2817                 header_line,
2818                 table,
2819                 delim=new_format,
2820                 extraGap=(0 if new_format else 1),
2821                 hideEmpty=new_format)))
2822
2823     def list_thumbnails(self, info_dict):
2824         thumbnails = info_dict.get('thumbnails')
2825         if not thumbnails:
2826             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
2827             return
2828
2829         self.to_screen(
2830             '[info] Thumbnails for %s:' % info_dict['id'])
2831         self.to_screen(render_table(
2832             ['ID', 'width', 'height', 'URL'],
2833             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
2834
2835     def list_subtitles(self, video_id, subtitles, name='subtitles'):
2836         if not subtitles:
2837             self.to_screen('%s has no %s' % (video_id, name))
2838             return
2839         self.to_screen(
2840             'Available %s for %s:' % (name, video_id))
2841         self.to_screen(render_table(
2842             ['Language', 'formats'],
2843             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
2844                 for lang, formats in subtitles.items()]))
2845
2846     def urlopen(self, req):
2847         """ Start an HTTP download """
2848         if isinstance(req, compat_basestring):
2849             req = sanitized_Request(req)
2850         return self._opener.open(req, timeout=self._socket_timeout)
2851
2852     def print_debug_header(self):
2853         if not self.params.get('verbose'):
2854             return
2855
2856         if type('') is not compat_str:
2857             # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
2858             self.report_warning(
2859                 'Your Python is broken! Update to a newer and supported version')
2860
2861         stdout_encoding = getattr(
2862             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
2863         encoding_str = (
2864             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
2865                 locale.getpreferredencoding(),
2866                 sys.getfilesystemencoding(),
2867                 stdout_encoding,
2868                 self.get_encoding()))
2869         write_string(encoding_str, encoding=None)
2870
2871         source = (
2872             '(exe)' if hasattr(sys, 'frozen')
2873             else '(zip)' if isinstance(globals().get('__loader__'), zipimporter)
2874             else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py'
2875             else '')
2876         self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))
2877         if _LAZY_LOADER:
2878             self._write_string('[debug] Lazy loading extractors enabled\n')
2879         if _PLUGIN_CLASSES:
2880             self._write_string(
2881                 '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
2882         try:
2883             sp = subprocess.Popen(
2884                 ['git', 'rev-parse', '--short', 'HEAD'],
2885                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
2886                 cwd=os.path.dirname(os.path.abspath(__file__)))
2887             out, err = process_communicate_or_kill(sp)
2888             out = out.decode().strip()
2889             if re.match('[0-9a-f]+', out):
2890                 self._write_string('[debug] Git HEAD: %s\n' % out)
2891         except Exception:
2892             try:
2893                 sys.exc_clear()
2894             except Exception:
2895                 pass
2896
2897         def python_implementation():
2898             impl_name = platform.python_implementation()
2899             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
2900                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
2901             return impl_name
2902
2903         self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
2904             platform.python_version(),
2905             python_implementation(),
2906             platform.architecture()[0],
2907             platform_name()))
2908
2909         exe_versions = FFmpegPostProcessor.get_versions(self)
2910         exe_versions['rtmpdump'] = rtmpdump_version()
2911         exe_versions['phantomjs'] = PhantomJSwrapper._version()
2912         exe_str = ', '.join(
2913             '%s %s' % (exe, v)
2914             for exe, v in sorted(exe_versions.items())
2915             if v
2916         )
2917         if not exe_str:
2918             exe_str = 'none'
2919         self._write_string('[debug] exe versions: %s\n' % exe_str)
2920
2921         proxy_map = {}
2922         for handler in self._opener.handlers:
2923             if hasattr(handler, 'proxies'):
2924                 proxy_map.update(handler.proxies)
2925         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
2926
2927         if self.params.get('call_home', False):
2928             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
2929             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
2930             return
2931             latest_version = self.urlopen(
2932                 'https://yt-dl.org/latest/version').read().decode('utf-8')
2933             if version_tuple(latest_version) > version_tuple(__version__):
2934                 self.report_warning(
2935                     'You are using an outdated version (newest version: %s)! '
2936                     'See https://yt-dl.org/update if you need help updating.' %
2937                     latest_version)
2938
2939     def _setup_opener(self):
2940         timeout_val = self.params.get('socket_timeout')
2941         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
2942
2943         opts_cookiefile = self.params.get('cookiefile')
2944         opts_proxy = self.params.get('proxy')
2945
2946         if opts_cookiefile is None:
2947             self.cookiejar = compat_cookiejar.CookieJar()
2948         else:
2949             opts_cookiefile = expand_path(opts_cookiefile)
2950             self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
2951             if os.access(opts_cookiefile, os.R_OK):
2952                 self.cookiejar.load(ignore_discard=True, ignore_expires=True)
2953
2954         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
2955         if opts_proxy is not None:
2956             if opts_proxy == '':
2957                 proxies = {}
2958             else:
2959                 proxies = {'http': opts_proxy, 'https': opts_proxy}
2960         else:
2961             proxies = compat_urllib_request.getproxies()
2962             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
2963             if 'http' in proxies and 'https' not in proxies:
2964                 proxies['https'] = proxies['http']
2965         proxy_handler = PerRequestProxyHandler(proxies)
2966
2967         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
2968         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
2969         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
2970         redirect_handler = YoutubeDLRedirectHandler()
2971         data_handler = compat_urllib_request_DataHandler()
2972
2973         # When passing our own FileHandler instance, build_opener won't add the
2974         # default FileHandler and allows us to disable the file protocol, which
2975         # can be used for malicious purposes (see
2976         # https://github.com/ytdl-org/youtube-dl/issues/8227)
2977         file_handler = compat_urllib_request.FileHandler()
2978
2979         def file_open(*args, **kwargs):
2980             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
2981         file_handler.file_open = file_open
2982
2983         opener = compat_urllib_request.build_opener(
2984             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2985
2986         # Delete the default user-agent header, which would otherwise apply in
2987         # cases where our custom HTTP handler doesn't come into play
2988         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
2989         opener.addheaders = []
2990         self._opener = opener
2991
2992     def encode(self, s):
2993         if isinstance(s, bytes):
2994             return s  # Already encoded
2995
2996         try:
2997             return s.encode(self.get_encoding())
2998         except UnicodeEncodeError as err:
2999             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3000             raise
3001
3002     def get_encoding(self):
3003         encoding = self.params.get('encoding')
3004         if encoding is None:
3005             encoding = preferredencoding()
3006         return encoding
3007
3008     def _write_thumbnails(self, info_dict, filename):  # return the extensions
3009         write_all = self.params.get('write_all_thumbnails', False)
3010         thumbnails = []
3011         if write_all or self.params.get('writethumbnail', False):
3012             thumbnails = info_dict.get('thumbnails') or []
3013         multiple = write_all and len(thumbnails) > 1
3014
3015         ret = []
3016         for t in thumbnails[::1 if write_all else -1]:
3017             thumb_ext = determine_ext(t['url'], 'jpg')
3018             suffix = '%s.' % t['id'] if multiple else ''
3019             thumb_display_id = '%s ' % t['id'] if multiple else ''
3020             t['filepath'] = thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))
3021
3022             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
3023                 ret.append(suffix + thumb_ext)
3024                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
3025                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
3026             else:
3027                 self.to_screen('[%s] %s: Downloading thumbnail %s ...' %
3028                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
3029                 try:
3030                     uf = self.urlopen(t['url'])
3031                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3032                         shutil.copyfileobj(uf, thumbf)
3033                     ret.append(suffix + thumb_ext)
3034                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
3035                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
3036                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
3037                     self.report_warning('Unable to download thumbnail "%s": %s' %
3038                                         (t['url'], error_to_compat_str(err)))
3039             if ret and not write_all:
3040                 break
3041         return ret