youtube_dlc/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import copy
   9 import datetime
  10 import errno
  11 import fileinput
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import socket
  23 import sys
  24 import time
  25 import tokenize
  26 import traceback
  27 import random
  28
  29 from string import ascii_letters
  30
  31 from .compat import (
  32     compat_basestring,
  33     compat_cookiejar,
  34     compat_get_terminal_size,
  35     compat_http_client,
  36     compat_kwargs,
  37     compat_numeric_types,
  38     compat_os_name,
  39     compat_str,
  40     compat_tokenize_tokenize,
  41     compat_urllib_error,
  42     compat_urllib_request,
  43     compat_urllib_request_DataHandler,
  44 )
  45 from .utils import (
  46     age_restricted,
  47     args_to_str,
  48     ContentTooShortError,
  49     date_from_str,
  50     DateRange,
  51     DEFAULT_OUTTMPL,
  52     determine_ext,
  53     determine_protocol,
  54     DOT_DESKTOP_LINK_TEMPLATE,
  55     DOT_URL_LINK_TEMPLATE,
  56     DOT_WEBLOC_LINK_TEMPLATE,
  57     DownloadError,
  58     encode_compat_str,
  59     encodeFilename,
  60     error_to_compat_str,
  61     expand_path,
  62     ExtractorError,
  63     format_bytes,
  64     format_field,
  65     formatSeconds,
  66     GeoRestrictedError,
  67     int_or_none,
  68     iri_to_uri,
  69     ISO3166Utils,
  70     locked_file,
  71     make_HTTPS_handler,
  72     MaxDownloadsReached,
  73     orderedSet,
  74     PagedList,
  75     parse_filesize,
  76     PerRequestProxyHandler,
  77     platform_name,
  78     PostProcessingError,
  79     preferredencoding,
  80     prepend_extension,
  81     register_socks_protocols,
  82     render_table,
  83     replace_extension,
  84     SameFileError,
  85     sanitize_filename,
  86     sanitize_path,
  87     sanitize_url,
  88     sanitized_Request,
  89     std_headers,
  90     str_or_none,
  91     subtitles_filename,
  92     to_high_limit_path,
  93     UnavailableVideoError,
  94     url_basename,
  95     version_tuple,
  96     write_json_file,
  97     write_string,
  98     YoutubeDLCookieJar,
  99     YoutubeDLCookieProcessor,
 100     YoutubeDLHandler,
 101     YoutubeDLRedirectHandler,
 102     process_communicate_or_kill,
 103 )
 104 from .cache import Cache
 105 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
 106 from .extractor.openload import PhantomJSwrapper
 107 from .downloader import get_suitable_downloader
 108 from .downloader.rtmp import rtmpdump_version
 109 from .postprocessor import (
 110     FFmpegFixupM3u8PP,
 111     FFmpegFixupM4aPP,
 112     FFmpegFixupStretchedPP,
 113     FFmpegMergerPP,
 114     FFmpegPostProcessor,
 115     FFmpegSubtitlesConvertorPP,
 116     get_postprocessor,
 117 )
 118 from .version import __version__
 119
 120 if compat_os_name == 'nt':
 121     import ctypes
 122
 123
 124 class YoutubeDL(object):
 125     """YoutubeDL class.
 126
 127     YoutubeDL objects are the ones responsible of downloading the
 128     actual video file and writing it to disk if the user has requested
 129     it, among some other tasks. In most cases there should be one per
 130     program. As, given a video URL, the downloader doesn't know how to
 131     extract all the needed information, task that InfoExtractors do, it
 132     has to pass the URL to one of them.
 133
 134     For this, YoutubeDL objects have a method that allows
 135     InfoExtractors to be registered in a given order. When it is passed
 136     a URL, the YoutubeDL object handles it to the first InfoExtractor it
 137     finds that reports being able to handle it. The InfoExtractor extracts
 138     all the information about the video or videos the URL refers to, and
 139     YoutubeDL process the extracted information, possibly using a File
 140     Downloader to download the video.
 141
 142     YoutubeDL objects accept a lot of parameters. In order not to saturate
 143     the object constructor with arguments, it receives a dictionary of
 144     options instead. These options are available through the params
 145     attribute for the InfoExtractors to use. The YoutubeDL also
 146     registers itself as the downloader in charge for the InfoExtractors
 147     that are added to it, so this is a "mutual registration".
 148
 149     Available options:
 150
 151     username:          Username for authentication purposes.
 152     password:          Password for authentication purposes.
 153     videopassword:     Password for accessing a video.
 154     ap_mso:            Adobe Pass multiple-system operator identifier.
 155     ap_username:       Multiple-system operator account username.
 156     ap_password:       Multiple-system operator account password.
 157     usenetrc:          Use netrc for authentication instead.
 158     verbose:           Print additional info to stdout.
 159     quiet:             Do not print messages to stdout.
 160     no_warnings:       Do not print out anything for warnings.
 161     forceurl:          Force printing final URL.
 162     forcetitle:        Force printing title.
 163     forceid:           Force printing ID.
 164     forcethumbnail:    Force printing thumbnail URL.
 165     forcedescription:  Force printing description.
 166     forcefilename:     Force printing final filename.
 167     forceduration:     Force printing duration.
 168     forcejson:         Force printing info_dict as JSON.
 169     dump_single_json:  Force printing the info_dict of the whole playlist
 170                        (or video) as a single JSON line.
 171     force_write_download_archive: Force writing download archive regardless of
 172                        'skip_download' or 'simulate'.
 173     simulate:          Do not download the video files.
 174     format:            Video format code. see "FORMAT SELECTION" for more details.
 175     format_sort:       How to sort the video formats. see "Sorting Formats" for more details.
 176     format_sort_force: Force the given format_sort. see "Sorting Formats" for more details.
 177     allow_multiple_video_streams:   Allow multiple video streams to be merged into a single file
 178     allow_multiple_audio_streams:   Allow multiple audio streams to be merged into a single file
 179     outtmpl:           Template for output names.
 180     restrictfilenames: Do not allow "&" and spaces in file names.
 181     trim_file_name:    Limit length of filename (extension excluded).
 182     ignoreerrors:      Do not stop on download errors. (Default True when running youtube-dlc, but False when directly accessing YoutubeDL class)
 183     force_generic_extractor: Force downloader to use the generic extractor
 184     overwrites:        Overwrite all video and metadata files if True,
 185                        overwrite only non-video files if None
 186                        and don't overwrite any file if False
 187     playliststart:     Playlist item to start at.
 188     playlistend:       Playlist item to end at.
 189     playlist_items:    Specific indices of playlist to download.
 190     playlistreverse:   Download playlist items in reverse order.
 191     playlistrandom:    Download playlist items in random order.
 192     matchtitle:        Download only matching titles.
 193     rejecttitle:       Reject downloads for matching titles.
 194     logger:            Log messages to a logging.Logger instance.
 195     logtostderr:       Log messages to stderr instead of stdout.
 196     writedescription:  Write the video description to a .description file
 197     writeinfojson:     Write the video description to a .info.json file
 198     writeannotations:  Write the video annotations to a .annotations.xml file
 199     writethumbnail:    Write the thumbnail image to a file
 200     write_all_thumbnails:  Write all thumbnail formats to files
 201     writelink:         Write an internet shortcut file, depending on the
 202                        current platform (.url/.webloc/.desktop)
 203     writeurllink:      Write a Windows internet shortcut file (.url)
 204     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 205     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 206     writesubtitles:    Write the video subtitles to a file
 207     writeautomaticsub: Write the automatically generated subtitles to a file
 208     allsubtitles:      Downloads all the subtitles of the video
 209                        (requires writesubtitles or writeautomaticsub)
 210     listsubtitles:     Lists all available subtitles for the video
 211     subtitlesformat:   The format code for subtitles
 212     subtitleslangs:    List of languages of the subtitles to download
 213     keepvideo:         Keep the video file after post-processing
 214     daterange:         A DateRange object, download only if the upload_date is in the range.
 215     skip_download:     Skip the actual download of the video file
 216     cachedir:          Location of the cache files in the filesystem.
 217                        False to disable filesystem cache.
 218     noplaylist:        Download single video instead of a playlist if in doubt.
 219     age_limit:         An integer representing the user's age in years.
 220                        Unsuitable videos for the given age are skipped.
 221     min_views:         An integer representing the minimum view count the video
 222                        must have in order to not be skipped.
 223                        Videos without view count information are always
 224                        downloaded. None for no limit.
 225     max_views:         An integer representing the maximum view count.
 226                        Videos that are more popular than that are not
 227                        downloaded.
 228                        Videos without view count information are always
 229                        downloaded. None for no limit.
 230     download_archive:  File name of a file where all downloads are recorded.
 231                        Videos already present in the file are not downloaded
 232                        again.
 233     break_on_existing: Stop the download process after attempting to download a file that's
 234                        in the archive.
 235     cookiefile:        File name where cookies should be read from and dumped to.
 236     nocheckcertificate:Do not verify SSL certificates
 237     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 238                        At the moment, this is only supported by YouTube.
 239     proxy:             URL of the proxy server to use
 240     geo_verification_proxy:  URL of the proxy to use for IP address verification
 241                        on geo-restricted sites.
 242     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 243     bidi_workaround:   Work around buggy terminals without bidirectional text
 244                        support, using fridibi
 245     debug_printtraffic:Print out sent and received HTTP traffic
 246     include_ads:       Download ads as well
 247     default_search:    Prepend this string if an input url is not valid.
 248                        'auto' for elaborate guessing
 249     encoding:          Use this encoding instead of the system-specified.
 250     extract_flat:      Do not resolve URLs, return the immediate result.
 251                        Pass in 'in_playlist' to only show this behavior for
 252                        playlist items.
 253     postprocessors:    A list of dictionaries, each with an entry
 254                        * key:  The name of the postprocessor. See
 255                                youtube_dlc/postprocessor/__init__.py for a list.
 256                        as well as any further keyword arguments for the
 257                        postprocessor.
 258     post_hooks:        A list of functions that get called as the final step
 259                        for each video file, after all postprocessors have been
 260                        called. The filename will be passed as the only argument.
 261     progress_hooks:    A list of functions that get called on download
 262                        progress, with a dictionary with the entries
 263                        * status: One of "downloading", "error", or "finished".
 264                                  Check this first and ignore unknown values.
 265
 266                        If status is one of "downloading", or "finished", the
 267                        following properties may also be present:
 268                        * filename: The final filename (always present)
 269                        * tmpfilename: The filename we're currently writing to
 270                        * downloaded_bytes: Bytes on disk
 271                        * total_bytes: Size of the whole file, None if unknown
 272                        * total_bytes_estimate: Guess of the eventual file size,
 273                                                None if unavailable.
 274                        * elapsed: The number of seconds since download started.
 275                        * eta: The estimated time in seconds, None if unknown
 276                        * speed: The download speed in bytes/second, None if
 277                                 unknown
 278                        * fragment_index: The counter of the currently
 279                                          downloaded video fragment.
 280                        * fragment_count: The number of fragments (= individual
 281                                          files that will be merged)
 282
 283                        Progress hooks are guaranteed to be called at least once
 284                        (with status "finished") if the download is successful.
 285     merge_output_format: Extension to use when merging formats.
 286     fixup:             Automatically correct known faults of the file.
 287                        One of:
 288                        - "never": do nothing
 289                        - "warn": only emit a warning
 290                        - "detect_or_warn": check whether we can do anything
 291                                            about it, warn otherwise (default)
 292     source_address:    Client-side IP address to bind to.
 293     call_home:         Boolean, true iff we are allowed to contact the
 294                        youtube-dlc servers for debugging.
 295     sleep_interval:    Number of seconds to sleep before each download when
 296                        used alone or a lower bound of a range for randomized
 297                        sleep before each download (minimum possible number
 298                        of seconds to sleep) when used along with
 299                        max_sleep_interval.
 300     max_sleep_interval:Upper bound of a range for randomized sleep before each
 301                        download (maximum possible number of seconds to sleep).
 302                        Must only be used along with sleep_interval.
 303                        Actual sleep time will be a random float from range
 304                        [sleep_interval; max_sleep_interval].
 305     listformats:       Print an overview of available video formats and exit.
 306     list_thumbnails:   Print a table of all thumbnails and exit.
 307     match_filter:      A function that gets called with the info_dict of
 308                        every video.
 309                        If it returns a message, the video is ignored.
 310                        If it returns None, the video is downloaded.
 311                        match_filter_func in utils.py is one example for this.
 312     no_color:          Do not emit color codes in output.
 313     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 314                        HTTP header
 315     geo_bypass_country:
 316                        Two-letter ISO 3166-2 country code that will be used for
 317                        explicit geographic restriction bypassing via faking
 318                        X-Forwarded-For HTTP header
 319     geo_bypass_ip_block:
 320                        IP range in CIDR notation that will be used similarly to
 321                        geo_bypass_country
 322
 323     The following options determine which downloader is picked:
 324     external_downloader: Executable of the external downloader to call.
 325                        None or unset for standard (built-in) downloader.
 326     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
 327                        if True, otherwise use ffmpeg/avconv if False, otherwise
 328                        use downloader suggested by extractor if None.
 329
 330     The following parameters are not used by YoutubeDL itself, they are used by
 331     the downloader (see youtube_dlc/downloader/common.py):
 332     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 333     noresizebuffer, retries, continuedl, noprogress, consoletitle,
 334     xattr_set_filesize, external_downloader_args, hls_use_mpegts,
 335     http_chunk_size.
 336
 337     The following options are used by the post processors:
 338     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 339                        otherwise prefer ffmpeg.
 340     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 341                        to the binary or its containing directory.
 342     postprocessor_args: A dictionary of postprocessor names (in lower case) and a list
 343                         of additional command-line arguments for the postprocessor.
 344                         Use 'default' as the name for arguments to passed to all PP.
 345
 346     The following options are used by the Youtube extractor:
 347     youtube_include_dash_manifest: If True (default), DASH manifests and related
 348                         data will be downloaded and processed by extractor.
 349                         You can reduce network I/O by disabling it if you don't
 350                         care about DASH.
 351     """
 352
 353     _NUMERIC_FIELDS = set((
 354         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 355         'timestamp', 'upload_year', 'upload_month', 'upload_day',
 356         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 357         'average_rating', 'comment_count', 'age_limit',
 358         'start_time', 'end_time',
 359         'chapter_number', 'season_number', 'episode_number',
 360         'track_number', 'disc_number', 'release_year',
 361         'playlist_index',
 362     ))
 363
 364     params = None
 365     _ies = []
 366     _pps = []
 367     _download_retcode = None
 368     _num_downloads = None
 369     _screen_file = None
 370
 371     def __init__(self, params=None, auto_init=True):
 372         """Create a FileDownloader object with the given options."""
 373         if params is None:
 374             params = {}
 375         self._ies = []
 376         self._ies_instances = {}
 377         self._pps = []
 378         self._post_hooks = []
 379         self._progress_hooks = []
 380         self._download_retcode = 0
 381         self._num_downloads = 0
 382         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 383         self._err_file = sys.stderr
 384         self.params = {
 385             # Default parameters
 386             'nocheckcertificate': False,
 387         }
 388         self.params.update(params)
 389         self.cache = Cache(self)
 390         self.archive = set()
 391
 392         """Preload the archive, if any is specified"""
 393         def preload_download_archive(self):
 394             fn = self.params.get('download_archive')
 395             if fn is None:
 396                 return False
 397             try:
 398                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 399                     for line in archive_file:
 400                         self.archive.add(line.strip())
 401             except IOError as ioe:
 402                 if ioe.errno != errno.ENOENT:
 403                     raise
 404                 return False
 405             return True
 406
 407         def check_deprecated(param, option, suggestion):
 408             if self.params.get(param) is not None:
 409                 self.report_warning(
 410                     '%s is deprecated. Use %s instead.' % (option, suggestion))
 411                 return True
 412             return False
 413
 414         if self.params.get('verbose'):
 415             self.to_stdout('[debug] Loading archive file %r' % self.params.get('download_archive'))
 416
 417         preload_download_archive(self)
 418
 419         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 420             if self.params.get('geo_verification_proxy') is None:
 421                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 422
 423         check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
 424         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 425         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 426
 427         if params.get('bidi_workaround', False):
 428             try:
 429                 import pty
 430                 master, slave = pty.openpty()
 431                 width = compat_get_terminal_size().columns
 432                 if width is None:
 433                     width_args = []
 434                 else:
 435                     width_args = ['-w', str(width)]
 436                 sp_kwargs = dict(
 437                     stdin=subprocess.PIPE,
 438                     stdout=slave,
 439                     stderr=self._err_file)
 440                 try:
 441                     self._output_process = subprocess.Popen(
 442                         ['bidiv'] + width_args, **sp_kwargs
 443                     )
 444                 except OSError:
 445                     self._output_process = subprocess.Popen(
 446                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 447                 self._output_channel = os.fdopen(master, 'rb')
 448             except OSError as ose:
 449                 if ose.errno == errno.ENOENT:
 450                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 451                 else:
 452                     raise
 453
 454         if (sys.platform != 'win32'
 455                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 456                 and not params.get('restrictfilenames', False)):
 457             # Unicode filesystem API will throw errors (#1474, #13027)
 458             self.report_warning(
 459                 'Assuming --restrict-filenames since file system encoding '
 460                 'cannot encode all characters. '
 461                 'Set the LC_ALL environment variable to fix this.')
 462             self.params['restrictfilenames'] = True
 463
 464         if isinstance(params.get('outtmpl'), bytes):
 465             self.report_warning(
 466                 'Parameter outtmpl is bytes, but should be a unicode string. '
 467                 'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 468
 469         self._setup_opener()
 470
 471         if auto_init:
 472             self.print_debug_header()
 473             self.add_default_info_extractors()
 474
 475         for pp_def_raw in self.params.get('postprocessors', []):
 476             pp_class = get_postprocessor(pp_def_raw['key'])
 477             pp_def = dict(pp_def_raw)
 478             del pp_def['key']
 479             pp = pp_class(self, **compat_kwargs(pp_def))
 480             self.add_post_processor(pp)
 481
 482         for ph in self.params.get('post_hooks', []):
 483             self.add_post_hook(ph)
 484
 485         for ph in self.params.get('progress_hooks', []):
 486             self.add_progress_hook(ph)
 487
 488         register_socks_protocols()
 489
 490     def warn_if_short_id(self, argv):
 491         # short YouTube ID starting with dash?
 492         idxs = [
 493             i for i, a in enumerate(argv)
 494             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 495         if idxs:
 496             correct_argv = (
 497                 ['youtube-dlc']
 498                 + [a for i, a in enumerate(argv) if i not in idxs]
 499                 + ['--'] + [argv[i] for i in idxs]
 500             )
 501             self.report_warning(
 502                 'Long argument string detected. '
 503                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 504                 args_to_str(correct_argv))
 505
 506     def add_info_extractor(self, ie):
 507         """Add an InfoExtractor object to the end of the list."""
 508         self._ies.append(ie)
 509         if not isinstance(ie, type):
 510             self._ies_instances[ie.ie_key()] = ie
 511             ie.set_downloader(self)
 512
 513     def get_info_extractor(self, ie_key):
 514         """
 515         Get an instance of an IE with name ie_key, it will try to get one from
 516         the _ies list, if there's no instance it will create a new one and add
 517         it to the extractor list.
 518         """
 519         ie = self._ies_instances.get(ie_key)
 520         if ie is None:
 521             ie = get_info_extractor(ie_key)()
 522             self.add_info_extractor(ie)
 523         return ie
 524
 525     def add_default_info_extractors(self):
 526         """
 527         Add the InfoExtractors returned by gen_extractors to the end of the list
 528         """
 529         for ie in gen_extractor_classes():
 530             self.add_info_extractor(ie)
 531
 532     def add_post_processor(self, pp):
 533         """Add a PostProcessor object to the end of the chain."""
 534         self._pps.append(pp)
 535         pp.set_downloader(self)
 536
 537     def add_post_hook(self, ph):
 538         """Add the post hook"""
 539         self._post_hooks.append(ph)
 540
 541     def add_progress_hook(self, ph):
 542         """Add the progress hook (currently only for the file downloader)"""
 543         self._progress_hooks.append(ph)
 544
 545     def _bidi_workaround(self, message):
 546         if not hasattr(self, '_output_channel'):
 547             return message
 548
 549         assert hasattr(self, '_output_process')
 550         assert isinstance(message, compat_str)
 551         line_count = message.count('\n') + 1
 552         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 553         self._output_process.stdin.flush()
 554         res = ''.join(self._output_channel.readline().decode('utf-8')
 555                       for _ in range(line_count))
 556         return res[:-len('\n')]
 557
 558     def to_screen(self, message, skip_eol=False):
 559         """Print message to stdout if not in quiet mode."""
 560         return self.to_stdout(message, skip_eol, check_quiet=True)
 561
 562     def _write_string(self, s, out=None):
 563         write_string(s, out=out, encoding=self.params.get('encoding'))
 564
 565     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 566         """Print message to stdout if not in quiet mode."""
 567         if self.params.get('logger'):
 568             self.params['logger'].debug(message)
 569         elif not check_quiet or not self.params.get('quiet', False):
 570             message = self._bidi_workaround(message)
 571             terminator = ['\n', ''][skip_eol]
 572             output = message + terminator
 573
 574             self._write_string(output, self._screen_file)
 575
 576     def to_stderr(self, message):
 577         """Print message to stderr."""
 578         assert isinstance(message, compat_str)
 579         if self.params.get('logger'):
 580             self.params['logger'].error(message)
 581         else:
 582             message = self._bidi_workaround(message)
 583             output = message + '\n'
 584             self._write_string(output, self._err_file)
 585
 586     def to_console_title(self, message):
 587         if not self.params.get('consoletitle', False):
 588             return
 589         if compat_os_name == 'nt':
 590             if ctypes.windll.kernel32.GetConsoleWindow():
 591                 # c_wchar_p() might not be necessary if `message` is
 592                 # already of type unicode()
 593                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 594         elif 'TERM' in os.environ:
 595             self._write_string('\033[0;%s\007' % message, self._screen_file)
 596
 597     def save_console_title(self):
 598         if not self.params.get('consoletitle', False):
 599             return
 600         if self.params.get('simulate', False):
 601             return
 602         if compat_os_name != 'nt' and 'TERM' in os.environ:
 603             # Save the title on stack
 604             self._write_string('\033[22;0t', self._screen_file)
 605
 606     def restore_console_title(self):
 607         if not self.params.get('consoletitle', False):
 608             return
 609         if self.params.get('simulate', False):
 610             return
 611         if compat_os_name != 'nt' and 'TERM' in os.environ:
 612             # Restore the title from stack
 613             self._write_string('\033[23;0t', self._screen_file)
 614
 615     def __enter__(self):
 616         self.save_console_title()
 617         return self
 618
 619     def __exit__(self, *args):
 620         self.restore_console_title()
 621
 622         if self.params.get('cookiefile') is not None:
 623             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 624
 625     def trouble(self, message=None, tb=None):
 626         """Determine action to take when a download problem appears.
 627
 628         Depending on if the downloader has been configured to ignore
 629         download errors or not, this method may throw an exception or
 630         not when errors are found, after printing the message.
 631
 632         tb, if given, is additional traceback information.
 633         """
 634         if message is not None:
 635             self.to_stderr(message)
 636         if self.params.get('verbose'):
 637             if tb is None:
 638                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 639                     tb = ''
 640                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 641                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 642                     tb += encode_compat_str(traceback.format_exc())
 643                 else:
 644                     tb_data = traceback.format_list(traceback.extract_stack())
 645                     tb = ''.join(tb_data)
 646             self.to_stderr(tb)
 647         if not self.params.get('ignoreerrors', False):
 648             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 649                 exc_info = sys.exc_info()[1].exc_info
 650             else:
 651                 exc_info = sys.exc_info()
 652             raise DownloadError(message, exc_info)
 653         self._download_retcode = 1
 654
 655     def report_warning(self, message):
 656         '''
 657         Print the message to stderr, it will be prefixed with 'WARNING:'
 658         If stderr is a tty file the 'WARNING:' will be colored
 659         '''
 660         if self.params.get('logger') is not None:
 661             self.params['logger'].warning(message)
 662         else:
 663             if self.params.get('no_warnings'):
 664                 return
 665             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 666                 _msg_header = '\033[0;33mWARNING:\033[0m'
 667             else:
 668                 _msg_header = 'WARNING:'
 669             warning_message = '%s %s' % (_msg_header, message)
 670             self.to_stderr(warning_message)
 671
 672     def report_error(self, message, tb=None):
 673         '''
 674         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 675         in red if stderr is a tty file.
 676         '''
 677         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 678             _msg_header = '\033[0;31mERROR:\033[0m'
 679         else:
 680             _msg_header = 'ERROR:'
 681         error_message = '%s %s' % (_msg_header, message)
 682         self.trouble(error_message, tb)
 683
 684     def report_file_already_downloaded(self, file_name):
 685         """Report file has already been fully downloaded."""
 686         try:
 687             self.to_screen('[download] %s has already been downloaded' % file_name)
 688         except UnicodeEncodeError:
 689             self.to_screen('[download] The file has already been downloaded')
 690
 691     def report_file_delete(self, file_name):
 692         """Report that existing file will be deleted."""
 693         try:
 694             self.to_screen('Deleting already existent file %s' % file_name)
 695         except UnicodeEncodeError:
 696             self.to_screen('Deleting already existent file')
 697
 698     def prepare_filename(self, info_dict):
 699         """Generate the output filename."""
 700         try:
 701             template_dict = dict(info_dict)
 702
 703             template_dict['epoch'] = int(time.time())
 704             autonumber_size = self.params.get('autonumber_size')
 705             if autonumber_size is None:
 706                 autonumber_size = 5
 707             template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
 708             if template_dict.get('resolution') is None:
 709                 if template_dict.get('width') and template_dict.get('height'):
 710                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 711                 elif template_dict.get('height'):
 712                     template_dict['resolution'] = '%sp' % template_dict['height']
 713                 elif template_dict.get('width'):
 714                     template_dict['resolution'] = '%dx?' % template_dict['width']
 715
 716             sanitize = lambda k, v: sanitize_filename(
 717                 compat_str(v),
 718                 restricted=self.params.get('restrictfilenames'),
 719                 is_id=(k == 'id' or k.endswith('_id')))
 720             template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
 721                                  for k, v in template_dict.items()
 722                                  if v is not None and not isinstance(v, (list, tuple, dict)))
 723             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 724
 725             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
 726
 727             # For fields playlist_index and autonumber convert all occurrences
 728             # of %(field)s to %(field)0Nd for backward compatibility
 729             field_size_compat_map = {
 730                 'playlist_index': len(str(template_dict['n_entries'])),
 731                 'autonumber': autonumber_size,
 732             }
 733             FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
 734             mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
 735             if mobj:
 736                 outtmpl = re.sub(
 737                     FIELD_SIZE_COMPAT_RE,
 738                     r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
 739                     outtmpl)
 740
 741             # Missing numeric fields used together with integer presentation types
 742             # in format specification will break the argument substitution since
 743             # string 'NA' is returned for missing fields. We will patch output
 744             # template for missing fields to meet string presentation type.
 745             for numeric_field in self._NUMERIC_FIELDS:
 746                 if numeric_field not in template_dict:
 747                     # As of [1] format syntax is:
 748                     #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
 749                     # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
 750                     FORMAT_RE = r'''(?x)
 751                         (?<!%)
 752                         %
 753                         \({0}\)  # mapping key
 754                         (?:[#0\-+ ]+)?  # conversion flags (optional)
 755                         (?:\d+)?  # minimum field width (optional)
 756                         (?:\.\d+)?  # precision (optional)
 757                         [hlL]?  # length modifier (optional)
 758                         [diouxXeEfFgGcrs%]  # conversion type
 759                     '''
 760                     outtmpl = re.sub(
 761                         FORMAT_RE.format(numeric_field),
 762                         r'%({0})s'.format(numeric_field), outtmpl)
 763
 764             # expand_path translates '%%' into '%' and '$$' into '$'
 765             # correspondingly that is not what we want since we need to keep
 766             # '%%' intact for template dict substitution step. Working around
 767             # with boundary-alike separator hack.
 768             sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 769             outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 770
 771             # outtmpl should be expand_path'ed before template dict substitution
 772             # because meta fields may contain env variables we don't want to
 773             # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
 774             # title "Hello $PATH", we don't want `$PATH` to be expanded.
 775             filename = expand_path(outtmpl).replace(sep, '') % template_dict
 776
 777             # https://github.com/blackjack4494/youtube-dlc/issues/85
 778             trim_file_name = self.params.get('trim_file_name', False)
 779             if trim_file_name:
 780                 fn_groups = filename.rsplit('.')
 781                 ext = fn_groups[-1]
 782                 sub_ext = ''
 783                 if len(fn_groups) > 2:
 784                     sub_ext = fn_groups[-2]
 785                 filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
 786
 787             # Temporary fix for #4787
 788             # 'Treat' all problem characters by passing filename through preferredencoding
 789             # to workaround encoding issues with subprocess on python2 @ Windows
 790             if sys.version_info < (3, 0) and sys.platform == 'win32':
 791                 filename = encodeFilename(filename, True).decode(preferredencoding())
 792             return sanitize_path(filename)
 793         except ValueError as err:
 794             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 795             return None
 796
 797     def _match_entry(self, info_dict, incomplete):
 798         """ Returns None if the file should be downloaded """
 799
 800         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 801         if 'title' in info_dict:
 802             # This can happen when we're just evaluating the playlist
 803             title = info_dict['title']
 804             matchtitle = self.params.get('matchtitle', False)
 805             if matchtitle:
 806                 if not re.search(matchtitle, title, re.IGNORECASE):
 807                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 808             rejecttitle = self.params.get('rejecttitle', False)
 809             if rejecttitle:
 810                 if re.search(rejecttitle, title, re.IGNORECASE):
 811                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 812         date = info_dict.get('upload_date')
 813         if date is not None:
 814             dateRange = self.params.get('daterange', DateRange())
 815             if date not in dateRange:
 816                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 817         view_count = info_dict.get('view_count')
 818         if view_count is not None:
 819             min_views = self.params.get('min_views')
 820             if min_views is not None and view_count < min_views:
 821                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 822             max_views = self.params.get('max_views')
 823             if max_views is not None and view_count > max_views:
 824                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 825         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
 826             return 'Skipping "%s" because it is age restricted' % video_title
 827         if self.in_download_archive(info_dict):
 828             return '%s has already been recorded in archive' % video_title
 829
 830         if not incomplete:
 831             match_filter = self.params.get('match_filter')
 832             if match_filter is not None:
 833                 ret = match_filter(info_dict)
 834                 if ret is not None:
 835                     return ret
 836
 837         return None
 838
 839     @staticmethod
 840     def add_extra_info(info_dict, extra_info):
 841         '''Set the keys from extra_info in info dict if they are missing'''
 842         for key, value in extra_info.items():
 843             info_dict.setdefault(key, value)
 844
 845     def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info={},
 846                      process=True, force_generic_extractor=False):
 847         '''
 848         Returns a list with a dictionary for each video we find.
 849         If 'download', also downloads the videos.
 850         extra_info is a dict containing the extra values to add to each result
 851         '''
 852
 853         if not ie_key and force_generic_extractor:
 854             ie_key = 'Generic'
 855
 856         if ie_key:
 857             ies = [self.get_info_extractor(ie_key)]
 858         else:
 859             ies = self._ies
 860
 861         for ie in ies:
 862             if not ie.suitable(url):
 863                 continue
 864
 865             ie_key = ie.ie_key()
 866             ie = self.get_info_extractor(ie_key)
 867             if not ie.working():
 868                 self.report_warning('The program functionality for this site has been marked as broken, '
 869                                     'and will probably not work.')
 870
 871             try:
 872                 temp_id = ie.extract_id(url) if callable(getattr(ie, 'extract_id', None)) else ie._match_id(url)
 873             except (AssertionError, IndexError, AttributeError):
 874                 temp_id = None
 875             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
 876                 self.to_screen("[%s] %s: has already been recorded in archive" % (
 877                                ie_key, temp_id))
 878                 break
 879
 880             return self.__extract_info(url, ie, download, extra_info, process, info_dict)
 881
 882         else:
 883             self.report_error('no suitable InfoExtractor for URL %s' % url)
 884
 885     def __handle_extraction_exceptions(func):
 886         def wrapper(self, *args, **kwargs):
 887             try:
 888                 return func(self, *args, **kwargs)
 889             except GeoRestrictedError as e:
 890                 msg = e.msg
 891                 if e.countries:
 892                     msg += '\nThis video is available in %s.' % ', '.join(
 893                         map(ISO3166Utils.short2full, e.countries))
 894                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
 895                 self.report_error(msg)
 896             except ExtractorError as e:  # An error we somewhat expected
 897                 self.report_error(compat_str(e), e.format_traceback())
 898             except MaxDownloadsReached:
 899                 raise
 900             except Exception as e:
 901                 if self.params.get('ignoreerrors', False):
 902                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
 903                 else:
 904                     raise
 905         return wrapper
 906
 907     @__handle_extraction_exceptions
 908     def __extract_info(self, url, ie, download, extra_info, process, info_dict):
 909         ie_result = ie.extract(url)
 910         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
 911             return
 912         if isinstance(ie_result, list):
 913             # Backwards compatibility: old IE result format
 914             ie_result = {
 915                 '_type': 'compat_list',
 916                 'entries': ie_result,
 917             }
 918         if info_dict:
 919             if info_dict.get('id'):
 920                 ie_result['id'] = info_dict['id']
 921             if info_dict.get('title'):
 922                 ie_result['title'] = info_dict['title']
 923         self.add_default_extra_info(ie_result, ie, url)
 924         if process:
 925             return self.process_ie_result(ie_result, download, extra_info)
 926         else:
 927             return ie_result
 928
 929     def add_default_extra_info(self, ie_result, ie, url):
 930         self.add_extra_info(ie_result, {
 931             'extractor': ie.IE_NAME,
 932             'webpage_url': url,
 933             'duration_string': (
 934                 formatSeconds(ie_result['duration'], '-')
 935                 if ie_result.get('duration', None) is not None
 936                 else None),
 937             'webpage_url_basename': url_basename(url),
 938             'extractor_key': ie.ie_key(),
 939         })
 940
 941     def process_ie_result(self, ie_result, download=True, extra_info={}):
 942         """
 943         Take the result of the ie(may be modified) and resolve all unresolved
 944         references (URLs, playlist items).
 945
 946         It will also download the videos if 'download'.
 947         Returns the resolved ie_result.
 948         """
 949         result_type = ie_result.get('_type', 'video')
 950
 951         if result_type in ('url', 'url_transparent'):
 952             ie_result['url'] = sanitize_url(ie_result['url'])
 953             extract_flat = self.params.get('extract_flat', False)
 954             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
 955                     or extract_flat is True):
 956                 self.__forced_printings(
 957                     ie_result, self.prepare_filename(ie_result),
 958                     incomplete=True)
 959                 return ie_result
 960
 961         if result_type == 'video':
 962             self.add_extra_info(ie_result, extra_info)
 963             return self.process_video_result(ie_result, download=download)
 964         elif result_type == 'url':
 965             # We have to add extra_info to the results because it may be
 966             # contained in a playlist
 967             return self.extract_info(ie_result['url'],
 968                                      download, info_dict=ie_result,
 969                                      ie_key=ie_result.get('ie_key'),
 970                                      extra_info=extra_info)
 971         elif result_type == 'url_transparent':
 972             # Use the information from the embedding page
 973             info = self.extract_info(
 974                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 975                 extra_info=extra_info, download=False, process=False)
 976
 977             # extract_info may return None when ignoreerrors is enabled and
 978             # extraction failed with an error, don't crash and return early
 979             # in this case
 980             if not info:
 981                 return info
 982
 983             force_properties = dict(
 984                 (k, v) for k, v in ie_result.items() if v is not None)
 985             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
 986                 if f in force_properties:
 987                     del force_properties[f]
 988             new_result = info.copy()
 989             new_result.update(force_properties)
 990
 991             # Extracted info may not be a video result (i.e.
 992             # info.get('_type', 'video') != video) but rather an url or
 993             # url_transparent. In such cases outer metadata (from ie_result)
 994             # should be propagated to inner one (info). For this to happen
 995             # _type of info should be overridden with url_transparent. This
 996             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
 997             if new_result.get('_type') == 'url':
 998                 new_result['_type'] = 'url_transparent'
 999
1000             return self.process_ie_result(
1001                 new_result, download=download, extra_info=extra_info)
1002         elif result_type in ('playlist', 'multi_video'):
1003             # We process each entry in the playlist
1004             playlist = ie_result.get('title') or ie_result.get('id')
1005             self.to_screen('[download] Downloading playlist: %s' % playlist)
1006
1007             playlist_results = []
1008
1009             playliststart = self.params.get('playliststart', 1) - 1
1010             playlistend = self.params.get('playlistend')
1011             # For backwards compatibility, interpret -1 as whole list
1012             if playlistend == -1:
1013                 playlistend = None
1014
1015             playlistitems_str = self.params.get('playlist_items')
1016             playlistitems = None
1017             if playlistitems_str is not None:
1018                 def iter_playlistitems(format):
1019                     for string_segment in format.split(','):
1020                         if '-' in string_segment:
1021                             start, end = string_segment.split('-')
1022                             for item in range(int(start), int(end) + 1):
1023                                 yield int(item)
1024                         else:
1025                             yield int(string_segment)
1026                 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1027
1028             ie_entries = ie_result['entries']
1029
1030             def make_playlistitems_entries(list_ie_entries):
1031                 num_entries = len(list_ie_entries)
1032                 return [
1033                     list_ie_entries[i - 1] for i in playlistitems
1034                     if -num_entries <= i - 1 < num_entries]
1035
1036             def report_download(num_entries):
1037                 self.to_screen(
1038                     '[%s] playlist %s: Downloading %d videos' %
1039                     (ie_result['extractor'], playlist, num_entries))
1040
1041             if isinstance(ie_entries, list):
1042                 n_all_entries = len(ie_entries)
1043                 if playlistitems:
1044                     entries = make_playlistitems_entries(ie_entries)
1045                 else:
1046                     entries = ie_entries[playliststart:playlistend]
1047                 n_entries = len(entries)
1048                 self.to_screen(
1049                     '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
1050                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
1051             elif isinstance(ie_entries, PagedList):
1052                 if playlistitems:
1053                     entries = []
1054                     for item in playlistitems:
1055                         entries.extend(ie_entries.getslice(
1056                             item - 1, item
1057                         ))
1058                 else:
1059                     entries = ie_entries.getslice(
1060                         playliststart, playlistend)
1061                 n_entries = len(entries)
1062                 report_download(n_entries)
1063             else:  # iterable
1064                 if playlistitems:
1065                     entries = make_playlistitems_entries(list(itertools.islice(
1066                         ie_entries, 0, max(playlistitems))))
1067                 else:
1068                     entries = list(itertools.islice(
1069                         ie_entries, playliststart, playlistend))
1070                 n_entries = len(entries)
1071                 report_download(n_entries)
1072
1073             if self.params.get('playlistreverse', False):
1074                 entries = entries[::-1]
1075
1076             if self.params.get('playlistrandom', False):
1077                 random.shuffle(entries)
1078
1079             x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1080
1081             for i, entry in enumerate(entries, 1):
1082                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1083                 # This __x_forwarded_for_ip thing is a bit ugly but requires
1084                 # minimal changes
1085                 if x_forwarded_for:
1086                     entry['__x_forwarded_for_ip'] = x_forwarded_for
1087                 extra = {
1088                     'n_entries': n_entries,
1089                     'playlist': playlist,
1090                     'playlist_id': ie_result.get('id'),
1091                     'playlist_title': ie_result.get('title'),
1092                     'playlist_uploader': ie_result.get('uploader'),
1093                     'playlist_uploader_id': ie_result.get('uploader_id'),
1094                     'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
1095                     'extractor': ie_result['extractor'],
1096                     'webpage_url': ie_result['webpage_url'],
1097                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1098                     'extractor_key': ie_result['extractor_key'],
1099                 }
1100
1101                 reason = self._match_entry(entry, incomplete=True)
1102                 if reason is not None:
1103                     if reason.endswith('has already been recorded in the archive') and self.params.get('break_on_existing'):
1104                         print('[download] tried downloading a file that\'s already in the archive, stopping since --break-on-existing is set.')
1105                         break
1106                     else:
1107                         self.to_screen('[download] ' + reason)
1108                         continue
1109
1110                 entry_result = self.__process_iterable_entry(entry, download, extra)
1111                 # TODO: skip failed (empty) entries?
1112                 playlist_results.append(entry_result)
1113             ie_result['entries'] = playlist_results
1114             self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1115             return ie_result
1116         elif result_type == 'compat_list':
1117             self.report_warning(
1118                 'Extractor %s returned a compat_list result. '
1119                 'It needs to be updated.' % ie_result.get('extractor'))
1120
1121             def _fixup(r):
1122                 self.add_extra_info(
1123                     r,
1124                     {
1125                         'extractor': ie_result['extractor'],
1126                         'webpage_url': ie_result['webpage_url'],
1127                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
1128                         'extractor_key': ie_result['extractor_key'],
1129                     }
1130                 )
1131                 return r
1132             ie_result['entries'] = [
1133                 self.process_ie_result(_fixup(r), download, extra_info)
1134                 for r in ie_result['entries']
1135             ]
1136             return ie_result
1137         else:
1138             raise Exception('Invalid result type: %s' % result_type)
1139
1140     @__handle_extraction_exceptions
1141     def __process_iterable_entry(self, entry, download, extra_info):
1142         return self.process_ie_result(
1143             entry, download=download, extra_info=extra_info)
1144
1145     def _build_format_filter(self, filter_spec):
1146         " Returns a function to filter the formats according to the filter_spec "
1147
1148         OPERATORS = {
1149             '<': operator.lt,
1150             '<=': operator.le,
1151             '>': operator.gt,
1152             '>=': operator.ge,
1153             '=': operator.eq,
1154             '!=': operator.ne,
1155         }
1156         operator_rex = re.compile(r'''(?x)\s*
1157             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
1158             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1159             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
1160             $
1161             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1162         m = operator_rex.search(filter_spec)
1163         if m:
1164             try:
1165                 comparison_value = int(m.group('value'))
1166             except ValueError:
1167                 comparison_value = parse_filesize(m.group('value'))
1168                 if comparison_value is None:
1169                     comparison_value = parse_filesize(m.group('value') + 'B')
1170                 if comparison_value is None:
1171                     raise ValueError(
1172                         'Invalid value %r in format specification %r' % (
1173                             m.group('value'), filter_spec))
1174             op = OPERATORS[m.group('op')]
1175
1176         if not m:
1177             STR_OPERATORS = {
1178                 '=': operator.eq,
1179                 '^=': lambda attr, value: attr.startswith(value),
1180                 '$=': lambda attr, value: attr.endswith(value),
1181                 '*=': lambda attr, value: value in attr,
1182             }
1183             str_operator_rex = re.compile(r'''(?x)
1184                 \s*(?P<key>[a-zA-Z0-9._-]+)
1185                 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
1186                 \s*(?P<value>[a-zA-Z0-9._-]+)
1187                 \s*$
1188                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1189             m = str_operator_rex.search(filter_spec)
1190             if m:
1191                 comparison_value = m.group('value')
1192                 str_op = STR_OPERATORS[m.group('op')]
1193                 if m.group('negation'):
1194                     op = lambda attr, value: not str_op(attr, value)
1195                 else:
1196                     op = str_op
1197
1198         if not m:
1199             raise ValueError('Invalid filter specification %r' % filter_spec)
1200
1201         def _filter(f):
1202             actual_value = f.get(m.group('key'))
1203             if actual_value is None:
1204                 return m.group('none_inclusive')
1205             return op(actual_value, comparison_value)
1206         return _filter
1207
1208     def _default_format_spec(self, info_dict, download=True):
1209
1210         def can_merge():
1211             merger = FFmpegMergerPP(self)
1212             return merger.available and merger.can_merge()
1213
1214         prefer_best = (
1215             not self.params.get('simulate', False)
1216             and download
1217             and (
1218                 not can_merge()
1219                 or info_dict.get('is_live', False)
1220                 or self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-'))
1221
1222         return (
1223             'best/bestvideo+bestaudio'
1224             if prefer_best
1225             else 'bestvideo*+bestaudio/best'
1226             if not self.params.get('allow_multiple_audio_streams', False)
1227             else 'bestvideo+bestaudio/best')
1228
1229     def build_format_selector(self, format_spec):
1230         def syntax_error(note, start):
1231             message = (
1232                 'Invalid format specification: '
1233                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1234             return SyntaxError(message)
1235
1236         PICKFIRST = 'PICKFIRST'
1237         MERGE = 'MERGE'
1238         SINGLE = 'SINGLE'
1239         GROUP = 'GROUP'
1240         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1241
1242         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1243                                   'video': self.params.get('allow_multiple_video_streams', False)}
1244
1245         def _parse_filter(tokens):
1246             filter_parts = []
1247             for type, string, start, _, _ in tokens:
1248                 if type == tokenize.OP and string == ']':
1249                     return ''.join(filter_parts)
1250                 else:
1251                     filter_parts.append(string)
1252
1253         def _remove_unused_ops(tokens):
1254             # Remove operators that we don't use and join them with the surrounding strings
1255             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1256             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1257             last_string, last_start, last_end, last_line = None, None, None, None
1258             for type, string, start, end, line in tokens:
1259                 if type == tokenize.OP and string == '[':
1260                     if last_string:
1261                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1262                         last_string = None
1263                     yield type, string, start, end, line
1264                     # everything inside brackets will be handled by _parse_filter
1265                     for type, string, start, end, line in tokens:
1266                         yield type, string, start, end, line
1267                         if type == tokenize.OP and string == ']':
1268                             break
1269                 elif type == tokenize.OP and string in ALLOWED_OPS:
1270                     if last_string:
1271                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1272                         last_string = None
1273                     yield type, string, start, end, line
1274                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1275                     if not last_string:
1276                         last_string = string
1277                         last_start = start
1278                         last_end = end
1279                     else:
1280                         last_string += string
1281             if last_string:
1282                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1283
1284         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1285             selectors = []
1286             current_selector = None
1287             for type, string, start, _, _ in tokens:
1288                 # ENCODING is only defined in python 3.x
1289                 if type == getattr(tokenize, 'ENCODING', None):
1290                     continue
1291                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1292                     current_selector = FormatSelector(SINGLE, string, [])
1293                 elif type == tokenize.OP:
1294                     if string == ')':
1295                         if not inside_group:
1296                             # ')' will be handled by the parentheses group
1297                             tokens.restore_last_token()
1298                         break
1299                     elif inside_merge and string in ['/', ',']:
1300                         tokens.restore_last_token()
1301                         break
1302                     elif inside_choice and string == ',':
1303                         tokens.restore_last_token()
1304                         break
1305                     elif string == ',':
1306                         if not current_selector:
1307                             raise syntax_error('"," must follow a format selector', start)
1308                         selectors.append(current_selector)
1309                         current_selector = None
1310                     elif string == '/':
1311                         if not current_selector:
1312                             raise syntax_error('"/" must follow a format selector', start)
1313                         first_choice = current_selector
1314                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1315                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1316                     elif string == '[':
1317                         if not current_selector:
1318                             current_selector = FormatSelector(SINGLE, 'best', [])
1319                         format_filter = _parse_filter(tokens)
1320                         current_selector.filters.append(format_filter)
1321                     elif string == '(':
1322                         if current_selector:
1323                             raise syntax_error('Unexpected "("', start)
1324                         group = _parse_format_selection(tokens, inside_group=True)
1325                         current_selector = FormatSelector(GROUP, group, [])
1326                     elif string == '+':
1327                         if not current_selector:
1328                             raise syntax_error('Unexpected "+"', start)
1329                         selector_1 = current_selector
1330                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
1331                         if not selector_2:
1332                             raise syntax_error('Expected a selector', start)
1333                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1334                     else:
1335                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1336                 elif type == tokenize.ENDMARKER:
1337                     break
1338             if current_selector:
1339                 selectors.append(current_selector)
1340             return selectors
1341
1342         def _build_selector_function(selector):
1343             if isinstance(selector, list):  # ,
1344                 fs = [_build_selector_function(s) for s in selector]
1345
1346                 def selector_function(ctx):
1347                     for f in fs:
1348                         for format in f(ctx):
1349                             yield format
1350                 return selector_function
1351
1352             elif selector.type == GROUP:  # ()
1353                 selector_function = _build_selector_function(selector.selector)
1354
1355             elif selector.type == PICKFIRST:  # /
1356                 fs = [_build_selector_function(s) for s in selector.selector]
1357
1358                 def selector_function(ctx):
1359                     for f in fs:
1360                         picked_formats = list(f(ctx))
1361                         if picked_formats:
1362                             return picked_formats
1363                     return []
1364
1365             elif selector.type == SINGLE:  # atom
1366                 format_spec = selector.selector if selector.selector is not None else 'best'
1367
1368                 if format_spec == 'all':
1369                     def selector_function(ctx):
1370                         formats = list(ctx['formats'])
1371                         if formats:
1372                             for f in formats:
1373                                 yield f
1374
1375                 else:
1376                     format_fallback = False
1377                     format_spec_obj = re.match(r'(best|worst|b|w)(video|audio|v|a)?(\*)?$', format_spec)
1378                     if format_spec_obj is not None:
1379                         format_idx = 0 if format_spec_obj.group(1)[0] == 'w' else -1
1380                         format_type = format_spec_obj.group(2)[0] if format_spec_obj.group(2) else False
1381                         not_format_type = 'v' if format_type == 'a' else 'a'
1382                         format_modified = format_spec_obj.group(3) is not None
1383
1384                         format_fallback = not format_type and not format_modified  # for b, w
1385                         filter_f = ((lambda f: f.get(format_type + 'codec') != 'none')
1386                                     if format_type and format_modified  # bv*, ba*, wv*, wa*
1387                                     else (lambda f: f.get(not_format_type + 'codec') == 'none')
1388                                     if format_type  # bv, ba, wv, wa
1389                                     else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
1390                                     if not format_modified  # b, w
1391                                     else None)  # b*, w*
1392                     else:
1393                         format_idx = -1
1394                         filter_f = ((lambda f: f.get('ext') == format_spec)
1395                                     if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
1396                                     else (lambda f: f.get('format_id') == format_spec))  # id
1397
1398                     def selector_function(ctx):
1399                         formats = list(ctx['formats'])
1400                         if not formats:
1401                             return
1402                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
1403                         if matches:
1404                             yield matches[format_idx]
1405                         elif format_fallback == 'force' or (format_fallback and ctx['incomplete_formats']):
1406                             # for extractors with incomplete formats (audio only (soundcloud)
1407                             # or video only (imgur)) best/worst will fallback to
1408                             # best/worst {video,audio}-only format
1409                             yield formats[format_idx]
1410
1411             elif selector.type == MERGE:        # +
1412                 def _merge(formats_pair):
1413                     format_1, format_2 = formats_pair
1414
1415                     formats_info = []
1416                     formats_info.extend(format_1.get('requested_formats', (format_1,)))
1417                     formats_info.extend(format_2.get('requested_formats', (format_2,)))
1418
1419                     if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1420                         get_no_more = {"video": False, "audio": False}
1421                         for (i, fmt_info) in enumerate(formats_info):
1422                             for aud_vid in ["audio", "video"]:
1423                                 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1424                                     if get_no_more[aud_vid]:
1425                                         formats_info.pop(i)
1426                                     get_no_more[aud_vid] = True
1427
1428                     if len(formats_info) == 1:
1429                         return formats_info[0]
1430
1431                     video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1432                     audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1433
1434                     the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1435                     the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1436
1437                     output_ext = self.params.get('merge_output_format')
1438                     if not output_ext:
1439                         if the_only_video:
1440                             output_ext = the_only_video['ext']
1441                         elif the_only_audio and not video_fmts:
1442                             output_ext = the_only_audio['ext']
1443                         else:
1444                             output_ext = 'mkv'
1445
1446                     new_dict = {
1447                         'requested_formats': formats_info,
1448                         'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
1449                         'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
1450                         'ext': output_ext,
1451                     }
1452
1453                     if the_only_video:
1454                         new_dict.update({
1455                             'width': the_only_video.get('width'),
1456                             'height': the_only_video.get('height'),
1457                             'resolution': the_only_video.get('resolution'),
1458                             'fps': the_only_video.get('fps'),
1459                             'vcodec': the_only_video.get('vcodec'),
1460                             'vbr': the_only_video.get('vbr'),
1461                             'stretched_ratio': the_only_video.get('stretched_ratio'),
1462                         })
1463
1464                     if the_only_audio:
1465                         new_dict.update({
1466                             'acodec': the_only_audio.get('acodec'),
1467                             'abr': the_only_audio.get('abr'),
1468                         })
1469
1470                     return new_dict
1471
1472                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
1473
1474                 def selector_function(ctx):
1475                     for pair in itertools.product(
1476                             selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
1477                         yield _merge(pair)
1478
1479             filters = [self._build_format_filter(f) for f in selector.filters]
1480
1481             def final_selector(ctx):
1482                 ctx_copy = copy.deepcopy(ctx)
1483                 for _filter in filters:
1484                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1485                 return selector_function(ctx_copy)
1486             return final_selector
1487
1488         stream = io.BytesIO(format_spec.encode('utf-8'))
1489         try:
1490             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1491         except tokenize.TokenError:
1492             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1493
1494         class TokenIterator(object):
1495             def __init__(self, tokens):
1496                 self.tokens = tokens
1497                 self.counter = 0
1498
1499             def __iter__(self):
1500                 return self
1501
1502             def __next__(self):
1503                 if self.counter >= len(self.tokens):
1504                     raise StopIteration()
1505                 value = self.tokens[self.counter]
1506                 self.counter += 1
1507                 return value
1508
1509             next = __next__
1510
1511             def restore_last_token(self):
1512                 self.counter -= 1
1513
1514         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1515         return _build_selector_function(parsed_selector)
1516
1517     def _calc_headers(self, info_dict):
1518         res = std_headers.copy()
1519
1520         add_headers = info_dict.get('http_headers')
1521         if add_headers:
1522             res.update(add_headers)
1523
1524         cookies = self._calc_cookies(info_dict)
1525         if cookies:
1526             res['Cookie'] = cookies
1527
1528         if 'X-Forwarded-For' not in res:
1529             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1530             if x_forwarded_for_ip:
1531                 res['X-Forwarded-For'] = x_forwarded_for_ip
1532
1533         return res
1534
1535     def _calc_cookies(self, info_dict):
1536         pr = sanitized_Request(info_dict['url'])
1537         self.cookiejar.add_cookie_header(pr)
1538         return pr.get_header('Cookie')
1539
1540     def process_video_result(self, info_dict, download=True):
1541         assert info_dict.get('_type', 'video') == 'video'
1542
1543         if 'id' not in info_dict:
1544             raise ExtractorError('Missing "id" field in extractor result')
1545         if 'title' not in info_dict:
1546             raise ExtractorError('Missing "title" field in extractor result')
1547
1548         def report_force_conversion(field, field_not, conversion):
1549             self.report_warning(
1550                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
1551                 % (field, field_not, conversion))
1552
1553         def sanitize_string_field(info, string_field):
1554             field = info.get(string_field)
1555             if field is None or isinstance(field, compat_str):
1556                 return
1557             report_force_conversion(string_field, 'a string', 'string')
1558             info[string_field] = compat_str(field)
1559
1560         def sanitize_numeric_fields(info):
1561             for numeric_field in self._NUMERIC_FIELDS:
1562                 field = info.get(numeric_field)
1563                 if field is None or isinstance(field, compat_numeric_types):
1564                     continue
1565                 report_force_conversion(numeric_field, 'numeric', 'int')
1566                 info[numeric_field] = int_or_none(field)
1567
1568         sanitize_string_field(info_dict, 'id')
1569         sanitize_numeric_fields(info_dict)
1570
1571         if 'playlist' not in info_dict:
1572             # It isn't part of a playlist
1573             info_dict['playlist'] = None
1574             info_dict['playlist_index'] = None
1575
1576         thumbnails = info_dict.get('thumbnails')
1577         if thumbnails is None:
1578             thumbnail = info_dict.get('thumbnail')
1579             if thumbnail:
1580                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1581         if thumbnails:
1582             thumbnails.sort(key=lambda t: (
1583                 t.get('preference') if t.get('preference') is not None else -1,
1584                 t.get('width') if t.get('width') is not None else -1,
1585                 t.get('height') if t.get('height') is not None else -1,
1586                 t.get('id') if t.get('id') is not None else '', t.get('url')))
1587             for i, t in enumerate(thumbnails):
1588                 t['url'] = sanitize_url(t['url'])
1589                 if t.get('width') and t.get('height'):
1590                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1591                 if t.get('id') is None:
1592                     t['id'] = '%d' % i
1593
1594         if self.params.get('list_thumbnails'):
1595             self.list_thumbnails(info_dict)
1596             return
1597
1598         thumbnail = info_dict.get('thumbnail')
1599         if thumbnail:
1600             info_dict['thumbnail'] = sanitize_url(thumbnail)
1601         elif thumbnails:
1602             info_dict['thumbnail'] = thumbnails[-1]['url']
1603
1604         if 'display_id' not in info_dict and 'id' in info_dict:
1605             info_dict['display_id'] = info_dict['id']
1606
1607         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1608             # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1609             # see http://bugs.python.org/issue1646728)
1610             try:
1611                 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1612                 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1613             except (ValueError, OverflowError, OSError):
1614                 pass
1615
1616         # Auto generate title fields corresponding to the *_number fields when missing
1617         # in order to always have clean titles. This is very common for TV series.
1618         for field in ('chapter', 'season', 'episode'):
1619             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1620                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
1621
1622         for cc_kind in ('subtitles', 'automatic_captions'):
1623             cc = info_dict.get(cc_kind)
1624             if cc:
1625                 for _, subtitle in cc.items():
1626                     for subtitle_format in subtitle:
1627                         if subtitle_format.get('url'):
1628                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
1629                         if subtitle_format.get('ext') is None:
1630                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1631
1632         automatic_captions = info_dict.get('automatic_captions')
1633         subtitles = info_dict.get('subtitles')
1634
1635         if self.params.get('listsubtitles', False):
1636             if 'automatic_captions' in info_dict:
1637                 self.list_subtitles(
1638                     info_dict['id'], automatic_captions, 'automatic captions')
1639             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1640             return
1641
1642         info_dict['requested_subtitles'] = self.process_subtitles(
1643             info_dict['id'], subtitles, automatic_captions)
1644
1645         # We now pick which formats have to be downloaded
1646         if info_dict.get('formats') is None:
1647             # There's only one format available
1648             formats = [info_dict]
1649         else:
1650             formats = info_dict['formats']
1651
1652         if not formats:
1653             raise ExtractorError('No video formats found!')
1654
1655         def is_wellformed(f):
1656             url = f.get('url')
1657             if not url:
1658                 self.report_warning(
1659                     '"url" field is missing or empty - skipping format, '
1660                     'there is an error in extractor')
1661                 return False
1662             if isinstance(url, bytes):
1663                 sanitize_string_field(f, 'url')
1664             return True
1665
1666         # Filter out malformed formats for better extraction robustness
1667         formats = list(filter(is_wellformed, formats))
1668
1669         formats_dict = {}
1670
1671         # We check that all the formats have the format and format_id fields
1672         for i, format in enumerate(formats):
1673             sanitize_string_field(format, 'format_id')
1674             sanitize_numeric_fields(format)
1675             format['url'] = sanitize_url(format['url'])
1676             if not format.get('format_id'):
1677                 format['format_id'] = compat_str(i)
1678             else:
1679                 # Sanitize format_id from characters used in format selector expression
1680                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
1681             format_id = format['format_id']
1682             if format_id not in formats_dict:
1683                 formats_dict[format_id] = []
1684             formats_dict[format_id].append(format)
1685
1686         # Make sure all formats have unique format_id
1687         for format_id, ambiguous_formats in formats_dict.items():
1688             if len(ambiguous_formats) > 1:
1689                 for i, format in enumerate(ambiguous_formats):
1690                     format['format_id'] = '%s-%d' % (format_id, i)
1691
1692         for i, format in enumerate(formats):
1693             if format.get('format') is None:
1694                 format['format'] = '{id} - {res}{note}'.format(
1695                     id=format['format_id'],
1696                     res=self.format_resolution(format),
1697                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1698                 )
1699             # Automatically determine file extension if missing
1700             if format.get('ext') is None:
1701                 format['ext'] = determine_ext(format['url']).lower()
1702             # Automatically determine protocol if missing (useful for format
1703             # selection purposes)
1704             if format.get('protocol') is None:
1705                 format['protocol'] = determine_protocol(format)
1706             # Add HTTP headers, so that external programs can use them from the
1707             # json output
1708             full_format_info = info_dict.copy()
1709             full_format_info.update(format)
1710             format['http_headers'] = self._calc_headers(full_format_info)
1711         # Remove private housekeeping stuff
1712         if '__x_forwarded_for_ip' in info_dict:
1713             del info_dict['__x_forwarded_for_ip']
1714
1715         # TODO Central sorting goes here
1716
1717         if formats[0] is not info_dict:
1718             # only set the 'formats' fields if the original info_dict list them
1719             # otherwise we end up with a circular reference, the first (and unique)
1720             # element in the 'formats' field in info_dict is info_dict itself,
1721             # which can't be exported to json
1722             info_dict['formats'] = formats
1723         if self.params.get('listformats'):
1724             self.list_formats(info_dict)
1725             return
1726
1727         req_format = self.params.get('format')
1728         if req_format is None:
1729             req_format = self._default_format_spec(info_dict, download=download)
1730             if self.params.get('verbose'):
1731                 self._write_string('[debug] Default format spec: %s\n' % req_format)
1732
1733         format_selector = self.build_format_selector(req_format)
1734
1735         # While in format selection we may need to have an access to the original
1736         # format set in order to calculate some metrics or do some processing.
1737         # For now we need to be able to guess whether original formats provided
1738         # by extractor are incomplete or not (i.e. whether extractor provides only
1739         # video-only or audio-only formats) for proper formats selection for
1740         # extractors with such incomplete formats (see
1741         # https://github.com/ytdl-org/youtube-dl/pull/5556).
1742         # Since formats may be filtered during format selection and may not match
1743         # the original formats the results may be incorrect. Thus original formats
1744         # or pre-calculated metrics should be passed to format selection routines
1745         # as well.
1746         # We will pass a context object containing all necessary additional data
1747         # instead of just formats.
1748         # This fixes incorrect format selection issue (see
1749         # https://github.com/ytdl-org/youtube-dl/issues/10083).
1750         incomplete_formats = (
1751             # All formats are video-only or
1752             all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
1753             # all formats are audio-only
1754             or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
1755
1756         ctx = {
1757             'formats': formats,
1758             'incomplete_formats': incomplete_formats,
1759         }
1760
1761         formats_to_download = list(format_selector(ctx))
1762         if not formats_to_download:
1763             raise ExtractorError('requested format not available',
1764                                  expected=True)
1765
1766         if download:
1767             self.to_screen('[info] Downloading format(s) %s' % ", ".join([f['format_id'] for f in formats_to_download]))
1768             if len(formats_to_download) > 1:
1769                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1770             for format in formats_to_download:
1771                 new_info = dict(info_dict)
1772                 new_info.update(format)
1773                 self.process_info(new_info)
1774         # We update the info dict with the best quality format (backwards compatibility)
1775         info_dict.update(formats_to_download[-1])
1776         return info_dict
1777
1778     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1779         """Select the requested subtitles and their format"""
1780         available_subs = {}
1781         if normal_subtitles and self.params.get('writesubtitles'):
1782             available_subs.update(normal_subtitles)
1783         if automatic_captions and self.params.get('writeautomaticsub'):
1784             for lang, cap_info in automatic_captions.items():
1785                 if lang not in available_subs:
1786                     available_subs[lang] = cap_info
1787
1788         if (not self.params.get('writesubtitles') and not
1789                 self.params.get('writeautomaticsub') or not
1790                 available_subs):
1791             return None
1792
1793         if self.params.get('allsubtitles', False):
1794             requested_langs = available_subs.keys()
1795         else:
1796             if self.params.get('subtitleslangs', False):
1797                 requested_langs = self.params.get('subtitleslangs')
1798             elif 'en' in available_subs:
1799                 requested_langs = ['en']
1800             else:
1801                 requested_langs = [list(available_subs.keys())[0]]
1802
1803         formats_query = self.params.get('subtitlesformat', 'best')
1804         formats_preference = formats_query.split('/') if formats_query else []
1805         subs = {}
1806         for lang in requested_langs:
1807             formats = available_subs.get(lang)
1808             if formats is None:
1809                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1810                 continue
1811             for ext in formats_preference:
1812                 if ext == 'best':
1813                     f = formats[-1]
1814                     break
1815                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1816                 if matches:
1817                     f = matches[-1]
1818                     break
1819             else:
1820                 f = formats[-1]
1821                 self.report_warning(
1822                     'No subtitle format found matching "%s" for language %s, '
1823                     'using %s' % (formats_query, lang, f['ext']))
1824             subs[lang] = f
1825         return subs
1826
1827     def __forced_printings(self, info_dict, filename, incomplete):
1828         def print_mandatory(field):
1829             if (self.params.get('force%s' % field, False)
1830                     and (not incomplete or info_dict.get(field) is not None)):
1831                 self.to_stdout(info_dict[field])
1832
1833         def print_optional(field):
1834             if (self.params.get('force%s' % field, False)
1835                     and info_dict.get(field) is not None):
1836                 self.to_stdout(info_dict[field])
1837
1838         print_mandatory('title')
1839         print_mandatory('id')
1840         if self.params.get('forceurl', False) and not incomplete:
1841             if info_dict.get('requested_formats') is not None:
1842                 for f in info_dict['requested_formats']:
1843                     self.to_stdout(f['url'] + f.get('play_path', ''))
1844             else:
1845                 # For RTMP URLs, also include the playpath
1846                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1847         print_optional('thumbnail')
1848         print_optional('description')
1849         if self.params.get('forcefilename', False) and filename is not None:
1850             self.to_stdout(filename)
1851         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1852             self.to_stdout(formatSeconds(info_dict['duration']))
1853         print_mandatory('format')
1854         if self.params.get('forcejson', False):
1855             self.to_stdout(json.dumps(info_dict))
1856
1857     def process_info(self, info_dict):
1858         """Process a single resolved IE result."""
1859
1860         assert info_dict.get('_type', 'video') == 'video'
1861
1862         max_downloads = self.params.get('max_downloads')
1863         if max_downloads is not None:
1864             if self._num_downloads >= int(max_downloads):
1865                 raise MaxDownloadsReached()
1866
1867         # TODO: backward compatibility, to be removed
1868         info_dict['fulltitle'] = info_dict['title']
1869
1870         if 'format' not in info_dict:
1871             info_dict['format'] = info_dict['ext']
1872
1873         reason = self._match_entry(info_dict, incomplete=False)
1874         if reason is not None:
1875             self.to_screen('[download] ' + reason)
1876             return
1877
1878         self._num_downloads += 1
1879
1880         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1881
1882         # Forced printings
1883         self.__forced_printings(info_dict, filename, incomplete=False)
1884
1885         if self.params.get('simulate', False):
1886             if self.params.get('force_write_download_archive', False):
1887                 self.record_download_archive(info_dict)
1888
1889             # Do nothing else if in simulate mode
1890             return
1891
1892         if filename is None:
1893             return
1894
1895         def ensure_dir_exists(path):
1896             try:
1897                 dn = os.path.dirname(path)
1898                 if dn and not os.path.exists(dn):
1899                     os.makedirs(dn)
1900                 return True
1901             except (OSError, IOError) as err:
1902                 self.report_error('unable to create directory ' + error_to_compat_str(err))
1903                 return False
1904
1905         if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
1906             return
1907
1908         if self.params.get('writedescription', False):
1909             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1910             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
1911                 self.to_screen('[info] Video description is already present')
1912             elif info_dict.get('description') is None:
1913                 self.report_warning('There\'s no description to write.')
1914             else:
1915                 try:
1916                     self.to_screen('[info] Writing video description to: ' + descfn)
1917                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1918                         descfile.write(info_dict['description'])
1919                 except (OSError, IOError):
1920                     self.report_error('Cannot write description file ' + descfn)
1921                     return
1922
1923         if self.params.get('writeannotations', False):
1924             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1925             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
1926                 self.to_screen('[info] Video annotations are already present')
1927             elif not info_dict.get('annotations'):
1928                 self.report_warning('There are no annotations to write.')
1929             else:
1930                 try:
1931                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1932                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1933                         annofile.write(info_dict['annotations'])
1934                 except (KeyError, TypeError):
1935                     self.report_warning('There are no annotations to write.')
1936                 except (OSError, IOError):
1937                     self.report_error('Cannot write annotations file: ' + annofn)
1938                     return
1939
1940         def dl(name, info, subtitle=False):
1941             fd = get_suitable_downloader(info, self.params)(self, self.params)
1942             for ph in self._progress_hooks:
1943                 fd.add_progress_hook(ph)
1944             if self.params.get('verbose'):
1945                 self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
1946             return fd.download(name, info, subtitle)
1947
1948         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1949                                        self.params.get('writeautomaticsub')])
1950
1951         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1952             # subtitles download errors are already managed as troubles in relevant IE
1953             # that way it will silently go on when used with unsupporting IE
1954             subtitles = info_dict['requested_subtitles']
1955             # ie = self.get_info_extractor(info_dict['extractor_key'])
1956             for sub_lang, sub_info in subtitles.items():
1957                 sub_format = sub_info['ext']
1958                 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
1959                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
1960                     self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
1961                 else:
1962                     self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1963                     if sub_info.get('data') is not None:
1964                         try:
1965                             # Use newline='' to prevent conversion of newline characters
1966                             # See https://github.com/ytdl-org/youtube-dl/issues/10268
1967                             with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
1968                                 subfile.write(sub_info['data'])
1969                         except (OSError, IOError):
1970                             self.report_error('Cannot write subtitles file ' + sub_filename)
1971                             return
1972                     else:
1973                         try:
1974                             dl(sub_filename, sub_info, subtitle=True)
1975                             '''
1976                             if self.params.get('sleep_interval_subtitles', False):
1977                                 dl(sub_filename, sub_info)
1978                             else:
1979                                 sub_data = ie._request_webpage(
1980                                     sub_info['url'], info_dict['id'], note=False).read()
1981                                 with io.open(encodeFilename(sub_filename), 'wb') as subfile:
1982                                     subfile.write(sub_data)
1983                             '''
1984                         except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1985                             self.report_warning('Unable to download subtitle for "%s": %s' %
1986                                                 (sub_lang, error_to_compat_str(err)))
1987                             continue
1988
1989         if self.params.get('skip_download', False):
1990             if self.params.get('convertsubtitles', False):
1991                 subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
1992                 filename_real_ext = os.path.splitext(filename)[1][1:]
1993                 filename_wo_ext = (
1994                     os.path.splitext(filename)[0]
1995                     if filename_real_ext == info_dict['ext']
1996                     else filename)
1997                 afilename = '%s.%s' % (filename_wo_ext, self.params.get('convertsubtitles'))
1998                 if subconv.available:
1999                     info_dict.setdefault('__postprocessors', [])
2000                     # info_dict['__postprocessors'].append(subconv)
2001                 if os.path.exists(encodeFilename(afilename)):
2002                     self.to_screen(
2003                         '[download] %s has already been downloaded and '
2004                         'converted' % afilename)
2005                 else:
2006                     try:
2007                         self.post_process(filename, info_dict)
2008                     except (PostProcessingError) as err:
2009                         self.report_error('postprocessing: %s' % str(err))
2010                         return
2011
2012         if self.params.get('writeinfojson', False):
2013             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
2014             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
2015                 self.to_screen('[info] Video description metadata is already present')
2016             else:
2017                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
2018                 try:
2019                     write_json_file(self.filter_requested_info(info_dict), infofn)
2020                 except (OSError, IOError):
2021                     self.report_error('Cannot write metadata to JSON file ' + infofn)
2022                     return
2023
2024         self._write_thumbnails(info_dict, filename)
2025
2026         # Write internet shortcut files
2027         url_link = webloc_link = desktop_link = False
2028         if self.params.get('writelink', False):
2029             if sys.platform == "darwin":  # macOS.
2030                 webloc_link = True
2031             elif sys.platform.startswith("linux"):
2032                 desktop_link = True
2033             else:  # if sys.platform in ['win32', 'cygwin']:
2034                 url_link = True
2035         if self.params.get('writeurllink', False):
2036             url_link = True
2037         if self.params.get('writewebloclink', False):
2038             webloc_link = True
2039         if self.params.get('writedesktoplink', False):
2040             desktop_link = True
2041
2042         if url_link or webloc_link or desktop_link:
2043             if 'webpage_url' not in info_dict:
2044                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2045                 return
2046             ascii_url = iri_to_uri(info_dict['webpage_url'])
2047
2048         def _write_link_file(extension, template, newline, embed_filename):
2049             linkfn = replace_extension(filename, extension, info_dict.get('ext'))
2050             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(linkfn)):
2051                 self.to_screen('[info] Internet shortcut is already present')
2052             else:
2053                 try:
2054                     self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2055                     with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2056                         template_vars = {'url': ascii_url}
2057                         if embed_filename:
2058                             template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2059                         linkfile.write(template % template_vars)
2060                 except (OSError, IOError):
2061                     self.report_error('Cannot write internet shortcut ' + linkfn)
2062                     return False
2063             return True
2064
2065         if url_link:
2066             if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2067                 return
2068         if webloc_link:
2069             if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2070                 return
2071         if desktop_link:
2072             if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2073                 return
2074
2075         # Download
2076         must_record_download_archive = False
2077         if not self.params.get('skip_download', False):
2078             try:
2079                 if info_dict.get('requested_formats') is not None:
2080                     downloaded = []
2081                     success = True
2082                     merger = FFmpegMergerPP(self)
2083                     if not merger.available:
2084                         postprocessors = []
2085                         self.report_warning('You have requested multiple '
2086                                             'formats but ffmpeg or avconv are not installed.'
2087                                             ' The formats won\'t be merged.')
2088                     else:
2089                         postprocessors = [merger]
2090
2091                     def compatible_formats(formats):
2092                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2093                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2094                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2095                         if len(video_formats) > 2 or len(audio_formats) > 2:
2096                             return False
2097
2098                         # Check extension
2099                         exts = set(format.get('ext') for format in formats)
2100                         COMPATIBLE_EXTS = (
2101                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2102                             set(('webm',)),
2103                         )
2104                         for ext_sets in COMPATIBLE_EXTS:
2105                             if ext_sets.issuperset(exts):
2106                                 return True
2107                         # TODO: Check acodec/vcodec
2108                         return False
2109
2110                     filename_real_ext = os.path.splitext(filename)[1][1:]
2111                     filename_wo_ext = (
2112                         os.path.splitext(filename)[0]
2113                         if filename_real_ext == info_dict['ext']
2114                         else filename)
2115                     requested_formats = info_dict['requested_formats']
2116                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
2117                         info_dict['ext'] = 'mkv'
2118                         self.report_warning(
2119                             'Requested formats are incompatible for merge and will be merged into mkv.')
2120                     # Ensure filename always has a correct extension for successful merge
2121                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
2122                     file_exists = os.path.exists(encodeFilename(filename))
2123                     if not self.params.get('overwrites', False) and file_exists:
2124                         self.to_screen(
2125                             '[download] %s has already been downloaded and '
2126                             'merged' % filename)
2127                     else:
2128                         if file_exists:
2129                             self.report_file_delete(filename)
2130                             os.remove(encodeFilename(filename))
2131                         for f in requested_formats:
2132                             new_info = dict(info_dict)
2133                             new_info.update(f)
2134                             fname = prepend_extension(
2135                                 self.prepare_filename(new_info),
2136                                 'f%s' % f['format_id'], new_info['ext'])
2137                             if not ensure_dir_exists(fname):
2138                                 return
2139                             downloaded.append(fname)
2140                             partial_success, real_download = dl(fname, new_info)
2141                             success = success and partial_success
2142                         info_dict['__postprocessors'] = postprocessors
2143                         info_dict['__files_to_merge'] = downloaded
2144                         # Even if there were no downloads, it is being merged only now
2145                         info_dict['__real_download'] = True
2146                 else:
2147                     # Delete existing file with --yes-overwrites
2148                     if self.params.get('overwrites', False):
2149                         if os.path.exists(encodeFilename(filename)):
2150                             self.report_file_delete(filename)
2151                             os.remove(encodeFilename(filename))
2152                     # Just a single file
2153                     success, real_download = dl(filename, info_dict)
2154                     info_dict['__real_download'] = real_download
2155             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2156                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2157                 return
2158             except (OSError, IOError) as err:
2159                 raise UnavailableVideoError(err)
2160             except (ContentTooShortError, ) as err:
2161                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2162                 return
2163
2164             if success and filename != '-':
2165                 # Fixup content
2166                 fixup_policy = self.params.get('fixup')
2167                 if fixup_policy is None:
2168                     fixup_policy = 'detect_or_warn'
2169
2170                 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
2171
2172                 stretched_ratio = info_dict.get('stretched_ratio')
2173                 if stretched_ratio is not None and stretched_ratio != 1:
2174                     if fixup_policy == 'warn':
2175                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2176                             info_dict['id'], stretched_ratio))
2177                     elif fixup_policy == 'detect_or_warn':
2178                         stretched_pp = FFmpegFixupStretchedPP(self)
2179                         if stretched_pp.available:
2180                             info_dict.setdefault('__postprocessors', [])
2181                             info_dict['__postprocessors'].append(stretched_pp)
2182                         else:
2183                             self.report_warning(
2184                                 '%s: Non-uniform pixel ratio (%s). %s'
2185                                 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
2186                     else:
2187                         assert fixup_policy in ('ignore', 'never')
2188
2189                 if (info_dict.get('requested_formats') is None
2190                         and info_dict.get('container') == 'm4a_dash'):
2191                     if fixup_policy == 'warn':
2192                         self.report_warning(
2193                             '%s: writing DASH m4a. '
2194                             'Only some players support this container.'
2195                             % info_dict['id'])
2196                     elif fixup_policy == 'detect_or_warn':
2197                         fixup_pp = FFmpegFixupM4aPP(self)
2198                         if fixup_pp.available:
2199                             info_dict.setdefault('__postprocessors', [])
2200                             info_dict['__postprocessors'].append(fixup_pp)
2201                         else:
2202                             self.report_warning(
2203                                 '%s: writing DASH m4a. '
2204                                 'Only some players support this container. %s'
2205                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2206                     else:
2207                         assert fixup_policy in ('ignore', 'never')
2208
2209                 if (info_dict.get('protocol') == 'm3u8_native'
2210                         or info_dict.get('protocol') == 'm3u8'
2211                         and self.params.get('hls_prefer_native')):
2212                     if fixup_policy == 'warn':
2213                         self.report_warning('%s: malformed AAC bitstream detected.' % (
2214                             info_dict['id']))
2215                     elif fixup_policy == 'detect_or_warn':
2216                         fixup_pp = FFmpegFixupM3u8PP(self)
2217                         if fixup_pp.available:
2218                             info_dict.setdefault('__postprocessors', [])
2219                             info_dict['__postprocessors'].append(fixup_pp)
2220                         else:
2221                             self.report_warning(
2222                                 '%s: malformed AAC bitstream detected. %s'
2223                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2224                     else:
2225                         assert fixup_policy in ('ignore', 'never')
2226
2227                 try:
2228                     self.post_process(filename, info_dict)
2229                 except (PostProcessingError) as err:
2230                     self.report_error('postprocessing: %s' % str(err))
2231                     return
2232                 try:
2233                     for ph in self._post_hooks:
2234                         ph(filename)
2235                 except Exception as err:
2236                     self.report_error('post hooks: %s' % str(err))
2237                     return
2238                 must_record_download_archive = True
2239
2240         if must_record_download_archive or self.params.get('force_write_download_archive', False):
2241             self.record_download_archive(info_dict)
2242         max_downloads = self.params.get('max_downloads')
2243         if max_downloads is not None and self._num_downloads >= int(max_downloads):
2244             raise MaxDownloadsReached()
2245
2246     def download(self, url_list):
2247         """Download a given list of URLs."""
2248         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
2249         if (len(url_list) > 1
2250                 and outtmpl != '-'
2251                 and '%' not in outtmpl
2252                 and self.params.get('max_downloads') != 1):
2253             raise SameFileError(outtmpl)
2254
2255         for url in url_list:
2256             try:
2257                 # It also downloads the videos
2258                 res = self.extract_info(
2259                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2260             except UnavailableVideoError:
2261                 self.report_error('unable to download video')
2262             except MaxDownloadsReached:
2263                 self.to_screen('[info] Maximum number of downloaded files reached.')
2264                 raise
2265             else:
2266                 if self.params.get('dump_single_json', False):
2267                     self.to_stdout(json.dumps(res))
2268
2269         return self._download_retcode
2270
2271     def download_with_info_file(self, info_filename):
2272         with contextlib.closing(fileinput.FileInput(
2273                 [info_filename], mode='r',
2274                 openhook=fileinput.hook_encoded('utf-8'))) as f:
2275             # FileInput doesn't have a read method, we can't call json.load
2276             info = self.filter_requested_info(json.loads('\n'.join(f)))
2277         try:
2278             self.process_ie_result(info, download=True)
2279         except DownloadError:
2280             webpage_url = info.get('webpage_url')
2281             if webpage_url is not None:
2282                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2283                 return self.download([webpage_url])
2284             else:
2285                 raise
2286         return self._download_retcode
2287
2288     @staticmethod
2289     def filter_requested_info(info_dict):
2290         return dict(
2291             (k, v) for k, v in info_dict.items()
2292             if k not in ['requested_formats', 'requested_subtitles'])
2293
2294     def post_process(self, filename, ie_info):
2295         """Run all the postprocessors on the given file."""
2296         info = dict(ie_info)
2297         info['filepath'] = filename
2298         pps_chain = []
2299         if ie_info.get('__postprocessors') is not None:
2300             pps_chain.extend(ie_info['__postprocessors'])
2301         pps_chain.extend(self._pps)
2302         for pp in pps_chain:
2303             files_to_delete = []
2304             try:
2305                 files_to_delete, info = pp.run(info)
2306             except PostProcessingError as e:
2307                 self.report_error(e.msg)
2308             if files_to_delete and not self.params.get('keepvideo', False):
2309                 for old_filename in set(files_to_delete):
2310                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2311                     try:
2312                         os.remove(encodeFilename(old_filename))
2313                     except (IOError, OSError):
2314                         self.report_warning('Unable to remove downloaded original file')
2315
2316     def _make_archive_id(self, info_dict):
2317         video_id = info_dict.get('id')
2318         if not video_id:
2319             return
2320         # Future-proof against any change in case
2321         # and backwards compatibility with prior versions
2322         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
2323         if extractor is None:
2324             url = str_or_none(info_dict.get('url'))
2325             if not url:
2326                 return
2327             # Try to find matching extractor for the URL and take its ie_key
2328             for ie in self._ies:
2329                 if ie.suitable(url):
2330                     extractor = ie.ie_key()
2331                     break
2332             else:
2333                 return
2334         return extractor.lower() + ' ' + video_id
2335
2336     def in_download_archive(self, info_dict):
2337         fn = self.params.get('download_archive')
2338         if fn is None:
2339             return False
2340
2341         vid_id = self._make_archive_id(info_dict)
2342         if not vid_id:
2343             return False  # Incomplete video information
2344
2345         return vid_id in self.archive
2346
2347     def record_download_archive(self, info_dict):
2348         fn = self.params.get('download_archive')
2349         if fn is None:
2350             return
2351         vid_id = self._make_archive_id(info_dict)
2352         assert vid_id
2353         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2354             archive_file.write(vid_id + '\n')
2355         self.archive.add(vid_id)
2356
2357     @staticmethod
2358     def format_resolution(format, default='unknown'):
2359         if format.get('vcodec') == 'none':
2360             return 'audio only'
2361         if format.get('resolution') is not None:
2362             return format['resolution']
2363         if format.get('height') is not None:
2364             if format.get('width') is not None:
2365                 res = '%sx%s' % (format['width'], format['height'])
2366             else:
2367                 res = '%sp' % format['height']
2368         elif format.get('width') is not None:
2369             res = '%dx?' % format['width']
2370         else:
2371             res = default
2372         return res
2373
2374     def _format_note(self, fdict):
2375         res = ''
2376         if fdict.get('ext') in ['f4f', 'f4m']:
2377             res += '(unsupported) '
2378         if fdict.get('language'):
2379             if res:
2380                 res += ' '
2381             res += '[%s] ' % fdict['language']
2382         if fdict.get('format_note') is not None:
2383             res += fdict['format_note'] + ' '
2384         if fdict.get('tbr') is not None:
2385             res += '%4dk ' % fdict['tbr']
2386         if fdict.get('container') is not None:
2387             if res:
2388                 res += ', '
2389             res += '%s container' % fdict['container']
2390         if (fdict.get('vcodec') is not None
2391                 and fdict.get('vcodec') != 'none'):
2392             if res:
2393                 res += ', '
2394             res += fdict['vcodec']
2395             if fdict.get('vbr') is not None:
2396                 res += '@'
2397         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2398             res += 'video@'
2399         if fdict.get('vbr') is not None:
2400             res += '%4dk' % fdict['vbr']
2401         if fdict.get('fps') is not None:
2402             if res:
2403                 res += ', '
2404             res += '%sfps' % fdict['fps']
2405         if fdict.get('acodec') is not None:
2406             if res:
2407                 res += ', '
2408             if fdict['acodec'] == 'none':
2409                 res += 'video only'
2410             else:
2411                 res += '%-5s' % fdict['acodec']
2412         elif fdict.get('abr') is not None:
2413             if res:
2414                 res += ', '
2415             res += 'audio'
2416         if fdict.get('abr') is not None:
2417             res += '@%3dk' % fdict['abr']
2418         if fdict.get('asr') is not None:
2419             res += ' (%5dHz)' % fdict['asr']
2420         if fdict.get('filesize') is not None:
2421             if res:
2422                 res += ', '
2423             res += format_bytes(fdict['filesize'])
2424         elif fdict.get('filesize_approx') is not None:
2425             if res:
2426                 res += ', '
2427             res += '~' + format_bytes(fdict['filesize_approx'])
2428         return res
2429
2430     def _format_note_table(self, f):
2431         def join_fields(*vargs):
2432             return ', '.join((val for val in vargs if val != ''))
2433
2434         return join_fields(
2435             'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
2436             format_field(f, 'language', '[%s]'),
2437             format_field(f, 'format_note'),
2438             format_field(f, 'container', ignore=(None, f.get('ext'))),
2439             format_field(f, 'asr', '%5dHz'))
2440
2441     def list_formats(self, info_dict):
2442         formats = info_dict.get('formats', [info_dict])
2443         new_format = self.params.get('listformats_table', False)
2444         if new_format:
2445             table = [
2446                 [
2447                     format_field(f, 'format_id'),
2448                     format_field(f, 'ext'),
2449                     self.format_resolution(f),
2450                     format_field(f, 'fps', '%d'),
2451                     '|',
2452                     format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
2453                     format_field(f, 'tbr', '%4dk'),
2454                     f.get('protocol').replace('http_dash_segments', 'dash').replace("native", "n"),
2455                     '|',
2456                     format_field(f, 'vcodec', default='unknown').replace('none', ''),
2457                     format_field(f, 'vbr', '%4dk'),
2458                     format_field(f, 'acodec', default='unknown').replace('none', ''),
2459                     format_field(f, 'abr', '%3dk'),
2460                     format_field(f, 'asr', '%5dHz'),
2461                     self._format_note_table(f)]
2462                 for f in formats
2463                 if f.get('preference') is None or f['preference'] >= -1000]
2464             header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', '  TBR', 'PROTO',
2465                            '|', 'VCODEC', '  VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
2466         else:
2467             table = [
2468                 [
2469                     format_field(f, 'format_id'),
2470                     format_field(f, 'ext'),
2471                     self.format_resolution(f),
2472                     self._format_note(f)]
2473                 for f in formats
2474                 if f.get('preference') is None or f['preference'] >= -1000]
2475             header_line = ['format code', 'extension', 'resolution', 'note']
2476
2477         # if len(formats) > 1:
2478         #     table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
2479         self.to_screen(
2480             '[info] Available formats for %s:\n%s' % (info_dict['id'], render_table(
2481                 header_line,
2482                 table,
2483                 delim=new_format,
2484                 extraGap=(0 if new_format else 1),
2485                 hideEmpty=new_format)))
2486
2487     def list_thumbnails(self, info_dict):
2488         thumbnails = info_dict.get('thumbnails')
2489         if not thumbnails:
2490             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
2491             return
2492
2493         self.to_screen(
2494             '[info] Thumbnails for %s:' % info_dict['id'])
2495         self.to_screen(render_table(
2496             ['ID', 'width', 'height', 'URL'],
2497             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
2498
2499     def list_subtitles(self, video_id, subtitles, name='subtitles'):
2500         if not subtitles:
2501             self.to_screen('%s has no %s' % (video_id, name))
2502             return
2503         self.to_screen(
2504             'Available %s for %s:' % (name, video_id))
2505         self.to_screen(render_table(
2506             ['Language', 'formats'],
2507             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
2508                 for lang, formats in subtitles.items()]))
2509
2510     def urlopen(self, req):
2511         """ Start an HTTP download """
2512         if isinstance(req, compat_basestring):
2513             req = sanitized_Request(req)
2514         return self._opener.open(req, timeout=self._socket_timeout)
2515
2516     def print_debug_header(self):
2517         if not self.params.get('verbose'):
2518             return
2519
2520         if type('') is not compat_str:
2521             # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
2522             self.report_warning(
2523                 'Your Python is broken! Update to a newer and supported version')
2524
2525         stdout_encoding = getattr(
2526             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
2527         encoding_str = (
2528             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
2529                 locale.getpreferredencoding(),
2530                 sys.getfilesystemencoding(),
2531                 stdout_encoding,
2532                 self.get_encoding()))
2533         write_string(encoding_str, encoding=None)
2534
2535         self._write_string('[debug] yt-dlp version ' + __version__ + '\n')
2536         if _LAZY_LOADER:
2537             self._write_string('[debug] Lazy loading extractors enabled' + '\n')
2538         try:
2539             sp = subprocess.Popen(
2540                 ['git', 'rev-parse', '--short', 'HEAD'],
2541                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
2542                 cwd=os.path.dirname(os.path.abspath(__file__)))
2543             out, err = process_communicate_or_kill(sp)
2544             out = out.decode().strip()
2545             if re.match('[0-9a-f]+', out):
2546                 self._write_string('[debug] Git HEAD: ' + out + '\n')
2547         except Exception:
2548             try:
2549                 sys.exc_clear()
2550             except Exception:
2551                 pass
2552
2553         def python_implementation():
2554             impl_name = platform.python_implementation()
2555             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
2556                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
2557             return impl_name
2558
2559         self._write_string('[debug] Python version %s (%s) - %s\n' % (
2560             platform.python_version(), python_implementation(),
2561             platform_name()))
2562
2563         exe_versions = FFmpegPostProcessor.get_versions(self)
2564         exe_versions['rtmpdump'] = rtmpdump_version()
2565         exe_versions['phantomjs'] = PhantomJSwrapper._version()
2566         exe_str = ', '.join(
2567             '%s %s' % (exe, v)
2568             for exe, v in sorted(exe_versions.items())
2569             if v
2570         )
2571         if not exe_str:
2572             exe_str = 'none'
2573         self._write_string('[debug] exe versions: %s\n' % exe_str)
2574
2575         proxy_map = {}
2576         for handler in self._opener.handlers:
2577             if hasattr(handler, 'proxies'):
2578                 proxy_map.update(handler.proxies)
2579         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
2580
2581         if self.params.get('call_home', False):
2582             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
2583             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
2584             return
2585             latest_version = self.urlopen(
2586                 'https://yt-dl.org/latest/version').read().decode('utf-8')
2587             if version_tuple(latest_version) > version_tuple(__version__):
2588                 self.report_warning(
2589                     'You are using an outdated version (newest version: %s)! '
2590                     'See https://yt-dl.org/update if you need help updating.' %
2591                     latest_version)
2592
2593     def _setup_opener(self):
2594         timeout_val = self.params.get('socket_timeout')
2595         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
2596
2597         opts_cookiefile = self.params.get('cookiefile')
2598         opts_proxy = self.params.get('proxy')
2599
2600         if opts_cookiefile is None:
2601             self.cookiejar = compat_cookiejar.CookieJar()
2602         else:
2603             opts_cookiefile = expand_path(opts_cookiefile)
2604             self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
2605             if os.access(opts_cookiefile, os.R_OK):
2606                 self.cookiejar.load(ignore_discard=True, ignore_expires=True)
2607
2608         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
2609         if opts_proxy is not None:
2610             if opts_proxy == '':
2611                 proxies = {}
2612             else:
2613                 proxies = {'http': opts_proxy, 'https': opts_proxy}
2614         else:
2615             proxies = compat_urllib_request.getproxies()
2616             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
2617             if 'http' in proxies and 'https' not in proxies:
2618                 proxies['https'] = proxies['http']
2619         proxy_handler = PerRequestProxyHandler(proxies)
2620
2621         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
2622         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
2623         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
2624         redirect_handler = YoutubeDLRedirectHandler()
2625         data_handler = compat_urllib_request_DataHandler()
2626
2627         # When passing our own FileHandler instance, build_opener won't add the
2628         # default FileHandler and allows us to disable the file protocol, which
2629         # can be used for malicious purposes (see
2630         # https://github.com/ytdl-org/youtube-dl/issues/8227)
2631         file_handler = compat_urllib_request.FileHandler()
2632
2633         def file_open(*args, **kwargs):
2634             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dlc for security reasons')
2635         file_handler.file_open = file_open
2636
2637         opener = compat_urllib_request.build_opener(
2638             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2639
2640         # Delete the default user-agent header, which would otherwise apply in
2641         # cases where our custom HTTP handler doesn't come into play
2642         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
2643         opener.addheaders = []
2644         self._opener = opener
2645
2646     def encode(self, s):
2647         if isinstance(s, bytes):
2648             return s  # Already encoded
2649
2650         try:
2651             return s.encode(self.get_encoding())
2652         except UnicodeEncodeError as err:
2653             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2654             raise
2655
2656     def get_encoding(self):
2657         encoding = self.params.get('encoding')
2658         if encoding is None:
2659             encoding = preferredencoding()
2660         return encoding
2661
2662     def _write_thumbnails(self, info_dict, filename):
2663         if self.params.get('writethumbnail', False):
2664             thumbnails = info_dict.get('thumbnails')
2665             if thumbnails:
2666                 thumbnails = [thumbnails[-1]]
2667         elif self.params.get('write_all_thumbnails', False):
2668             thumbnails = info_dict.get('thumbnails')
2669         else:
2670             return
2671
2672         if not thumbnails:
2673             # No thumbnails present, so return immediately
2674             return
2675
2676         for t in thumbnails:
2677             thumb_ext = determine_ext(t['url'], 'jpg')
2678             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2679             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2680             t['filename'] = thumb_filename = replace_extension(filename + suffix, thumb_ext, info_dict.get('ext'))
2681
2682             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
2683                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2684                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2685             else:
2686                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2687                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2688                 try:
2689                     uf = self.urlopen(t['url'])
2690                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2691                         shutil.copyfileobj(uf, thumbf)
2692                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2693                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2694                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2695                     self.report_warning('Unable to download thumbnail "%s": %s' %
2696                                         (t['url'], error_to_compat_str(err)))