]> jfr.im git - yt-dlp.git/blob - youtube_dlc/YoutubeDL.py
2ecb137fc9b6043356f74a0431a348bdd4afc1ba
[yt-dlp.git] / youtube_dlc / YoutubeDL.py
1 #!/usr/bin/env python
2 # coding: utf-8
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import copy
9 import datetime
10 import errno
11 import fileinput
12 import io
13 import itertools
14 import json
15 import locale
16 import operator
17 import os
18 import platform
19 import re
20 import shutil
21 import subprocess
22 import socket
23 import sys
24 import time
25 import tokenize
26 import traceback
27 import random
28
29 from string import ascii_letters
30
31 from .compat import (
32 compat_basestring,
33 compat_cookiejar,
34 compat_get_terminal_size,
35 compat_http_client,
36 compat_kwargs,
37 compat_numeric_types,
38 compat_os_name,
39 compat_str,
40 compat_tokenize_tokenize,
41 compat_urllib_error,
42 compat_urllib_request,
43 compat_urllib_request_DataHandler,
44 )
45 from .utils import (
46 age_restricted,
47 args_to_str,
48 ContentTooShortError,
49 date_from_str,
50 DateRange,
51 DEFAULT_OUTTMPL,
52 determine_ext,
53 determine_protocol,
54 DOT_DESKTOP_LINK_TEMPLATE,
55 DOT_URL_LINK_TEMPLATE,
56 DOT_WEBLOC_LINK_TEMPLATE,
57 DownloadError,
58 encode_compat_str,
59 encodeFilename,
60 error_to_compat_str,
61 expand_path,
62 ExtractorError,
63 format_bytes,
64 format_field,
65 formatSeconds,
66 GeoRestrictedError,
67 int_or_none,
68 iri_to_uri,
69 ISO3166Utils,
70 locked_file,
71 make_HTTPS_handler,
72 MaxDownloadsReached,
73 orderedSet,
74 PagedList,
75 parse_filesize,
76 PerRequestProxyHandler,
77 platform_name,
78 PostProcessingError,
79 preferredencoding,
80 prepend_extension,
81 register_socks_protocols,
82 render_table,
83 replace_extension,
84 SameFileError,
85 sanitize_filename,
86 sanitize_path,
87 sanitize_url,
88 sanitized_Request,
89 std_headers,
90 str_or_none,
91 subtitles_filename,
92 to_high_limit_path,
93 UnavailableVideoError,
94 url_basename,
95 version_tuple,
96 write_json_file,
97 write_string,
98 YoutubeDLCookieJar,
99 YoutubeDLCookieProcessor,
100 YoutubeDLHandler,
101 YoutubeDLRedirectHandler,
102 )
103 from .cache import Cache
104 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
105 from .extractor.openload import PhantomJSwrapper
106 from .downloader import get_suitable_downloader
107 from .downloader.rtmp import rtmpdump_version
108 from .postprocessor import (
109 FFmpegFixupM3u8PP,
110 FFmpegFixupM4aPP,
111 FFmpegFixupStretchedPP,
112 FFmpegMergerPP,
113 FFmpegPostProcessor,
114 FFmpegSubtitlesConvertorPP,
115 get_postprocessor,
116 )
117 from .version import __version__
118
119 if compat_os_name == 'nt':
120 import ctypes
121
122
123 class YoutubeDL(object):
124 """YoutubeDL class.
125
126 YoutubeDL objects are the ones responsible of downloading the
127 actual video file and writing it to disk if the user has requested
128 it, among some other tasks. In most cases there should be one per
129 program. As, given a video URL, the downloader doesn't know how to
extract all the needed information, a task that InfoExtractors do, it
131 has to pass the URL to one of them.
132
133 For this, YoutubeDL objects have a method that allows
134 InfoExtractors to be registered in a given order. When it is passed
a URL, the YoutubeDL object hands it to the first InfoExtractor it
136 finds that reports being able to handle it. The InfoExtractor extracts
137 all the information about the video or videos the URL refers to, and
138 YoutubeDL process the extracted information, possibly using a File
139 Downloader to download the video.
140
141 YoutubeDL objects accept a lot of parameters. In order not to saturate
142 the object constructor with arguments, it receives a dictionary of
143 options instead. These options are available through the params
144 attribute for the InfoExtractors to use. The YoutubeDL also
145 registers itself as the downloader in charge for the InfoExtractors
146 that are added to it, so this is a "mutual registration".
147
148 Available options:
149
150 username: Username for authentication purposes.
151 password: Password for authentication purposes.
152 videopassword: Password for accessing a video.
153 ap_mso: Adobe Pass multiple-system operator identifier.
154 ap_username: Multiple-system operator account username.
155 ap_password: Multiple-system operator account password.
156 usenetrc: Use netrc for authentication instead.
157 verbose: Print additional info to stdout.
158 quiet: Do not print messages to stdout.
159 no_warnings: Do not print out anything for warnings.
160 forceurl: Force printing final URL.
161 forcetitle: Force printing title.
162 forceid: Force printing ID.
163 forcethumbnail: Force printing thumbnail URL.
164 forcedescription: Force printing description.
165 forcefilename: Force printing final filename.
166 forceduration: Force printing duration.
167 forcejson: Force printing info_dict as JSON.
168 dump_single_json: Force printing the info_dict of the whole playlist
169 (or video) as a single JSON line.
170 force_write_download_archive: Force writing download archive regardless of
171 'skip_download' or 'simulate'.
172 simulate: Do not download the video files.
173 format: Video format code. see "FORMAT SELECTION" for more details.
174 format_sort: How to sort the video formats. see "Sorting Formats" for more details.
175 format_sort_force: Force the given format_sort. see "Sorting Formats" for more details.
176 allow_multiple_video_streams: Allow multiple video streams to be merged into a single file
177 allow_multiple_audio_streams: Allow multiple audio streams to be merged into a single file
178 outtmpl: Template for output names.
179 restrictfilenames: Do not allow "&" and spaces in file names.
180 trim_file_name: Limit length of filename (extension excluded).
181 ignoreerrors: Do not stop on download errors. (Default False when running youtube-dlc, but True when directly accessing YoutubeDL class)
182 force_generic_extractor: Force downloader to use the generic extractor
183 nooverwrites: Prevent overwriting files.
184 playliststart: Playlist item to start at.
185 playlistend: Playlist item to end at.
186 playlist_items: Specific indices of playlist to download.
187 playlistreverse: Download playlist items in reverse order.
188 playlistrandom: Download playlist items in random order.
189 matchtitle: Download only matching titles.
190 rejecttitle: Reject downloads for matching titles.
191 logger: Log messages to a logging.Logger instance.
192 logtostderr: Log messages to stderr instead of stdout.
193 writedescription: Write the video description to a .description file
194 writeinfojson: Write the video description to a .info.json file
195 writeannotations: Write the video annotations to a .annotations.xml file
196 writethumbnail: Write the thumbnail image to a file
197 write_all_thumbnails: Write all thumbnail formats to files
198 writelink: Write an internet shortcut file, depending on the
199 current platform (.url/.webloc/.desktop)
200 writeurllink: Write a Windows internet shortcut file (.url)
201 writewebloclink: Write a macOS internet shortcut file (.webloc)
202 writedesktoplink: Write a Linux internet shortcut file (.desktop)
203 writesubtitles: Write the video subtitles to a file
204 writeautomaticsub: Write the automatically generated subtitles to a file
205 allsubtitles: Downloads all the subtitles of the video
206 (requires writesubtitles or writeautomaticsub)
207 listsubtitles: Lists all available subtitles for the video
208 subtitlesformat: The format code for subtitles
209 subtitleslangs: List of languages of the subtitles to download
210 keepvideo: Keep the video file after post-processing
211 daterange: A DateRange object, download only if the upload_date is in the range.
212 skip_download: Skip the actual download of the video file
213 cachedir: Location of the cache files in the filesystem.
214 False to disable filesystem cache.
215 noplaylist: Download single video instead of a playlist if in doubt.
216 age_limit: An integer representing the user's age in years.
217 Unsuitable videos for the given age are skipped.
218 min_views: An integer representing the minimum view count the video
219 must have in order to not be skipped.
220 Videos without view count information are always
221 downloaded. None for no limit.
222 max_views: An integer representing the maximum view count.
223 Videos that are more popular than that are not
224 downloaded.
225 Videos without view count information are always
226 downloaded. None for no limit.
227 download_archive: File name of a file where all downloads are recorded.
228 Videos already present in the file are not downloaded
229 again.
230 break_on_existing: Stop the download process after attempting to download a file that's
231 in the archive.
232 cookiefile: File name where cookies should be read from and dumped to.
233 nocheckcertificate:Do not verify SSL certificates
234 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
235 At the moment, this is only supported by YouTube.
236 proxy: URL of the proxy server to use
237 geo_verification_proxy: URL of the proxy to use for IP address verification
238 on geo-restricted sites.
239 socket_timeout: Time to wait for unresponsive hosts, in seconds
240 bidi_workaround: Work around buggy terminals without bidirectional text
support, using fribidi
242 debug_printtraffic:Print out sent and received HTTP traffic
243 include_ads: Download ads as well
244 default_search: Prepend this string if an input url is not valid.
245 'auto' for elaborate guessing
246 encoding: Use this encoding instead of the system-specified.
247 extract_flat: Do not resolve URLs, return the immediate result.
248 Pass in 'in_playlist' to only show this behavior for
249 playlist items.
250 postprocessors: A list of dictionaries, each with an entry
251 * key: The name of the postprocessor. See
252 youtube_dlc/postprocessor/__init__.py for a list.
253 as well as any further keyword arguments for the
254 postprocessor.
255 post_hooks: A list of functions that get called as the final step
256 for each video file, after all postprocessors have been
257 called. The filename will be passed as the only argument.
258 progress_hooks: A list of functions that get called on download
259 progress, with a dictionary with the entries
260 * status: One of "downloading", "error", or "finished".
261 Check this first and ignore unknown values.
262
263 If status is one of "downloading", or "finished", the
264 following properties may also be present:
265 * filename: The final filename (always present)
266 * tmpfilename: The filename we're currently writing to
267 * downloaded_bytes: Bytes on disk
268 * total_bytes: Size of the whole file, None if unknown
269 * total_bytes_estimate: Guess of the eventual file size,
270 None if unavailable.
271 * elapsed: The number of seconds since download started.
272 * eta: The estimated time in seconds, None if unknown
273 * speed: The download speed in bytes/second, None if
274 unknown
275 * fragment_index: The counter of the currently
276 downloaded video fragment.
277 * fragment_count: The number of fragments (= individual
278 files that will be merged)
279
280 Progress hooks are guaranteed to be called at least once
281 (with status "finished") if the download is successful.
282 merge_output_format: Extension to use when merging formats.
283 fixup: Automatically correct known faults of the file.
284 One of:
285 - "never": do nothing
286 - "warn": only emit a warning
287 - "detect_or_warn": check whether we can do anything
288 about it, warn otherwise (default)
289 source_address: Client-side IP address to bind to.
290 call_home: Boolean, true iff we are allowed to contact the
291 youtube-dlc servers for debugging.
292 sleep_interval: Number of seconds to sleep before each download when
293 used alone or a lower bound of a range for randomized
294 sleep before each download (minimum possible number
295 of seconds to sleep) when used along with
296 max_sleep_interval.
297 max_sleep_interval:Upper bound of a range for randomized sleep before each
298 download (maximum possible number of seconds to sleep).
299 Must only be used along with sleep_interval.
300 Actual sleep time will be a random float from range
301 [sleep_interval; max_sleep_interval].
302 listformats: Print an overview of available video formats and exit.
303 list_thumbnails: Print a table of all thumbnails and exit.
304 match_filter: A function that gets called with the info_dict of
305 every video.
306 If it returns a message, the video is ignored.
307 If it returns None, the video is downloaded.
308 match_filter_func in utils.py is one example for this.
309 no_color: Do not emit color codes in output.
310 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
311 HTTP header
312 geo_bypass_country:
313 Two-letter ISO 3166-2 country code that will be used for
314 explicit geographic restriction bypassing via faking
315 X-Forwarded-For HTTP header
316 geo_bypass_ip_block:
317 IP range in CIDR notation that will be used similarly to
318 geo_bypass_country
319
320 The following options determine which downloader is picked:
321 external_downloader: Executable of the external downloader to call.
322 None or unset for standard (built-in) downloader.
323 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
324 if True, otherwise use ffmpeg/avconv if False, otherwise
325 use downloader suggested by extractor if None.
326
327 The following parameters are not used by YoutubeDL itself, they are used by
328 the downloader (see youtube_dlc/downloader/common.py):
329 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
330 noresizebuffer, retries, continuedl, noprogress, consoletitle,
331 xattr_set_filesize, external_downloader_args, hls_use_mpegts,
332 http_chunk_size.
333
334 The following options are used by the post processors:
335 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
336 otherwise prefer ffmpeg.
337 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
338 to the binary or its containing directory.
339 postprocessor_args: A dictionary of postprocessor names (in lower case) and a list
340 of additional command-line arguments for the postprocessor.
341 Use 'default' as the name for arguments to passed to all PP.
342
343 The following options are used by the Youtube extractor:
344 youtube_include_dash_manifest: If True (default), DASH manifests and related
345 data will be downloaded and processed by extractor.
346 You can reduce network I/O by disabling it if you don't
347 care about DASH.
348 """
349
    # info_dict fields that carry numeric values; prepare_filename() leaves
    # these unsanitized so integer/float printf-style conversions in the
    # output template (e.g. %(height)d) keep working.
    _NUMERIC_FIELDS = set((
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'upload_year', 'upload_month', 'upload_day',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
        'playlist_index',
    ))

    # Class-level placeholders; the real values are set per-instance in __init__().
    params = None  # options dictionary (see the class docstring)
    _ies = []  # registered InfoExtractors, in matching order
    _pps = []  # registered PostProcessors, in chain order
    _download_retcode = None  # process exit status (0 ok, 1 after an error)
    _num_downloads = None  # count of files downloaded in this session
    _screen_file = None  # stream for normal output (stdout, or stderr with logtostderr)
367
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.

        params:    options dictionary (see the class docstring for keys).
        auto_init: when True, print the debug header and register all
                   default InfoExtractors immediately.
        """
        if params is None:
            params = {}
        self._ies = []
        self._ies_instances = {}
        self._pps = []
        self._post_hooks = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Route normal output to stderr when 'logtostderr' is set
        # (False indexes stdout, True indexes stderr).
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = {
            # Default parameters
            'nocheckcertificate': False,
        }
        self.params.update(params)
        self.cache = Cache(self)
        self.archive = set()

        """Preload the archive, if any is specified"""
        def preload_download_archive(self):
            # Load every previously recorded ID from the download archive
            # into self.archive; returns True on success, False when no
            # archive is configured or the file does not exist yet.
            # NOTE(review): nested function with an explicit `self`
            # parameter, invoked below as preload_download_archive(self).
            fn = self.params.get('download_archive')
            if fn is None:
                return False
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        self.archive.add(line.strip())
            except IOError as ioe:
                # A missing archive file is fine; re-raise anything else.
                if ioe.errno != errno.ENOENT:
                    raise
                return False
            return True

        def check_deprecated(param, option, suggestion):
            # Warn once when a deprecated option is set; True if it was set.
            if self.params.get(param) is not None:
                self.report_warning(
                    '%s is deprecated. Use %s instead.' % (option, suggestion))
                return True
            return False

        if self.params.get('verbose'):
            # NOTE(review): this is printed even when no --download-archive
            # was given (the value is then logged as None).
            self.to_stdout('[debug] Loading archive file %r' % self.params.get('download_archive'))

        preload_download_archive(self)

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            # Honour the old option only when the new one is unset.
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')

        if params.get('bidi_workaround', False):
            # Spawn a bidi filter process (bidiv, falling back to fribidi)
            # and pipe our output through it via a pty.
            try:
                import pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    # bidiv unavailable; try fribidi instead.
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if isinstance(params.get('outtmpl'), bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')

        self._setup_opener()

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        # Instantiate and register the configured postprocessors; each dict
        # entry's 'key' selects the class, remaining keys become kwargs.
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)
            del pp_def['key']
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)

        for ph in self.params.get('post_hooks', []):
            self.add_post_hook(ph)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)

        register_socks_protocols()
486
487 def warn_if_short_id(self, argv):
488 # short YouTube ID starting with dash?
489 idxs = [
490 i for i, a in enumerate(argv)
491 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
492 if idxs:
493 correct_argv = (
494 ['youtube-dlc']
495 + [a for i, a in enumerate(argv) if i not in idxs]
496 + ['--'] + [argv[i] for i in idxs]
497 )
498 self.report_warning(
499 'Long argument string detected. '
500 'Use -- to separate parameters and URLs, like this:\n%s\n' %
501 args_to_str(correct_argv))
502
503 def add_info_extractor(self, ie):
504 """Add an InfoExtractor object to the end of the list."""
505 self._ies.append(ie)
506 if not isinstance(ie, type):
507 self._ies_instances[ie.ie_key()] = ie
508 ie.set_downloader(self)
509
510 def get_info_extractor(self, ie_key):
511 """
512 Get an instance of an IE with name ie_key, it will try to get one from
513 the _ies list, if there's no instance it will create a new one and add
514 it to the extractor list.
515 """
516 ie = self._ies_instances.get(ie_key)
517 if ie is None:
518 ie = get_info_extractor(ie_key)()
519 self.add_info_extractor(ie)
520 return ie
521
522 def add_default_info_extractors(self):
523 """
524 Add the InfoExtractors returned by gen_extractors to the end of the list
525 """
526 for ie in gen_extractor_classes():
527 self.add_info_extractor(ie)
528
529 def add_post_processor(self, pp):
530 """Add a PostProcessor object to the end of the chain."""
531 self._pps.append(pp)
532 pp.set_downloader(self)
533
534 def add_post_hook(self, ph):
535 """Add the post hook"""
536 self._post_hooks.append(ph)
537
538 def add_progress_hook(self, ph):
539 """Add the progress hook (currently only for the file downloader)"""
540 self._progress_hooks.append(ph)
541
542 def _bidi_workaround(self, message):
543 if not hasattr(self, '_output_channel'):
544 return message
545
546 assert hasattr(self, '_output_process')
547 assert isinstance(message, compat_str)
548 line_count = message.count('\n') + 1
549 self._output_process.stdin.write((message + '\n').encode('utf-8'))
550 self._output_process.stdin.flush()
551 res = ''.join(self._output_channel.readline().decode('utf-8')
552 for _ in range(line_count))
553 return res[:-len('\n')]
554
555 def to_screen(self, message, skip_eol=False):
556 """Print message to stdout if not in quiet mode."""
557 return self.to_stdout(message, skip_eol, check_quiet=True)
558
559 def _write_string(self, s, out=None):
560 write_string(s, out=out, encoding=self.params.get('encoding'))
561
562 def to_stdout(self, message, skip_eol=False, check_quiet=False):
563 """Print message to stdout if not in quiet mode."""
564 if self.params.get('logger'):
565 self.params['logger'].debug(message)
566 elif not check_quiet or not self.params.get('quiet', False):
567 message = self._bidi_workaround(message)
568 terminator = ['\n', ''][skip_eol]
569 output = message + terminator
570
571 self._write_string(output, self._screen_file)
572
573 def to_stderr(self, message):
574 """Print message to stderr."""
575 assert isinstance(message, compat_str)
576 if self.params.get('logger'):
577 self.params['logger'].error(message)
578 else:
579 message = self._bidi_workaround(message)
580 output = message + '\n'
581 self._write_string(output, self._err_file)
582
    def to_console_title(self, message):
        """Set the terminal/console window title to *message*.

        No-op unless the 'consoletitle' option is enabled; uses the Win32
        console API on Windows and an xterm escape sequence elsewhere.
        """
        if not self.params.get('consoletitle', False):
            return
        if compat_os_name == 'nt':
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            # OSC 0 sets both the icon name and the window title.
            self._write_string('\033]0;%s\007' % message, self._screen_file)
593
594 def save_console_title(self):
595 if not self.params.get('consoletitle', False):
596 return
597 if self.params.get('simulate', False):
598 return
599 if compat_os_name != 'nt' and 'TERM' in os.environ:
600 # Save the title on stack
601 self._write_string('\033[22;0t', self._screen_file)
602
603 def restore_console_title(self):
604 if not self.params.get('consoletitle', False):
605 return
606 if self.params.get('simulate', False):
607 return
608 if compat_os_name != 'nt' and 'TERM' in os.environ:
609 # Restore the title from stack
610 self._write_string('\033[23;0t', self._screen_file)
611
612 def __enter__(self):
613 self.save_console_title()
614 return self
615
616 def __exit__(self, *args):
617 self.restore_console_title()
618
619 if self.params.get('cookiefile') is not None:
620 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
621
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # Extractor errors wrap the original exc_info; include
                    # both the wrapped and the current traceback.
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: dump the current call stack.
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Prefer the wrapped extractor exception's exc_info when present
            # so DownloadError points at the root cause.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1
651
652 def report_warning(self, message):
653 '''
654 Print the message to stderr, it will be prefixed with 'WARNING:'
655 If stderr is a tty file the 'WARNING:' will be colored
656 '''
657 if self.params.get('logger') is not None:
658 self.params['logger'].warning(message)
659 else:
660 if self.params.get('no_warnings'):
661 return
662 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
663 _msg_header = '\033[0;33mWARNING:\033[0m'
664 else:
665 _msg_header = 'WARNING:'
666 warning_message = '%s %s' % (_msg_header, message)
667 self.to_stderr(warning_message)
668
669 def report_error(self, message, tb=None):
670 '''
671 Do the same as trouble, but prefixes the message with 'ERROR:', colored
672 in red if stderr is a tty file.
673 '''
674 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
675 _msg_header = '\033[0;31mERROR:\033[0m'
676 else:
677 _msg_header = 'ERROR:'
678 error_message = '%s %s' % (_msg_header, message)
679 self.trouble(error_message, tb)
680
681 def report_file_already_downloaded(self, file_name):
682 """Report file has already been fully downloaded."""
683 try:
684 self.to_screen('[download] %s has already been downloaded' % file_name)
685 except UnicodeEncodeError:
686 self.to_screen('[download] The file has already been downloaded')
687
    def prepare_filename(self, info_dict):
        """Generate the output filename.

        Expands the user's output template ('outtmpl' option) with the
        fields of info_dict (sanitized for filesystem use), applies
        optional trimming, and returns the sanitized path — or None when
        the template is invalid.
        """
        try:
            template_dict = dict(info_dict)

            template_dict['epoch'] = int(time.time())
            autonumber_size = self.params.get('autonumber_size')
            if autonumber_size is None:
                autonumber_size = 5
            template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
            # Derive a 'resolution' string from width/height when absent.
            if template_dict.get('resolution') is None:
                if template_dict.get('width') and template_dict.get('height'):
                    template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
                elif template_dict.get('height'):
                    template_dict['resolution'] = '%sp' % template_dict['height']
                elif template_dict.get('width'):
                    template_dict['resolution'] = '%dx?' % template_dict['width']

            # Sanitize every non-numeric scalar for filesystem use; drop
            # None values and containers; absent keys render as 'NA'.
            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id' or k.endswith('_id')))
            template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
                                 for k, v in template_dict.items()
                                 if v is not None and not isinstance(v, (list, tuple, dict)))
            template_dict = collections.defaultdict(lambda: 'NA', template_dict)

            outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)

            # For fields playlist_index and autonumber convert all occurrences
            # of %(field)s to %(field)0Nd for backward compatibility
            field_size_compat_map = {
                'playlist_index': len(str(template_dict['n_entries'])),
                'autonumber': autonumber_size,
            }
            FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
            mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
            if mobj:
                outtmpl = re.sub(
                    FIELD_SIZE_COMPAT_RE,
                    r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
                    outtmpl)

            # Missing numeric fields used together with integer presentation types
            # in format specification will break the argument substitution since
            # string 'NA' is returned for missing fields. We will patch output
            # template for missing fields to meet string presentation type.
            for numeric_field in self._NUMERIC_FIELDS:
                if numeric_field not in template_dict:
                    # As of [1] format syntax is:
                    #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
                    # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
                    FORMAT_RE = r'''(?x)
                        (?<!%)
                        %
                        \({0}\)  # mapping key
                        (?:[#0\-+ ]+)?  # conversion flags (optional)
                        (?:\d+)?  # minimum field width (optional)
                        (?:\.\d+)?  # precision (optional)
                        [hlL]?  # length modifier (optional)
                        [diouxXeEfFgGcrs%]  # conversion type
                    '''
                    outtmpl = re.sub(
                        FORMAT_RE.format(numeric_field),
                        r'%({0})s'.format(numeric_field), outtmpl)

            # expand_path translates '%%' into '%' and '$$' into '$'
            # correspondingly that is not what we want since we need to keep
            # '%%' intact for template dict substitution step. Working around
            # with boundary-alike separator hack.
            sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
            outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

            # outtmpl should be expand_path'ed before template dict substitution
            # because meta fields may contain env variables we don't want to
            # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
            # title "Hello $PATH", we don't want `$PATH` to be expanded.
            filename = expand_path(outtmpl).replace(sep, '') % template_dict

            # https://github.com/blackjack4494/youtube-dlc/issues/85
            # Truncate the base name to trim_file_name characters while
            # keeping the extension (and a sub-extension, if any).
            trim_file_name = self.params.get('trim_file_name', False)
            if trim_file_name:
                fn_groups = filename.rsplit('.')
                ext = fn_groups[-1]
                sub_ext = ''
                if len(fn_groups) > 2:
                    sub_ext = fn_groups[-2]
                filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))

            # Temporary fix for #4787
            # 'Treat' all problem characters by passing filename through preferredencoding
            # to workaround encoding issues with subprocess on python2 @ Windows
            if sys.version_info < (3, 0) and sys.platform == 'win32':
                filename = encodeFilename(filename, True).decode(preferredencoding())
            return sanitize_path(filename)
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None
786
    def _match_entry(self, info_dict, incomplete):
        """Return None if the file should be downloaded, otherwise a string
        describing why the entry is skipped (title filters, date range,
        view-count limits, age restriction, archive, match_filter).

        incomplete: True while only partial (e.g. flat-playlist) metadata
        is available; the user match_filter is skipped in that case.
        """

        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if matchtitle:
                if not re.search(matchtitle, title, re.IGNORECASE):
                    return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if rejecttitle:
                if re.search(rejecttitle, title, re.IGNORECASE):
                    return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date')
        if date is not None:
            # Skip uploads outside the configured date range.
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
                return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        view_count = info_dict.get('view_count')
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title
        if self.in_download_archive(info_dict):
            return '%s has already been recorded in archive' % video_title

        # The user match_filter only runs on fully-extracted entries.
        if not incomplete:
            match_filter = self.params.get('match_filter')
            if match_filter is not None:
                ret = match_filter(info_dict)
                if ret is not None:
                    return ret

        return None
828
829 @staticmethod
830 def add_extra_info(info_dict, extra_info):
831 '''Set the keys from extra_info in info dict if they are missing'''
832 for key, value in extra_info.items():
833 info_dict.setdefault(key, value)
834
def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info={},
                 process=True, force_generic_extractor=False):
    '''
    Returns a list with a dictionary for each video we find.
    If 'download', also downloads the videos.
    extra_info is a dict containing the extra values to add to each result
    '''

    if force_generic_extractor and not ie_key:
        ie_key = 'Generic'

    # Restrict the candidate extractors to the requested one, if any
    if ie_key:
        candidates = [self.get_info_extractor(ie_key)]
    else:
        candidates = self._ies

    for candidate in candidates:
        if not candidate.suitable(url):
            continue

        ie_key = candidate.ie_key()
        extractor = self.get_info_extractor(ie_key)
        if not extractor.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        # Cheap pre-extraction archive check based on the video id alone,
        # to avoid a full extraction for already-downloaded videos
        try:
            extract_id = getattr(extractor, 'extract_id', None)
            temp_id = extract_id(url) if callable(extract_id) else extractor._match_id(url)
        except (AssertionError, IndexError, AttributeError):
            temp_id = None
        if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
            self.to_screen("[%s] %s: has already been recorded in archive" % (
                ie_key, temp_id))
            return None

        return self.__extract_info(url, extractor, download, extra_info, process, info_dict)

    self.report_error('no suitable InfoExtractor for URL %s' % url)
874
def __handle_extraction_exceptions(func):
    """Decorator for extraction methods: report the errors we expect from
    extraction instead of letting them propagate. MaxDownloadsReached is
    always re-raised so it can stop the whole run; any other unexpected
    exception is only swallowed (and reported) when ignoreerrors is set."""
    def wrapper(self, *args, **kwargs):
        try:
            return func(self, *args, **kwargs)
        except GeoRestrictedError as e:
            msg = e.msg
            if e.countries:
                available_in = ', '.join(map(ISO3166Utils.short2full, e.countries))
                msg += '\nThis video is available in %s.' % available_in
            msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
            self.report_error(msg)
        except ExtractorError as e:  # An error we somewhat expected
            self.report_error(compat_str(e), e.format_traceback())
        except MaxDownloadsReached:
            raise
        except Exception as e:
            if not self.params.get('ignoreerrors', False):
                raise
            self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
    return wrapper
896
@__handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process, info_dict):
    """Run the given InfoExtractor on url and (optionally) process the result."""
    ie_result = ie.extract(url)
    if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        return None
    if isinstance(ie_result, list):
        # Backwards compatibility: old IE result format
        ie_result = {
            '_type': 'compat_list',
            'entries': ie_result,
        }
    # Carry over id/title from the outer result, when known
    if info_dict:
        for field in ('id', 'title'):
            if info_dict.get(field):
                ie_result[field] = info_dict[field]
    self.add_default_extra_info(ie_result, ie, url)
    if not process:
        return ie_result
    return self.process_ie_result(ie_result, download, extra_info)
918
def add_default_extra_info(self, ie_result, ie, url):
    """Fill in generic metadata (extractor name/key, webpage URL and its
    basename, human-readable duration) without overwriting values the
    extractor already provided."""
    duration = ie_result.get('duration', None)
    self.add_extra_info(ie_result, {
        'extractor': ie.IE_NAME,
        'webpage_url': url,
        'duration_string': formatSeconds(duration, '-') if duration is not None else None,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    })
930
def process_ie_result(self, ie_result, download=True, extra_info={}):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.
    """
    # Dispatch on the result type: 'video' (final), 'url'/'url_transparent'
    # (needs another extraction pass), 'playlist'/'multi_video' (recurse
    # per entry) or legacy 'compat_list'.
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(ie_result['url'])
        extract_flat = self.params.get('extract_flat', False)
        # With --flat-playlist (or extract_flat=True) don't resolve the
        # URL further; just print what was requested and return as-is.
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            self.__forced_printings(
                ie_result, self.prepare_filename(ie_result),
                incomplete=True)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download, info_dict=ie_result,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        # Non-None fields of the outer (embedding) result override the
        # inner extraction, except identity/bookkeeping fields.
        force_properties = dict(
            (k, v) for k, v in ie_result.items() if v is not None)
        for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
            if f in force_properties:
                del force_properties[f]
        new_result = info.copy()
        new_result.update(force_properties)

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # We process each entry in the playlist
        playlist = ie_result.get('title') or ie_result.get('id')
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        # --playlist-start is 1-based on the command line; 0-based here
        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        # --playlist-items, e.g. '1-3,7,10-13' -> ordered set of 1-based indices
        playlistitems_str = self.params.get('playlist_items')
        playlistitems = None
        if playlistitems_str is not None:
            def iter_playlistitems(format):
                for string_segment in format.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

        ie_entries = ie_result['entries']

        def make_playlistitems_entries(list_ie_entries):
            # Select the requested 1-based indices; negative indices count
            # from the end; out-of-range indices are silently dropped.
            num_entries = len(list_ie_entries)
            return [
                list_ie_entries[i - 1] for i in playlistitems
                if -num_entries <= i - 1 < num_entries]

        def report_download(num_entries):
            self.to_screen(
                '[%s] playlist %s: Downloading %d videos' %
                (ie_result['extractor'], playlist, num_entries))

        # Materialize the selected slice of entries. Three cases: a plain
        # list, a lazily-paged PagedList, or a generic iterable/generator.
        if isinstance(ie_entries, list):
            n_all_entries = len(ie_entries)
            if playlistitems:
                entries = make_playlistitems_entries(ie_entries)
            else:
                entries = ie_entries[playliststart:playlistend]
            n_entries = len(entries)
            self.to_screen(
                '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
        elif isinstance(ie_entries, PagedList):
            if playlistitems:
                entries = []
                for item in playlistitems:
                    entries.extend(ie_entries.getslice(
                        item - 1, item
                    ))
            else:
                entries = ie_entries.getslice(
                    playliststart, playlistend)
            n_entries = len(entries)
            report_download(n_entries)
        else:  # iterable
            if playlistitems:
                # Only consume the iterable up to the largest requested index
                entries = make_playlistitems_entries(list(itertools.islice(
                    ie_entries, 0, max(playlistitems))))
            else:
                entries = list(itertools.islice(
                    ie_entries, playliststart, playlistend))
            n_entries = len(entries)
            report_download(n_entries)

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]

        if self.params.get('playlistrandom', False):
            random.shuffle(entries)

        x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

        for i, entry in enumerate(entries, 1):
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            # This __x_forwarded_for_ip thing is a bit ugly but requires
            # minimal changes
            if x_forwarded_for:
                entry['__x_forwarded_for_ip'] = x_forwarded_for
            # Playlist context forwarded into each entry via add_extra_info
            extra = {
                'n_entries': n_entries,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            reason = self._match_entry(entry, incomplete=True)
            if reason is not None:
                # NOTE(review): this suffix must stay in sync with the reason
                # string produced by _match_entry's download-archive check,
                # otherwise --break-on-existing never triggers.
                if reason.endswith('has already been recorded in the archive') and self.params.get('break_on_existing'):
                    # TODO(review): uses bare print() instead of self.to_screen
                    # like every other message here -- confirm intent.
                    print('[download] tried downloading a file that\'s already in the archive, stopping since --break-on-existing is set.')
                    break
                else:
                    self.to_screen('[download] ' + reason)
                    continue

            entry_result = self.__process_iterable_entry(entry, download, extra)
            # TODO: skip failed (empty) entries?
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        self.to_screen('[download] Finished downloading playlist: %s' % playlist)
        return ie_result
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            # Stamp the playlist-level metadata onto each legacy entry
            self.add_extra_info(
                r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }
            )
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
1129
@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    # Thin wrapper around process_ie_result so that errors raised while
    # processing a single playlist entry are handled by
    # __handle_extraction_exceptions (reported or re-raised) instead of
    # aborting the whole playlist loop.
    return self.process_ie_result(
        entry, download=download, extra_info=extra_info)
1134
1135 def _build_format_filter(self, filter_spec):
1136 " Returns a function to filter the formats according to the filter_spec "
1137
1138 OPERATORS = {
1139 '<': operator.lt,
1140 '<=': operator.le,
1141 '>': operator.gt,
1142 '>=': operator.ge,
1143 '=': operator.eq,
1144 '!=': operator.ne,
1145 }
1146 operator_rex = re.compile(r'''(?x)\s*
1147 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
1148 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1149 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
1150 $
1151 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1152 m = operator_rex.search(filter_spec)
1153 if m:
1154 try:
1155 comparison_value = int(m.group('value'))
1156 except ValueError:
1157 comparison_value = parse_filesize(m.group('value'))
1158 if comparison_value is None:
1159 comparison_value = parse_filesize(m.group('value') + 'B')
1160 if comparison_value is None:
1161 raise ValueError(
1162 'Invalid value %r in format specification %r' % (
1163 m.group('value'), filter_spec))
1164 op = OPERATORS[m.group('op')]
1165
1166 if not m:
1167 STR_OPERATORS = {
1168 '=': operator.eq,
1169 '^=': lambda attr, value: attr.startswith(value),
1170 '$=': lambda attr, value: attr.endswith(value),
1171 '*=': lambda attr, value: value in attr,
1172 }
1173 str_operator_rex = re.compile(r'''(?x)
1174 \s*(?P<key>[a-zA-Z0-9._-]+)
1175 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
1176 \s*(?P<value>[a-zA-Z0-9._-]+)
1177 \s*$
1178 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1179 m = str_operator_rex.search(filter_spec)
1180 if m:
1181 comparison_value = m.group('value')
1182 str_op = STR_OPERATORS[m.group('op')]
1183 if m.group('negation'):
1184 op = lambda attr, value: not str_op(attr, value)
1185 else:
1186 op = str_op
1187
1188 if not m:
1189 raise ValueError('Invalid filter specification %r' % filter_spec)
1190
1191 def _filter(f):
1192 actual_value = f.get(m.group('key'))
1193 if actual_value is None:
1194 return m.group('none_inclusive')
1195 return op(actual_value, comparison_value)
1196 return _filter
1197
1198 def _default_format_spec(self, info_dict, download=True):
1199
1200 def can_merge():
1201 merger = FFmpegMergerPP(self)
1202 return merger.available and merger.can_merge()
1203
1204 prefer_best = (
1205 not self.params.get('simulate', False)
1206 and download
1207 and (
1208 not can_merge()
1209 or info_dict.get('is_live', False)
1210 or self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-'))
1211
1212 return (
1213 'best/bestvideo+bestaudio'
1214 if prefer_best
1215 else 'bestvideo*+bestaudio/best'
1216 if not self.params.get('allow_multiple_audio_streams', False)
1217 else 'bestvideo+bestaudio/best')
1218
def build_format_selector(self, format_spec):
    """Compile a --format specification (e.g. 'bestvideo+bestaudio/best')
    into a selector function.

    The returned function takes a ctx dict (with 'formats' and
    'incomplete_formats' keys) and yields the selected format dicts.
    Parsing is done by tokenizing the spec with the stdlib tokenizer and
    running a small recursive-descent parser over the token stream.
    """
    def syntax_error(note, start):
        # Build (not raise) a SyntaxError whose message points a caret at
        # column start[1] of the original spec
        message = (
            'Invalid format specification: '
            '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    # Node types of the parsed selector tree
    PICKFIRST = 'PICKFIRST'   # 'a/b' - first alternative with results
    MERGE = 'MERGE'           # 'a+b' - merge video+audio
    SINGLE = 'SINGLE'         # atom, e.g. 'best', 'bv', 'mp4', a format_id
    GROUP = 'GROUP'           # '( ... )'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                              'video': self.params.get('allow_multiple_video_streams', False)}

    def _parse_filter(tokens):
        # Consume tokens up to the matching ']' and return the raw filter
        # text (later compiled by _build_format_filter)
        filter_parts = []
        for type, string, start, _, _ in tokens:
            if type == tokenize.OP and string == ']':
                return ''.join(filter_parts)
            else:
                filter_parts.append(string)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings
        # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        last_string, last_start, last_end, last_line = None, None, None, None
        for type, string, start, end, line in tokens:
            if type == tokenize.OP and string == '[':
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string, start, end, line
                # everything inside brackets will be handled by _parse_filter
                for type, string, start, end, line in tokens:
                    yield type, string, start, end, line
                    if type == tokenize.OP and string == ']':
                        break
            elif type == tokenize.OP and string in ALLOWED_OPS:
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string, start, end, line
            elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                # Glue consecutive name/number/op tokens into one NAME token
                if not last_string:
                    last_string = string
                    last_start = start
                    last_end = end
                else:
                    last_string += string
        if last_string:
            yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        # Recursive-descent parse of a comma-separated selector list;
        # the inside_* flags decide which operators end the current level
        selectors = []
        current_selector = None
        for type, string, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if type == getattr(tokenize, 'ENCODING', None):
                continue
            elif type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string, [])
            elif type == tokenize.OP:
                if string == ')':
                    if not inside_group:
                        # ')' will be handled by the parentheses group
                        tokens.restore_last_token()
                    break
                elif inside_merge and string in ['/', ',']:
                    tokens.restore_last_token()
                    break
                elif inside_choice and string == ',':
                    tokens.restore_last_token()
                    break
                elif string == ',':
                    if not current_selector:
                        raise syntax_error('"," must follow a format selector', start)
                    selectors.append(current_selector)
                    current_selector = None
                elif string == '/':
                    if not current_selector:
                        raise syntax_error('"/" must follow a format selector', start)
                    first_choice = current_selector
                    second_choice = _parse_format_selection(tokens, inside_choice=True)
                    current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                elif string == '[':
                    # A filter without a preceding selector applies to 'best'
                    if not current_selector:
                        current_selector = FormatSelector(SINGLE, 'best', [])
                    format_filter = _parse_filter(tokens)
                    current_selector.filters.append(format_filter)
                elif string == '(':
                    if current_selector:
                        raise syntax_error('Unexpected "("', start)
                    group = _parse_format_selection(tokens, inside_group=True)
                    current_selector = FormatSelector(GROUP, group, [])
                elif string == '+':
                    if not current_selector:
                        raise syntax_error('Unexpected "+"', start)
                    selector_1 = current_selector
                    selector_2 = _parse_format_selection(tokens, inside_merge=True)
                    if not selector_2:
                        raise syntax_error('Expected a selector', start)
                    current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                else:
                    raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
            elif type == tokenize.ENDMARKER:
                break
        if current_selector:
            selectors.append(current_selector)
        return selectors

    def _build_selector_function(selector):
        # Turn a selector tree node (or a top-level list of them) into a
        # generator function over ctx['formats']
        if isinstance(selector, list):  # ,
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(ctx):
                for f in fs:
                    for format in f(ctx):
                        yield format
            return selector_function

        elif selector.type == GROUP:  # ()
            selector_function = _build_selector_function(selector.selector)

        elif selector.type == PICKFIRST:  # /
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(ctx):
                # Return the results of the first alternative that yields any
                for f in fs:
                    picked_formats = list(f(ctx))
                    if picked_formats:
                        return picked_formats
                return []

        elif selector.type == SINGLE:  # atom
            # NOTE(review): this local deliberately(?) shadows the outer
            # format_spec argument -- only the atom text is used below
            format_spec = selector.selector if selector.selector is not None else 'best'

            if format_spec == 'all':
                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    if formats:
                        for f in formats:
                            yield f

            else:
                format_fallback = False
                # best/worst with optional video/audio qualifier and '*'
                # modifier, e.g. 'best', 'bv', 'wa*'
                format_spec_obj = re.match(r'(best|worst|b|w)(video|audio|v|a)?(\*)?$', format_spec)
                if format_spec_obj is not None:
                    format_idx = 0 if format_spec_obj.group(1)[0] == 'w' else -1
                    format_type = format_spec_obj.group(2)[0] if format_spec_obj.group(2) else False
                    not_format_type = 'v' if format_type == 'a' else 'a'
                    format_modified = format_spec_obj.group(3) is not None

                    format_fallback = not format_type and not format_modified  # for b, w
                    filter_f = ((lambda f: f.get(format_type + 'codec') != 'none')
                                if format_type and format_modified  # bv*, ba*, wv*, wa*
                                else (lambda f: f.get(not_format_type + 'codec') == 'none')
                                if format_type  # bv, ba, wv, wa
                                else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                                if not format_modified  # b, w
                                else None)  # b*, w*
                else:
                    # Otherwise the atom is an extension or a format_id
                    format_idx = -1
                    filter_f = ((lambda f: f.get('ext') == format_spec)
                                if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
                                else (lambda f: f.get('format_id') == format_spec))  # id

                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    if not formats:
                        return
                    matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                    if matches:
                        yield matches[format_idx]
                    elif format_fallback == 'force' or (format_fallback and ctx['incomplete_formats']):
                        # for extractors with incomplete formats (audio only (soundcloud)
                        # or video only (imgur)) best/worst will fallback to
                        # best/worst {video,audio}-only format
                        yield formats[format_idx]

        elif selector.type == MERGE:  # +
            def _merge(formats_pair):
                format_1, format_2 = formats_pair

                formats_info = []
                formats_info.extend(format_1.get('requested_formats', (format_1,)))
                formats_info.extend(format_2.get('requested_formats', (format_2,)))

                # Drop extra video/audio streams when multiple streams of a
                # kind are not allowed.
                # NOTE(review): formats_info.pop(i) while iterating
                # enumerate(formats_info) shifts later indices -- verify
                # this behaves as intended for >2 streams.
                if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
                    get_no_more = {"video": False, "audio": False}
                    for (i, fmt_info) in enumerate(formats_info):
                        for aud_vid in ["audio", "video"]:
                            if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                                if get_no_more[aud_vid]:
                                    formats_info.pop(i)
                                get_no_more[aud_vid] = True

                if len(formats_info) == 1:
                    return formats_info[0]

                video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
                audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

                the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
                the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

                # Pick the container for the merged file
                output_ext = self.params.get('merge_output_format')
                if not output_ext:
                    if the_only_video:
                        output_ext = the_only_video['ext']
                    elif the_only_audio and not video_fmts:
                        output_ext = the_only_audio['ext']
                    else:
                        output_ext = 'mkv'

                new_dict = {
                    'requested_formats': formats_info,
                    'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
                    'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
                    'ext': output_ext,
                }

                if the_only_video:
                    new_dict.update({
                        'width': the_only_video.get('width'),
                        'height': the_only_video.get('height'),
                        'resolution': the_only_video.get('resolution'),
                        'fps': the_only_video.get('fps'),
                        'vcodec': the_only_video.get('vcodec'),
                        'vbr': the_only_video.get('vbr'),
                        'stretched_ratio': the_only_video.get('stretched_ratio'),
                    })

                if the_only_audio:
                    new_dict.update({
                        'acodec': the_only_audio.get('acodec'),
                        'abr': the_only_audio.get('abr'),
                    })

                return new_dict

            selector_1, selector_2 = map(_build_selector_function, selector.selector)

            def selector_function(ctx):
                # Cartesian product of both operands' selections; deep-copy
                # ctx so each side sees an unmodified format list
                for pair in itertools.product(
                        selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                    yield _merge(pair)

        # Apply the node's '[...]' filters on a copy of ctx before selecting
        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(ctx):
            ctx_copy = copy.deepcopy(ctx)
            for _filter in filters:
                ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
            return selector_function(ctx_copy)
        return final_selector

    stream = io.BytesIO(format_spec.encode('utf-8'))
    try:
        tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator(object):
        # Minimal iterator over the token list with one-token push-back
        # (restore_last_token), as required by the parser's lookahead
        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        next = __next__  # python 2 iterator protocol

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)
1506
def _calc_headers(self, info_dict):
    """Compute the HTTP headers for this video/format: the defaults from
    std_headers, overridden by the extractor's http_headers, plus Cookie
    and X-Forwarded-For when applicable."""
    headers = std_headers.copy()

    extra_headers = info_dict.get('http_headers')
    if extra_headers:
        headers.update(extra_headers)

    cookies = self._calc_cookies(info_dict)
    if cookies:
        headers['Cookie'] = cookies

    x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
    if x_forwarded_for_ip and 'X-Forwarded-For' not in headers:
        headers['X-Forwarded-For'] = x_forwarded_for_ip

    return headers
1524
def _calc_cookies(self, info_dict):
    """Return the Cookie header value the cookiejar would send for the
    video URL, or None if no cookies apply."""
    request = sanitized_Request(info_dict['url'])
    self.cookiejar.add_cookie_header(request)
    return request.get_header('Cookie')
1529
1530 def process_video_result(self, info_dict, download=True):
1531 assert info_dict.get('_type', 'video') == 'video'
1532
1533 if 'id' not in info_dict:
1534 raise ExtractorError('Missing "id" field in extractor result')
1535 if 'title' not in info_dict:
1536 raise ExtractorError('Missing "title" field in extractor result')
1537
1538 def report_force_conversion(field, field_not, conversion):
1539 self.report_warning(
1540 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
1541 % (field, field_not, conversion))
1542
1543 def sanitize_string_field(info, string_field):
1544 field = info.get(string_field)
1545 if field is None or isinstance(field, compat_str):
1546 return
1547 report_force_conversion(string_field, 'a string', 'string')
1548 info[string_field] = compat_str(field)
1549
1550 def sanitize_numeric_fields(info):
1551 for numeric_field in self._NUMERIC_FIELDS:
1552 field = info.get(numeric_field)
1553 if field is None or isinstance(field, compat_numeric_types):
1554 continue
1555 report_force_conversion(numeric_field, 'numeric', 'int')
1556 info[numeric_field] = int_or_none(field)
1557
1558 sanitize_string_field(info_dict, 'id')
1559 sanitize_numeric_fields(info_dict)
1560
1561 if 'playlist' not in info_dict:
1562 # It isn't part of a playlist
1563 info_dict['playlist'] = None
1564 info_dict['playlist_index'] = None
1565
1566 thumbnails = info_dict.get('thumbnails')
1567 if thumbnails is None:
1568 thumbnail = info_dict.get('thumbnail')
1569 if thumbnail:
1570 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1571 if thumbnails:
1572 thumbnails.sort(key=lambda t: (
1573 t.get('preference') if t.get('preference') is not None else -1,
1574 t.get('width') if t.get('width') is not None else -1,
1575 t.get('height') if t.get('height') is not None else -1,
1576 t.get('id') if t.get('id') is not None else '', t.get('url')))
1577 for i, t in enumerate(thumbnails):
1578 t['url'] = sanitize_url(t['url'])
1579 if t.get('width') and t.get('height'):
1580 t['resolution'] = '%dx%d' % (t['width'], t['height'])
1581 if t.get('id') is None:
1582 t['id'] = '%d' % i
1583
1584 if self.params.get('list_thumbnails'):
1585 self.list_thumbnails(info_dict)
1586 return
1587
1588 thumbnail = info_dict.get('thumbnail')
1589 if thumbnail:
1590 info_dict['thumbnail'] = sanitize_url(thumbnail)
1591 elif thumbnails:
1592 info_dict['thumbnail'] = thumbnails[-1]['url']
1593
1594 if 'display_id' not in info_dict and 'id' in info_dict:
1595 info_dict['display_id'] = info_dict['id']
1596
1597 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1598 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1599 # see http://bugs.python.org/issue1646728)
1600 try:
1601 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1602 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1603 except (ValueError, OverflowError, OSError):
1604 pass
1605
1606 # Auto generate title fields corresponding to the *_number fields when missing
1607 # in order to always have clean titles. This is very common for TV series.
1608 for field in ('chapter', 'season', 'episode'):
1609 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1610 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
1611
1612 for cc_kind in ('subtitles', 'automatic_captions'):
1613 cc = info_dict.get(cc_kind)
1614 if cc:
1615 for _, subtitle in cc.items():
1616 for subtitle_format in subtitle:
1617 if subtitle_format.get('url'):
1618 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
1619 if subtitle_format.get('ext') is None:
1620 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1621
1622 automatic_captions = info_dict.get('automatic_captions')
1623 subtitles = info_dict.get('subtitles')
1624
1625 if self.params.get('listsubtitles', False):
1626 if 'automatic_captions' in info_dict:
1627 self.list_subtitles(
1628 info_dict['id'], automatic_captions, 'automatic captions')
1629 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1630 return
1631
1632 info_dict['requested_subtitles'] = self.process_subtitles(
1633 info_dict['id'], subtitles, automatic_captions)
1634
1635 # We now pick which formats have to be downloaded
1636 if info_dict.get('formats') is None:
1637 # There's only one format available
1638 formats = [info_dict]
1639 else:
1640 formats = info_dict['formats']
1641
1642 if not formats:
1643 raise ExtractorError('No video formats found!')
1644
1645 def is_wellformed(f):
1646 url = f.get('url')
1647 if not url:
1648 self.report_warning(
1649 '"url" field is missing or empty - skipping format, '
1650 'there is an error in extractor')
1651 return False
1652 if isinstance(url, bytes):
1653 sanitize_string_field(f, 'url')
1654 return True
1655
1656 # Filter out malformed formats for better extraction robustness
1657 formats = list(filter(is_wellformed, formats))
1658
1659 formats_dict = {}
1660
1661 # We check that all the formats have the format and format_id fields
1662 for i, format in enumerate(formats):
1663 sanitize_string_field(format, 'format_id')
1664 sanitize_numeric_fields(format)
1665 format['url'] = sanitize_url(format['url'])
1666 if not format.get('format_id'):
1667 format['format_id'] = compat_str(i)
1668 else:
1669 # Sanitize format_id from characters used in format selector expression
1670 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
1671 format_id = format['format_id']
1672 if format_id not in formats_dict:
1673 formats_dict[format_id] = []
1674 formats_dict[format_id].append(format)
1675
1676 # Make sure all formats have unique format_id
1677 for format_id, ambiguous_formats in formats_dict.items():
1678 if len(ambiguous_formats) > 1:
1679 for i, format in enumerate(ambiguous_formats):
1680 format['format_id'] = '%s-%d' % (format_id, i)
1681
1682 for i, format in enumerate(formats):
1683 if format.get('format') is None:
1684 format['format'] = '{id} - {res}{note}'.format(
1685 id=format['format_id'],
1686 res=self.format_resolution(format),
1687 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1688 )
1689 # Automatically determine file extension if missing
1690 if format.get('ext') is None:
1691 format['ext'] = determine_ext(format['url']).lower()
1692 # Automatically determine protocol if missing (useful for format
1693 # selection purposes)
1694 if format.get('protocol') is None:
1695 format['protocol'] = determine_protocol(format)
1696 # Add HTTP headers, so that external programs can use them from the
1697 # json output
1698 full_format_info = info_dict.copy()
1699 full_format_info.update(format)
1700 format['http_headers'] = self._calc_headers(full_format_info)
1701 # Remove private housekeeping stuff
1702 if '__x_forwarded_for_ip' in info_dict:
1703 del info_dict['__x_forwarded_for_ip']
1704
1705 # TODO Central sorting goes here
1706
1707 if formats[0] is not info_dict:
1708 # only set the 'formats' fields if the original info_dict list them
1709 # otherwise we end up with a circular reference, the first (and unique)
1710 # element in the 'formats' field in info_dict is info_dict itself,
1711 # which can't be exported to json
1712 info_dict['formats'] = formats
1713 if self.params.get('listformats'):
1714 self.list_formats(info_dict)
1715 return
1716
1717 req_format = self.params.get('format')
1718 if req_format is None:
1719 req_format = self._default_format_spec(info_dict, download=download)
1720 if self.params.get('verbose'):
1721 self._write_string('[debug] Default format spec: %s\n' % req_format)
1722
1723 format_selector = self.build_format_selector(req_format)
1724
1725 # While in format selection we may need to have an access to the original
1726 # format set in order to calculate some metrics or do some processing.
1727 # For now we need to be able to guess whether original formats provided
1728 # by extractor are incomplete or not (i.e. whether extractor provides only
1729 # video-only or audio-only formats) for proper formats selection for
1730 # extractors with such incomplete formats (see
1731 # https://github.com/ytdl-org/youtube-dl/pull/5556).
1732 # Since formats may be filtered during format selection and may not match
1733 # the original formats the results may be incorrect. Thus original formats
1734 # or pre-calculated metrics should be passed to format selection routines
1735 # as well.
1736 # We will pass a context object containing all necessary additional data
1737 # instead of just formats.
1738 # This fixes incorrect format selection issue (see
1739 # https://github.com/ytdl-org/youtube-dl/issues/10083).
1740 incomplete_formats = (
1741 # All formats are video-only or
1742 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
1743 # all formats are audio-only
1744 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
1745
1746 ctx = {
1747 'formats': formats,
1748 'incomplete_formats': incomplete_formats,
1749 }
1750
1751 formats_to_download = list(format_selector(ctx))
1752 if not formats_to_download:
1753 raise ExtractorError('requested format not available',
1754 expected=True)
1755
1756 if download:
1757 self.to_screen('[info] Downloading format(s) %s' % ", ".join([f['format_id'] for f in formats_to_download]))
1758 if len(formats_to_download) > 1:
1759 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1760 for format in formats_to_download:
1761 new_info = dict(info_dict)
1762 new_info.update(format)
1763 self.process_info(new_info)
1764 # We update the info dict with the best quality format (backwards compatibility)
1765 info_dict.update(formats_to_download[-1])
1766 return info_dict
1767
1768 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1769 """Select the requested subtitles and their format"""
1770 available_subs = {}
1771 if normal_subtitles and self.params.get('writesubtitles'):
1772 available_subs.update(normal_subtitles)
1773 if automatic_captions and self.params.get('writeautomaticsub'):
1774 for lang, cap_info in automatic_captions.items():
1775 if lang not in available_subs:
1776 available_subs[lang] = cap_info
1777
1778 if (not self.params.get('writesubtitles') and not
1779 self.params.get('writeautomaticsub') or not
1780 available_subs):
1781 return None
1782
1783 if self.params.get('allsubtitles', False):
1784 requested_langs = available_subs.keys()
1785 else:
1786 if self.params.get('subtitleslangs', False):
1787 requested_langs = self.params.get('subtitleslangs')
1788 elif 'en' in available_subs:
1789 requested_langs = ['en']
1790 else:
1791 requested_langs = [list(available_subs.keys())[0]]
1792
1793 formats_query = self.params.get('subtitlesformat', 'best')
1794 formats_preference = formats_query.split('/') if formats_query else []
1795 subs = {}
1796 for lang in requested_langs:
1797 formats = available_subs.get(lang)
1798 if formats is None:
1799 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1800 continue
1801 for ext in formats_preference:
1802 if ext == 'best':
1803 f = formats[-1]
1804 break
1805 matches = list(filter(lambda f: f['ext'] == ext, formats))
1806 if matches:
1807 f = matches[-1]
1808 break
1809 else:
1810 f = formats[-1]
1811 self.report_warning(
1812 'No subtitle format found matching "%s" for language %s, '
1813 'using %s' % (formats_query, lang, f['ext']))
1814 subs[lang] = f
1815 return subs
1816
1817 def __forced_printings(self, info_dict, filename, incomplete):
1818 def print_mandatory(field):
1819 if (self.params.get('force%s' % field, False)
1820 and (not incomplete or info_dict.get(field) is not None)):
1821 self.to_stdout(info_dict[field])
1822
1823 def print_optional(field):
1824 if (self.params.get('force%s' % field, False)
1825 and info_dict.get(field) is not None):
1826 self.to_stdout(info_dict[field])
1827
1828 print_mandatory('title')
1829 print_mandatory('id')
1830 if self.params.get('forceurl', False) and not incomplete:
1831 if info_dict.get('requested_formats') is not None:
1832 for f in info_dict['requested_formats']:
1833 self.to_stdout(f['url'] + f.get('play_path', ''))
1834 else:
1835 # For RTMP URLs, also include the playpath
1836 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1837 print_optional('thumbnail')
1838 print_optional('description')
1839 if self.params.get('forcefilename', False) and filename is not None:
1840 self.to_stdout(filename)
1841 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1842 self.to_stdout(formatSeconds(info_dict['duration']))
1843 print_mandatory('format')
1844 if self.params.get('forcejson', False):
1845 self.to_stdout(json.dumps(info_dict))
1846
1847 def process_info(self, info_dict):
1848 """Process a single resolved IE result."""
1849
1850 assert info_dict.get('_type', 'video') == 'video'
1851
1852 max_downloads = self.params.get('max_downloads')
1853 if max_downloads is not None:
1854 if self._num_downloads >= int(max_downloads):
1855 raise MaxDownloadsReached()
1856
1857 # TODO: backward compatibility, to be removed
1858 info_dict['fulltitle'] = info_dict['title']
1859
1860 if 'format' not in info_dict:
1861 info_dict['format'] = info_dict['ext']
1862
1863 reason = self._match_entry(info_dict, incomplete=False)
1864 if reason is not None:
1865 self.to_screen('[download] ' + reason)
1866 return
1867
1868 self._num_downloads += 1
1869
1870 info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1871
1872 # Forced printings
1873 self.__forced_printings(info_dict, filename, incomplete=False)
1874
1875 if self.params.get('simulate', False):
1876 if self.params.get('force_write_download_archive', False):
1877 self.record_download_archive(info_dict)
1878
1879 # Do nothing else if in simulate mode
1880 return
1881
1882 if filename is None:
1883 return
1884
1885 def ensure_dir_exists(path):
1886 try:
1887 dn = os.path.dirname(path)
1888 if dn and not os.path.exists(dn):
1889 os.makedirs(dn)
1890 return True
1891 except (OSError, IOError) as err:
1892 self.report_error('unable to create directory ' + error_to_compat_str(err))
1893 return False
1894
1895 if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
1896 return
1897
1898 if self.params.get('writedescription', False):
1899 descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1900 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1901 self.to_screen('[info] Video description is already present')
1902 elif info_dict.get('description') is None:
1903 self.report_warning('There\'s no description to write.')
1904 else:
1905 try:
1906 self.to_screen('[info] Writing video description to: ' + descfn)
1907 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1908 descfile.write(info_dict['description'])
1909 except (OSError, IOError):
1910 self.report_error('Cannot write description file ' + descfn)
1911 return
1912
1913 if self.params.get('writeannotations', False):
1914 annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1915 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1916 self.to_screen('[info] Video annotations are already present')
1917 elif not info_dict.get('annotations'):
1918 self.report_warning('There are no annotations to write.')
1919 else:
1920 try:
1921 self.to_screen('[info] Writing video annotations to: ' + annofn)
1922 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1923 annofile.write(info_dict['annotations'])
1924 except (KeyError, TypeError):
1925 self.report_warning('There are no annotations to write.')
1926 except (OSError, IOError):
1927 self.report_error('Cannot write annotations file: ' + annofn)
1928 return
1929
1930 def dl(name, info, subtitle=False):
1931 fd = get_suitable_downloader(info, self.params)(self, self.params)
1932 for ph in self._progress_hooks:
1933 fd.add_progress_hook(ph)
1934 if self.params.get('verbose'):
1935 self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
1936 return fd.download(name, info, subtitle)
1937
1938 subtitles_are_requested = any([self.params.get('writesubtitles', False),
1939 self.params.get('writeautomaticsub')])
1940
1941 if subtitles_are_requested and info_dict.get('requested_subtitles'):
1942 # subtitles download errors are already managed as troubles in relevant IE
1943 # that way it will silently go on when used with unsupporting IE
1944 subtitles = info_dict['requested_subtitles']
1945 # ie = self.get_info_extractor(info_dict['extractor_key'])
1946 for sub_lang, sub_info in subtitles.items():
1947 sub_format = sub_info['ext']
1948 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
1949 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1950 self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
1951 else:
1952 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1953 if sub_info.get('data') is not None:
1954 try:
1955 # Use newline='' to prevent conversion of newline characters
1956 # See https://github.com/ytdl-org/youtube-dl/issues/10268
1957 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
1958 subfile.write(sub_info['data'])
1959 except (OSError, IOError):
1960 self.report_error('Cannot write subtitles file ' + sub_filename)
1961 return
1962 else:
1963 try:
1964 dl(sub_filename, sub_info, subtitle=True)
1965 '''
1966 if self.params.get('sleep_interval_subtitles', False):
1967 dl(sub_filename, sub_info)
1968 else:
1969 sub_data = ie._request_webpage(
1970 sub_info['url'], info_dict['id'], note=False).read()
1971 with io.open(encodeFilename(sub_filename), 'wb') as subfile:
1972 subfile.write(sub_data)
1973 '''
1974 except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1975 self.report_warning('Unable to download subtitle for "%s": %s' %
1976 (sub_lang, error_to_compat_str(err)))
1977 continue
1978
1979 if self.params.get('skip_download', False):
1980 if self.params.get('convertsubtitles', False):
1981 subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
1982 filename_real_ext = os.path.splitext(filename)[1][1:]
1983 filename_wo_ext = (
1984 os.path.splitext(filename)[0]
1985 if filename_real_ext == info_dict['ext']
1986 else filename)
1987 afilename = '%s.%s' % (filename_wo_ext, self.params.get('convertsubtitles'))
1988 if subconv.available:
1989 info_dict.setdefault('__postprocessors', [])
1990 # info_dict['__postprocessors'].append(subconv)
1991 if os.path.exists(encodeFilename(afilename)):
1992 self.to_screen(
1993 '[download] %s has already been downloaded and '
1994 'converted' % afilename)
1995 else:
1996 try:
1997 self.post_process(filename, info_dict)
1998 except (PostProcessingError) as err:
1999 self.report_error('postprocessing: %s' % str(err))
2000 return
2001
2002 if self.params.get('writeinfojson', False):
2003 infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
2004 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
2005 self.to_screen('[info] Video description metadata is already present')
2006 else:
2007 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
2008 try:
2009 write_json_file(self.filter_requested_info(info_dict), infofn)
2010 except (OSError, IOError):
2011 self.report_error('Cannot write metadata to JSON file ' + infofn)
2012 return
2013
2014 self._write_thumbnails(info_dict, filename)
2015
2016 # Write internet shortcut files
2017 url_link = webloc_link = desktop_link = False
2018 if self.params.get('writelink', False):
2019 if sys.platform == "darwin": # macOS.
2020 webloc_link = True
2021 elif sys.platform.startswith("linux"):
2022 desktop_link = True
2023 else: # if sys.platform in ['win32', 'cygwin']:
2024 url_link = True
2025 if self.params.get('writeurllink', False):
2026 url_link = True
2027 if self.params.get('writewebloclink', False):
2028 webloc_link = True
2029 if self.params.get('writedesktoplink', False):
2030 desktop_link = True
2031
2032 if url_link or webloc_link or desktop_link:
2033 if 'webpage_url' not in info_dict:
2034 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2035 return
2036 ascii_url = iri_to_uri(info_dict['webpage_url'])
2037
2038 def _write_link_file(extension, template, newline, embed_filename):
2039 linkfn = replace_extension(filename, extension, info_dict.get('ext'))
2040 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(linkfn)):
2041 self.to_screen('[info] Internet shortcut is already present')
2042 else:
2043 try:
2044 self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2045 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2046 template_vars = {'url': ascii_url}
2047 if embed_filename:
2048 template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2049 linkfile.write(template % template_vars)
2050 except (OSError, IOError):
2051 self.report_error('Cannot write internet shortcut ' + linkfn)
2052 return False
2053 return True
2054
2055 if url_link:
2056 if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2057 return
2058 if webloc_link:
2059 if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2060 return
2061 if desktop_link:
2062 if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2063 return
2064
2065 # Download
2066 must_record_download_archive = False
2067 if not self.params.get('skip_download', False):
2068 try:
2069 if info_dict.get('requested_formats') is not None:
2070 downloaded = []
2071 success = True
2072 merger = FFmpegMergerPP(self)
2073 if not merger.available:
2074 postprocessors = []
2075 self.report_warning('You have requested multiple '
2076 'formats but ffmpeg or avconv are not installed.'
2077 ' The formats won\'t be merged.')
2078 else:
2079 postprocessors = [merger]
2080
2081 def compatible_formats(formats):
2082 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2083 video_formats = [format for format in formats if format.get('vcodec') != 'none']
2084 audio_formats = [format for format in formats if format.get('acodec') != 'none']
2085 if len(video_formats) > 2 or len(audio_formats) > 2:
2086 return False
2087
2088 # Check extension
2089 exts = set(format.get('ext') for format in formats)
2090 COMPATIBLE_EXTS = (
2091 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2092 set(('webm',)),
2093 )
2094 for ext_sets in COMPATIBLE_EXTS:
2095 if ext_sets.issuperset(exts):
2096 return True
2097 # TODO: Check acodec/vcodec
2098 return False
2099
2100 filename_real_ext = os.path.splitext(filename)[1][1:]
2101 filename_wo_ext = (
2102 os.path.splitext(filename)[0]
2103 if filename_real_ext == info_dict['ext']
2104 else filename)
2105 requested_formats = info_dict['requested_formats']
2106 if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
2107 info_dict['ext'] = 'mkv'
2108 self.report_warning(
2109 'Requested formats are incompatible for merge and will be merged into mkv.')
2110 # Ensure filename always has a correct extension for successful merge
2111 filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
2112 if os.path.exists(encodeFilename(filename)):
2113 self.to_screen(
2114 '[download] %s has already been downloaded and '
2115 'merged' % filename)
2116 else:
2117 for f in requested_formats:
2118 new_info = dict(info_dict)
2119 new_info.update(f)
2120 fname = prepend_extension(
2121 self.prepare_filename(new_info),
2122 'f%s' % f['format_id'], new_info['ext'])
2123 if not ensure_dir_exists(fname):
2124 return
2125 downloaded.append(fname)
2126 partial_success, real_download = dl(fname, new_info)
2127 success = success and partial_success
2128 info_dict['__postprocessors'] = postprocessors
2129 info_dict['__files_to_merge'] = downloaded
2130 # Even if there were no downloads, it is being merged only now
2131 info_dict['__real_download'] = True
2132 else:
2133 # Just a single file
2134 success, real_download = dl(filename, info_dict)
2135 info_dict['__real_download'] = real_download
2136 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2137 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2138 return
2139 except (OSError, IOError) as err:
2140 raise UnavailableVideoError(err)
2141 except (ContentTooShortError, ) as err:
2142 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2143 return
2144
2145 if success and filename != '-':
2146 # Fixup content
2147 fixup_policy = self.params.get('fixup')
2148 if fixup_policy is None:
2149 fixup_policy = 'detect_or_warn'
2150
2151 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
2152
2153 stretched_ratio = info_dict.get('stretched_ratio')
2154 if stretched_ratio is not None and stretched_ratio != 1:
2155 if fixup_policy == 'warn':
2156 self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2157 info_dict['id'], stretched_ratio))
2158 elif fixup_policy == 'detect_or_warn':
2159 stretched_pp = FFmpegFixupStretchedPP(self)
2160 if stretched_pp.available:
2161 info_dict.setdefault('__postprocessors', [])
2162 info_dict['__postprocessors'].append(stretched_pp)
2163 else:
2164 self.report_warning(
2165 '%s: Non-uniform pixel ratio (%s). %s'
2166 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
2167 else:
2168 assert fixup_policy in ('ignore', 'never')
2169
2170 if (info_dict.get('requested_formats') is None
2171 and info_dict.get('container') == 'm4a_dash'):
2172 if fixup_policy == 'warn':
2173 self.report_warning(
2174 '%s: writing DASH m4a. '
2175 'Only some players support this container.'
2176 % info_dict['id'])
2177 elif fixup_policy == 'detect_or_warn':
2178 fixup_pp = FFmpegFixupM4aPP(self)
2179 if fixup_pp.available:
2180 info_dict.setdefault('__postprocessors', [])
2181 info_dict['__postprocessors'].append(fixup_pp)
2182 else:
2183 self.report_warning(
2184 '%s: writing DASH m4a. '
2185 'Only some players support this container. %s'
2186 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2187 else:
2188 assert fixup_policy in ('ignore', 'never')
2189
2190 if (info_dict.get('protocol') == 'm3u8_native'
2191 or info_dict.get('protocol') == 'm3u8'
2192 and self.params.get('hls_prefer_native')):
2193 if fixup_policy == 'warn':
2194 self.report_warning('%s: malformed AAC bitstream detected.' % (
2195 info_dict['id']))
2196 elif fixup_policy == 'detect_or_warn':
2197 fixup_pp = FFmpegFixupM3u8PP(self)
2198 if fixup_pp.available:
2199 info_dict.setdefault('__postprocessors', [])
2200 info_dict['__postprocessors'].append(fixup_pp)
2201 else:
2202 self.report_warning(
2203 '%s: malformed AAC bitstream detected. %s'
2204 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2205 else:
2206 assert fixup_policy in ('ignore', 'never')
2207
2208 try:
2209 self.post_process(filename, info_dict)
2210 except (PostProcessingError) as err:
2211 self.report_error('postprocessing: %s' % str(err))
2212 return
2213 try:
2214 for ph in self._post_hooks:
2215 ph(filename)
2216 except Exception as err:
2217 self.report_error('post hooks: %s' % str(err))
2218 return
2219 must_record_download_archive = True
2220
2221 if must_record_download_archive or self.params.get('force_write_download_archive', False):
2222 self.record_download_archive(info_dict)
2223 max_downloads = self.params.get('max_downloads')
2224 if max_downloads is not None and self._num_downloads >= int(max_downloads):
2225 raise MaxDownloadsReached()
2226
2227 def download(self, url_list):
2228 """Download a given list of URLs."""
2229 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
2230 if (len(url_list) > 1
2231 and outtmpl != '-'
2232 and '%' not in outtmpl
2233 and self.params.get('max_downloads') != 1):
2234 raise SameFileError(outtmpl)
2235
2236 for url in url_list:
2237 try:
2238 # It also downloads the videos
2239 res = self.extract_info(
2240 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2241 except UnavailableVideoError:
2242 self.report_error('unable to download video')
2243 except MaxDownloadsReached:
2244 self.to_screen('[info] Maximum number of downloaded files reached.')
2245 raise
2246 else:
2247 if self.params.get('dump_single_json', False):
2248 self.to_stdout(json.dumps(res))
2249
2250 return self._download_retcode
2251
2252 def download_with_info_file(self, info_filename):
2253 with contextlib.closing(fileinput.FileInput(
2254 [info_filename], mode='r',
2255 openhook=fileinput.hook_encoded('utf-8'))) as f:
2256 # FileInput doesn't have a read method, we can't call json.load
2257 info = self.filter_requested_info(json.loads('\n'.join(f)))
2258 try:
2259 self.process_ie_result(info, download=True)
2260 except DownloadError:
2261 webpage_url = info.get('webpage_url')
2262 if webpage_url is not None:
2263 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2264 return self.download([webpage_url])
2265 else:
2266 raise
2267 return self._download_retcode
2268
2269 @staticmethod
2270 def filter_requested_info(info_dict):
2271 return dict(
2272 (k, v) for k, v in info_dict.items()
2273 if k not in ['requested_formats', 'requested_subtitles'])
2274
2275 def post_process(self, filename, ie_info):
2276 """Run all the postprocessors on the given file."""
2277 info = dict(ie_info)
2278 info['filepath'] = filename
2279 pps_chain = []
2280 if ie_info.get('__postprocessors') is not None:
2281 pps_chain.extend(ie_info['__postprocessors'])
2282 pps_chain.extend(self._pps)
2283 for pp in pps_chain:
2284 files_to_delete = []
2285 try:
2286 files_to_delete, info = pp.run(info)
2287 except PostProcessingError as e:
2288 self.report_error(e.msg)
2289 if files_to_delete and not self.params.get('keepvideo', False):
2290 for old_filename in set(files_to_delete):
2291 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2292 try:
2293 os.remove(encodeFilename(old_filename))
2294 except (IOError, OSError):
2295 self.report_warning('Unable to remove downloaded original file')
2296
2297 def _make_archive_id(self, info_dict):
2298 video_id = info_dict.get('id')
2299 if not video_id:
2300 return
2301 # Future-proof against any change in case
2302 # and backwards compatibility with prior versions
2303 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
2304 if extractor is None:
2305 url = str_or_none(info_dict.get('url'))
2306 if not url:
2307 return
2308 # Try to find matching extractor for the URL and take its ie_key
2309 for ie in self._ies:
2310 if ie.suitable(url):
2311 extractor = ie.ie_key()
2312 break
2313 else:
2314 return
2315 return extractor.lower() + ' ' + video_id
2316
2317 def in_download_archive(self, info_dict):
2318 fn = self.params.get('download_archive')
2319 if fn is None:
2320 return False
2321
2322 vid_id = self._make_archive_id(info_dict)
2323 if not vid_id:
2324 return False # Incomplete video information
2325
2326 return vid_id in self.archive
2327
2328 def record_download_archive(self, info_dict):
2329 fn = self.params.get('download_archive')
2330 if fn is None:
2331 return
2332 vid_id = self._make_archive_id(info_dict)
2333 assert vid_id
2334 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2335 archive_file.write(vid_id + '\n')
2336 self.archive.add(vid_id)
2337
2338 @staticmethod
2339 def format_resolution(format, default='unknown'):
2340 if format.get('vcodec') == 'none':
2341 return 'audio only'
2342 if format.get('resolution') is not None:
2343 return format['resolution']
2344 if format.get('height') is not None:
2345 if format.get('width') is not None:
2346 res = '%sx%s' % (format['width'], format['height'])
2347 else:
2348 res = '%sp' % format['height']
2349 elif format.get('width') is not None:
2350 res = '%dx?' % format['width']
2351 else:
2352 res = default
2353 return res
2354
2355 def _format_note(self, fdict):
2356 res = ''
2357 if fdict.get('ext') in ['f4f', 'f4m']:
2358 res += '(unsupported) '
2359 if fdict.get('language'):
2360 if res:
2361 res += ' '
2362 res += '[%s] ' % fdict['language']
2363 if fdict.get('format_note') is not None:
2364 res += fdict['format_note'] + ' '
2365 if fdict.get('tbr') is not None:
2366 res += '%4dk ' % fdict['tbr']
2367 if fdict.get('container') is not None:
2368 if res:
2369 res += ', '
2370 res += '%s container' % fdict['container']
2371 if (fdict.get('vcodec') is not None
2372 and fdict.get('vcodec') != 'none'):
2373 if res:
2374 res += ', '
2375 res += fdict['vcodec']
2376 if fdict.get('vbr') is not None:
2377 res += '@'
2378 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2379 res += 'video@'
2380 if fdict.get('vbr') is not None:
2381 res += '%4dk' % fdict['vbr']
2382 if fdict.get('fps') is not None:
2383 if res:
2384 res += ', '
2385 res += '%sfps' % fdict['fps']
2386 if fdict.get('acodec') is not None:
2387 if res:
2388 res += ', '
2389 if fdict['acodec'] == 'none':
2390 res += 'video only'
2391 else:
2392 res += '%-5s' % fdict['acodec']
2393 elif fdict.get('abr') is not None:
2394 if res:
2395 res += ', '
2396 res += 'audio'
2397 if fdict.get('abr') is not None:
2398 res += '@%3dk' % fdict['abr']
2399 if fdict.get('asr') is not None:
2400 res += ' (%5dHz)' % fdict['asr']
2401 if fdict.get('filesize') is not None:
2402 if res:
2403 res += ', '
2404 res += format_bytes(fdict['filesize'])
2405 elif fdict.get('filesize_approx') is not None:
2406 if res:
2407 res += ', '
2408 res += '~' + format_bytes(fdict['filesize_approx'])
2409 return res
2410
2411 def _format_note_table(self, f):
2412 def join_fields(*vargs):
2413 return ', '.join((val for val in vargs if val != ''))
2414
2415 return join_fields(
2416 'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
2417 format_field(f, 'language', '[%s]'),
2418 format_field(f, 'format_note'),
2419 format_field(f, 'container', ignore=(None, f.get('ext'))),
2420 format_field(f, 'asr', '%5dHz'))
2421
2422 def list_formats(self, info_dict):
2423 formats = info_dict.get('formats', [info_dict])
2424 new_format = self.params.get('listformats_table', False)
2425 if new_format:
2426 table = [
2427 [
2428 format_field(f, 'format_id'),
2429 format_field(f, 'ext'),
2430 self.format_resolution(f),
2431 format_field(f, 'fps', '%d'),
2432 '|',
2433 format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
2434 format_field(f, 'tbr', '%4dk'),
2435 f.get('protocol').replace('http_dash_segments', 'dash').replace("native", "n"),
2436 '|',
2437 format_field(f, 'vcodec', default='unknown').replace('none', ''),
2438 format_field(f, 'vbr', '%4dk'),
2439 format_field(f, 'acodec', default='unknown').replace('none', ''),
2440 format_field(f, 'abr', '%3dk'),
2441 format_field(f, 'asr', '%5dHz'),
2442 self._format_note_table(f)]
2443 for f in formats
2444 if f.get('preference') is None or f['preference'] >= -1000]
2445 header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
2446 '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
2447 else:
2448 table = [
2449 [
2450 format_field(f, 'format_id'),
2451 format_field(f, 'ext'),
2452 self.format_resolution(f),
2453 self._format_note(f)]
2454 for f in formats
2455 if f.get('preference') is None or f['preference'] >= -1000]
2456 header_line = ['format code', 'extension', 'resolution', 'note']
2457
2458 # if len(formats) > 1:
2459 # table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
2460 self.to_screen(
2461 '[info] Available formats for %s:\n%s' % (info_dict['id'], render_table(
2462 header_line,
2463 table,
2464 delim=new_format,
2465 extraGap=(0 if new_format else 1),
2466 hideEmpty=new_format)))
2467
2468 def list_thumbnails(self, info_dict):
2469 thumbnails = info_dict.get('thumbnails')
2470 if not thumbnails:
2471 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
2472 return
2473
2474 self.to_screen(
2475 '[info] Thumbnails for %s:' % info_dict['id'])
2476 self.to_screen(render_table(
2477 ['ID', 'width', 'height', 'URL'],
2478 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
2479
2480 def list_subtitles(self, video_id, subtitles, name='subtitles'):
2481 if not subtitles:
2482 self.to_screen('%s has no %s' % (video_id, name))
2483 return
2484 self.to_screen(
2485 'Available %s for %s:' % (name, video_id))
2486 self.to_screen(render_table(
2487 ['Language', 'formats'],
2488 [[lang, ', '.join(f['ext'] for f in reversed(formats))]
2489 for lang, formats in subtitles.items()]))
2490
2491 def urlopen(self, req):
2492 """ Start an HTTP download """
2493 if isinstance(req, compat_basestring):
2494 req = sanitized_Request(req)
2495 return self._opener.open(req, timeout=self._socket_timeout)
2496
    def print_debug_header(self):
        """Write verbose diagnostic info (encodings, versions, proxies, and
        optionally the public IP / update check) to the debug output.
        No-op unless the 'verbose' option is set."""
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        # sys.stdout can be replaced by an object without an 'encoding'
        # attribute (e.g. redirected output), hence the getattr fallback.
        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dlc version ' + __version__ + '\n')
        if _LAZY_LOADER:
            self._write_string('[debug] Lazy loading extractors enabled' + '\n')
        # Best-effort: report the git commit when running from a checkout.
        # Failures (no git, not a repo) are silently ignored.
        try:
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        except Exception:
            # sys.exc_clear only exists on Python 2; ignore its absence.
            try:
                sys.exc_clear()
            except Exception:
                pass

        def python_implementation():
            # On PyPy, append the actual PyPy version since
            # platform.python_version() reports the emulated CPython version.
            impl_name = platform.python_implementation()
            if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
                return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
            return impl_name

        self._write_string('[debug] Python version %s (%s) - %s\n' % (
            platform.python_version(), python_implementation(),
            platform_name()))

        # Versions of external programs this tool may shell out to.
        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Collect the effective proxy mapping across all opener handlers.
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            # Opt-in only: contacts yt-dl.org to report the public IP and
            # check whether a newer release is available.
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
2572
    def _setup_opener(self):
        """Create the urllib opener used for all HTTP(S) requests and store
        it in self._opener. Wires up cookies, proxies, HTTPS handling,
        redirects, data: URLs, and disables the file:// scheme."""
        timeout_val = self.params.get('socket_timeout')
        # Default to a 10 minute socket timeout when none is configured.
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # No cookie file configured: use an in-memory cookie jar.
            self.cookiejar = compat_cookiejar.CookieJar()
        else:
            opts_cookiefile = expand_path(opts_cookiefile)
            self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
            # Only load the file when it is readable; a missing file is fine.
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load(ignore_discard=True, ignore_expires=True)

        cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
        if opts_proxy is not None:
            if opts_proxy == '':
                # An explicitly empty proxy option disables proxying entirely.
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            # Fall back to environment proxy settings (http_proxy etc.).
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = PerRequestProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        redirect_handler = YoutubeDLRedirectHandler()
        data_handler = compat_urllib_request_DataHandler()

        # When passing our own FileHandler instance, build_opener won't add the
        # default FileHandler and allows us to disable the file protocol, which
        # can be used for malicious purposes (see
        # https://github.com/ytdl-org/youtube-dl/issues/8227)
        file_handler = compat_urllib_request.FileHandler()

        def file_open(*args, **kwargs):
            raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dlc for security reasons')
        file_handler.file_open = file_open

        opener = compat_urllib_request.build_opener(
            proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)

        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
2625
2626 def encode(self, s):
2627 if isinstance(s, bytes):
2628 return s # Already encoded
2629
2630 try:
2631 return s.encode(self.get_encoding())
2632 except UnicodeEncodeError as err:
2633 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2634 raise
2635
2636 def get_encoding(self):
2637 encoding = self.params.get('encoding')
2638 if encoding is None:
2639 encoding = preferredencoding()
2640 return encoding
2641
2642 def _write_thumbnails(self, info_dict, filename):
2643 if self.params.get('writethumbnail', False):
2644 thumbnails = info_dict.get('thumbnails')
2645 if thumbnails:
2646 thumbnails = [thumbnails[-1]]
2647 elif self.params.get('write_all_thumbnails', False):
2648 thumbnails = info_dict.get('thumbnails')
2649 else:
2650 return
2651
2652 if not thumbnails:
2653 # No thumbnails present, so return immediately
2654 return
2655
2656 for t in thumbnails:
2657 thumb_ext = determine_ext(t['url'], 'jpg')
2658 suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2659 thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2660 t['filename'] = thumb_filename = replace_extension(filename + suffix, thumb_ext, info_dict.get('ext'))
2661
2662 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2663 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2664 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2665 else:
2666 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2667 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2668 try:
2669 uf = self.urlopen(t['url'])
2670 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2671 shutil.copyfileobj(uf, thumbf)
2672 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2673 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2674 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2675 self.report_warning('Unable to download thumbnail "%s": %s' %
2676 (t['url'], error_to_compat_str(err)))