youtube_dlc/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import copy
   9 import datetime
  10 import errno
  11 import fileinput
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import socket
  23 import sys
  24 import time
  25 import tokenize
  26 import traceback
  27 import random
  28
  29 from string import ascii_letters
  30
  31 from .compat import (
  32     compat_basestring,
  33     compat_cookiejar,
  34     compat_get_terminal_size,
  35     compat_http_client,
  36     compat_kwargs,
  37     compat_numeric_types,
  38     compat_os_name,
  39     compat_str,
  40     compat_tokenize_tokenize,
  41     compat_urllib_error,
  42     compat_urllib_request,
  43     compat_urllib_request_DataHandler,
  44 )
  45 from .utils import (
  46     age_restricted,
  47     args_to_str,
  48     ContentTooShortError,
  49     date_from_str,
  50     DateRange,
  51     DEFAULT_OUTTMPL,
  52     determine_ext,
  53     determine_protocol,
  54     DOT_DESKTOP_LINK_TEMPLATE,
  55     DOT_URL_LINK_TEMPLATE,
  56     DOT_WEBLOC_LINK_TEMPLATE,
  57     DownloadError,
  58     encode_compat_str,
  59     encodeFilename,
  60     error_to_compat_str,
  61     expand_path,
  62     ExtractorError,
  63     format_bytes,
  64     format_field,
  65     formatSeconds,
  66     GeoRestrictedError,
  67     int_or_none,
  68     iri_to_uri,
  69     ISO3166Utils,
  70     locked_file,
  71     make_HTTPS_handler,
  72     MaxDownloadsReached,
  73     orderedSet,
  74     PagedList,
  75     parse_filesize,
  76     PerRequestProxyHandler,
  77     platform_name,
  78     PostProcessingError,
  79     preferredencoding,
  80     prepend_extension,
  81     register_socks_protocols,
  82     render_table,
  83     replace_extension,
  84     SameFileError,
  85     sanitize_filename,
  86     sanitize_path,
  87     sanitize_url,
  88     sanitized_Request,
  89     std_headers,
  90     str_or_none,
  91     subtitles_filename,
  92     to_high_limit_path,
  93     UnavailableVideoError,
  94     url_basename,
  95     version_tuple,
  96     write_json_file,
  97     write_string,
  98     YoutubeDLCookieJar,
  99     YoutubeDLCookieProcessor,
 100     YoutubeDLHandler,
 101     YoutubeDLRedirectHandler,
 102 )
 103 from .cache import Cache
 104 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
 105 from .extractor.openload import PhantomJSwrapper
 106 from .downloader import get_suitable_downloader
 107 from .downloader.rtmp import rtmpdump_version
 108 from .postprocessor import (
 109     FFmpegFixupM3u8PP,
 110     FFmpegFixupM4aPP,
 111     FFmpegFixupStretchedPP,
 112     FFmpegMergerPP,
 113     FFmpegPostProcessor,
 114     FFmpegSubtitlesConvertorPP,
 115     get_postprocessor,
 116 )
 117 from .version import __version__
 118
 119 if compat_os_name == 'nt':
 120     import ctypes
 121
 122
 123 class YoutubeDL(object):
 124     """YoutubeDL class.
 125
 126     YoutubeDL objects are the ones responsible for downloading the
 127     actual video file and writing it to disk if the user has requested
 128     it, among some other tasks. In most cases there should be one per
 129     program. As, given a video URL, the downloader doesn't know how to
 130     extract all the needed information, task that InfoExtractors do, it
 131     has to pass the URL to one of them.
 132
 133     For this, YoutubeDL objects have a method that allows
 134     InfoExtractors to be registered in a given order. When it is passed
 135     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 136     finds that reports being able to handle it. The InfoExtractor extracts
 137     all the information about the video or videos the URL refers to, and
 138     YoutubeDL processes the extracted information, possibly using a File
 139     Downloader to download the video.
 140
 141     YoutubeDL objects accept a lot of parameters. In order not to saturate
 142     the object constructor with arguments, it receives a dictionary of
 143     options instead. These options are available through the params
 144     attribute for the InfoExtractors to use. The YoutubeDL also
 145     registers itself as the downloader in charge for the InfoExtractors
 146     that are added to it, so this is a "mutual registration".
 147
 148     Available options:
 149
 150     username:          Username for authentication purposes.
 151     password:          Password for authentication purposes.
 152     videopassword:     Password for accessing a video.
 153     ap_mso:            Adobe Pass multiple-system operator identifier.
 154     ap_username:       Multiple-system operator account username.
 155     ap_password:       Multiple-system operator account password.
 156     usenetrc:          Use netrc for authentication instead.
 157     verbose:           Print additional info to stdout.
 158     quiet:             Do not print messages to stdout.
 159     no_warnings:       Do not print out anything for warnings.
 160     forceurl:          Force printing final URL.
 161     forcetitle:        Force printing title.
 162     forceid:           Force printing ID.
 163     forcethumbnail:    Force printing thumbnail URL.
 164     forcedescription:  Force printing description.
 165     forcefilename:     Force printing final filename.
 166     forceduration:     Force printing duration.
 167     forcejson:         Force printing info_dict as JSON.
 168     dump_single_json:  Force printing the info_dict of the whole playlist
 169                        (or video) as a single JSON line.
 170     force_write_download_archive: Force writing download archive regardless of
 171                        'skip_download' or 'simulate'.
 172     simulate:          Do not download the video files.
 173     format:            Video format code. see "FORMAT SELECTION" for more details.
 174     format_sort:       How to sort the video formats. see "Sorting Formats" for more details.
 175     format_sort_force: Force the given format_sort. see "Sorting Formats" for more details.
 176     allow_multiple_video_streams:   Allow multiple video streams to be merged into a single file
 177     allow_multiple_audio_streams:   Allow multiple audio streams to be merged into a single file
 178     outtmpl:           Template for output names.
 179     restrictfilenames: Do not allow "&" and spaces in file names.
 180     trim_file_name:    Limit length of filename (extension excluded).
 181     ignoreerrors:      Do not stop on download errors.
 182     force_generic_extractor: Force downloader to use the generic extractor
 183     nooverwrites:      Prevent overwriting files.
 184     playliststart:     Playlist item to start at.
 185     playlistend:       Playlist item to end at.
 186     playlist_items:    Specific indices of playlist to download.
 187     playlistreverse:   Download playlist items in reverse order.
 188     playlistrandom:    Download playlist items in random order.
 189     matchtitle:        Download only matching titles.
 190     rejecttitle:       Reject downloads for matching titles.
 191     logger:            Log messages to a logging.Logger instance.
 192     logtostderr:       Log messages to stderr instead of stdout.
 193     writedescription:  Write the video description to a .description file
 194     writeinfojson:     Write the video metadata to a .info.json file
 195     writeannotations:  Write the video annotations to a .annotations.xml file
 196     writethumbnail:    Write the thumbnail image to a file
 197     write_all_thumbnails:  Write all thumbnail formats to files
 198     writelink:         Write an internet shortcut file, depending on the
 199                        current platform (.url/.webloc/.desktop)
 200     writeurllink:      Write a Windows internet shortcut file (.url)
 201     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 202     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 203     writesubtitles:    Write the video subtitles to a file
 204     writeautomaticsub: Write the automatically generated subtitles to a file
 205     allsubtitles:      Downloads all the subtitles of the video
 206                        (requires writesubtitles or writeautomaticsub)
 207     listsubtitles:     Lists all available subtitles for the video
 208     subtitlesformat:   The format code for subtitles
 209     subtitleslangs:    List of languages of the subtitles to download
 210     keepvideo:         Keep the video file after post-processing
 211     daterange:         A DateRange object, download only if the upload_date is in the range.
 212     skip_download:     Skip the actual download of the video file
 213     cachedir:          Location of the cache files in the filesystem.
 214                        False to disable filesystem cache.
 215     noplaylist:        Download single video instead of a playlist if in doubt.
 216     age_limit:         An integer representing the user's age in years.
 217                        Unsuitable videos for the given age are skipped.
 218     min_views:         An integer representing the minimum view count the video
 219                        must have in order to not be skipped.
 220                        Videos without view count information are always
 221                        downloaded. None for no limit.
 222     max_views:         An integer representing the maximum view count.
 223                        Videos that are more popular than that are not
 224                        downloaded.
 225                        Videos without view count information are always
 226                        downloaded. None for no limit.
 227     download_archive:  File name of a file where all downloads are recorded.
 228                        Videos already present in the file are not downloaded
 229                        again.
 230     break_on_existing: Stop the download process after attempting to download a file that's
 231                        in the archive.
 232     cookiefile:        File name where cookies should be read from and dumped to.
 233     nocheckcertificate:Do not verify SSL certificates
 234     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 235                        At the moment, this is only supported by YouTube.
 236     proxy:             URL of the proxy server to use
 237     geo_verification_proxy:  URL of the proxy to use for IP address verification
 238                        on geo-restricted sites.
 239     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 240     bidi_workaround:   Work around buggy terminals without bidirectional text
 241                        support, using fribidi
 242     debug_printtraffic:Print out sent and received HTTP traffic
 243     include_ads:       Download ads as well
 244     default_search:    Prepend this string if an input url is not valid.
 245                        'auto' for elaborate guessing
 246     encoding:          Use this encoding instead of the system-specified.
 247     extract_flat:      Do not resolve URLs, return the immediate result.
 248                        Pass in 'in_playlist' to only show this behavior for
 249                        playlist items.
 250     postprocessors:    A list of dictionaries, each with an entry
 251                        * key:  The name of the postprocessor. See
 252                                youtube_dlc/postprocessor/__init__.py for a list.
 253                        as well as any further keyword arguments for the
 254                        postprocessor.
 255     progress_hooks:    A list of functions that get called on download
 256                        progress, with a dictionary with the entries
 257                        * status: One of "downloading", "error", or "finished".
 258                                  Check this first and ignore unknown values.
 259
 260                        If status is one of "downloading", or "finished", the
 261                        following properties may also be present:
 262                        * filename: The final filename (always present)
 263                        * tmpfilename: The filename we're currently writing to
 264                        * downloaded_bytes: Bytes on disk
 265                        * total_bytes: Size of the whole file, None if unknown
 266                        * total_bytes_estimate: Guess of the eventual file size,
 267                                                None if unavailable.
 268                        * elapsed: The number of seconds since download started.
 269                        * eta: The estimated time in seconds, None if unknown
 270                        * speed: The download speed in bytes/second, None if
 271                                 unknown
 272                        * fragment_index: The counter of the currently
 273                                          downloaded video fragment.
 274                        * fragment_count: The number of fragments (= individual
 275                                          files that will be merged)
 276
 277                        Progress hooks are guaranteed to be called at least once
 278                        (with status "finished") if the download is successful.
 279     merge_output_format: Extension to use when merging formats.
 280     fixup:             Automatically correct known faults of the file.
 281                        One of:
 282                        - "never": do nothing
 283                        - "warn": only emit a warning
 284                        - "detect_or_warn": check whether we can do anything
 285                                            about it, warn otherwise (default)
 286     source_address:    Client-side IP address to bind to.
 287     call_home:         Boolean, true iff we are allowed to contact the
 288                        youtube-dlc servers for debugging.
 289     sleep_interval:    Number of seconds to sleep before each download when
 290                        used alone or a lower bound of a range for randomized
 291                        sleep before each download (minimum possible number
 292                        of seconds to sleep) when used along with
 293                        max_sleep_interval.
 294     max_sleep_interval:Upper bound of a range for randomized sleep before each
 295                        download (maximum possible number of seconds to sleep).
 296                        Must only be used along with sleep_interval.
 297                        Actual sleep time will be a random float from range
 298                        [sleep_interval; max_sleep_interval].
 299     listformats:       Print an overview of available video formats and exit.
 300     list_thumbnails:   Print a table of all thumbnails and exit.
 301     match_filter:      A function that gets called with the info_dict of
 302                        every video.
 303                        If it returns a message, the video is ignored.
 304                        If it returns None, the video is downloaded.
 305                        match_filter_func in utils.py is one example for this.
 306     no_color:          Do not emit color codes in output.
 307     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 308                        HTTP header
 309     geo_bypass_country:
 310                        Two-letter ISO 3166-1 alpha-2 country code that will be used for
 311                        explicit geographic restriction bypassing via faking
 312                        X-Forwarded-For HTTP header
 313     geo_bypass_ip_block:
 314                        IP range in CIDR notation that will be used similarly to
 315                        geo_bypass_country
 316
 317     The following options determine which downloader is picked:
 318     external_downloader: Executable of the external downloader to call.
 319                        None or unset for standard (built-in) downloader.
 320     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
 321                        if True, otherwise use ffmpeg/avconv if False, otherwise
 322                        use downloader suggested by extractor if None.
 323
 324     The following parameters are not used by YoutubeDL itself, they are used by
 325     the downloader (see youtube_dlc/downloader/common.py):
 326     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 327     noresizebuffer, retries, continuedl, noprogress, consoletitle,
 328     xattr_set_filesize, external_downloader_args, hls_use_mpegts,
 329     http_chunk_size.
 330
 331     The following options are used by the post processors:
 332     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 333                        otherwise prefer ffmpeg.
 334     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 335                        to the binary or its containing directory.
 336     postprocessor_args: A list of additional command-line arguments for the
 337                         postprocessor.
 338
 339     The following options are used by the Youtube extractor:
 340     youtube_include_dash_manifest: If True (default), DASH manifests and related
 341                         data will be downloaded and processed by extractor.
 342                         You can reduce network I/O by disabling it if you don't
 343                         care about DASH.
 344     """
 345
    # Names of info fields that this class treats as numeric.
    # NOTE(review): the code that consumes this set is outside this excerpt;
    # confirm how missing or non-numeric values are handled there.
    _NUMERIC_FIELDS = set((
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'upload_year', 'upload_month', 'upload_day',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
        'playlist_index',
    ))

    # Class-level placeholders. __init__ rebinds every one of these names on
    # the instance, so the list objects below are not meant to be shared state.
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None
 363
 364     def __init__(self, params=None, auto_init=True):
 365         """Create a FileDownloader object with the given options."""
 366         if params is None:
 367             params = {}
 368         self._ies = []
 369         self._ies_instances = {}
 370         self._pps = []
 371         self._progress_hooks = []
 372         self._download_retcode = 0
 373         self._num_downloads = 0
 374         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 375         self._err_file = sys.stderr
 376         self.params = {
 377             # Default parameters
 378             'nocheckcertificate': False,
 379         }
 380         self.params.update(params)
 381         self.cache = Cache(self)
 382         self.archive = set()
 383
 384         """Preload the archive, if any is specified"""
 385         def preload_download_archive(self):
 386             fn = self.params.get('download_archive')
 387             if fn is None:
 388                 return False
 389             try:
 390                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 391                     for line in archive_file:
 392                         self.archive.add(line.strip())
 393             except IOError as ioe:
 394                 if ioe.errno != errno.ENOENT:
 395                     raise
 396                 return False
 397             return True
 398
 399         def check_deprecated(param, option, suggestion):
 400             if self.params.get(param) is not None:
 401                 self.report_warning(
 402                     '%s is deprecated. Use %s instead.' % (option, suggestion))
 403                 return True
 404             return False
 405
 406         if self.params.get('verbose'):
 407             self.to_stdout('[debug] Loading archive file %r' % self.params.get('download_archive'))
 408
 409         preload_download_archive(self)
 410
 411         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 412             if self.params.get('geo_verification_proxy') is None:
 413                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 414
 415         check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
 416         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 417         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 418
 419         if params.get('bidi_workaround', False):
 420             try:
 421                 import pty
 422                 master, slave = pty.openpty()
 423                 width = compat_get_terminal_size().columns
 424                 if width is None:
 425                     width_args = []
 426                 else:
 427                     width_args = ['-w', str(width)]
 428                 sp_kwargs = dict(
 429                     stdin=subprocess.PIPE,
 430                     stdout=slave,
 431                     stderr=self._err_file)
 432                 try:
 433                     self._output_process = subprocess.Popen(
 434                         ['bidiv'] + width_args, **sp_kwargs
 435                     )
 436                 except OSError:
 437                     self._output_process = subprocess.Popen(
 438                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 439                 self._output_channel = os.fdopen(master, 'rb')
 440             except OSError as ose:
 441                 if ose.errno == errno.ENOENT:
 442                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 443                 else:
 444                     raise
 445
 446         if (sys.platform != 'win32'
 447                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 448                 and not params.get('restrictfilenames', False)):
 449             # Unicode filesystem API will throw errors (#1474, #13027)
 450             self.report_warning(
 451                 'Assuming --restrict-filenames since file system encoding '
 452                 'cannot encode all characters. '
 453                 'Set the LC_ALL environment variable to fix this.')
 454             self.params['restrictfilenames'] = True
 455
 456         if isinstance(params.get('outtmpl'), bytes):
 457             self.report_warning(
 458                 'Parameter outtmpl is bytes, but should be a unicode string. '
 459                 'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 460
 461         self._setup_opener()
 462
 463         if auto_init:
 464             self.print_debug_header()
 465             self.add_default_info_extractors()
 466
 467         for pp_def_raw in self.params.get('postprocessors', []):
 468             pp_class = get_postprocessor(pp_def_raw['key'])
 469             pp_def = dict(pp_def_raw)
 470             del pp_def['key']
 471             pp = pp_class(self, **compat_kwargs(pp_def))
 472             self.add_post_processor(pp)
 473
 474         for ph in self.params.get('progress_hooks', []):
 475             self.add_progress_hook(ph)
 476
 477         register_socks_protocols()
 478
 479     def warn_if_short_id(self, argv):
 480         # short YouTube ID starting with dash?
 481         idxs = [
 482             i for i, a in enumerate(argv)
 483             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 484         if idxs:
 485             correct_argv = (
 486                 ['youtube-dlc']
 487                 + [a for i, a in enumerate(argv) if i not in idxs]
 488                 + ['--'] + [argv[i] for i in idxs]
 489             )
 490             self.report_warning(
 491                 'Long argument string detected. '
 492                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 493                 args_to_str(correct_argv))
 494
 495     def add_info_extractor(self, ie):
 496         """Add an InfoExtractor object to the end of the list."""
 497         self._ies.append(ie)
 498         if not isinstance(ie, type):
 499             self._ies_instances[ie.ie_key()] = ie
 500             ie.set_downloader(self)
 501
 502     def get_info_extractor(self, ie_key):
 503         """
 504         Get an instance of an IE with name ie_key, it will try to get one from
 505         the _ies list, if there's no instance it will create a new one and add
 506         it to the extractor list.
 507         """
 508         ie = self._ies_instances.get(ie_key)
 509         if ie is None:
 510             ie = get_info_extractor(ie_key)()
 511             self.add_info_extractor(ie)
 512         return ie
 513
 514     def add_default_info_extractors(self):
 515         """
 516         Add the InfoExtractors returned by gen_extractors to the end of the list
 517         """
 518         for ie in gen_extractor_classes():
 519             self.add_info_extractor(ie)
 520
 521     def add_post_processor(self, pp):
 522         """Add a PostProcessor object to the end of the chain."""
 523         self._pps.append(pp)
 524         pp.set_downloader(self)
 525
 526     def add_progress_hook(self, ph):
 527         """Add the progress hook (currently only for the file downloader)"""
 528         self._progress_hooks.append(ph)
 529
 530     def _bidi_workaround(self, message):
 531         if not hasattr(self, '_output_channel'):
 532             return message
 533
 534         assert hasattr(self, '_output_process')
 535         assert isinstance(message, compat_str)
 536         line_count = message.count('\n') + 1
 537         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 538         self._output_process.stdin.flush()
 539         res = ''.join(self._output_channel.readline().decode('utf-8')
 540                       for _ in range(line_count))
 541         return res[:-len('\n')]
 542
 543     def to_screen(self, message, skip_eol=False):
 544         """Print message to stdout if not in quiet mode."""
 545         return self.to_stdout(message, skip_eol, check_quiet=True)
 546
 547     def _write_string(self, s, out=None):
 548         write_string(s, out=out, encoding=self.params.get('encoding'))
 549
 550     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 551         """Print message to stdout if not in quiet mode."""
 552         if self.params.get('logger'):
 553             self.params['logger'].debug(message)
 554         elif not check_quiet or not self.params.get('quiet', False):
 555             message = self._bidi_workaround(message)
 556             terminator = ['\n', ''][skip_eol]
 557             output = message + terminator
 558
 559             self._write_string(output, self._screen_file)
 560
 561     def to_stderr(self, message):
 562         """Print message to stderr."""
 563         assert isinstance(message, compat_str)
 564         if self.params.get('logger'):
 565             self.params['logger'].error(message)
 566         else:
 567             message = self._bidi_workaround(message)
 568             output = message + '\n'
 569             self._write_string(output, self._err_file)
 570
 571     def to_console_title(self, message):
 572         if not self.params.get('consoletitle', False):
 573             return
 574         if compat_os_name == 'nt':
 575             if ctypes.windll.kernel32.GetConsoleWindow():
 576                 # c_wchar_p() might not be necessary if `message` is
 577                 # already of type unicode()
 578                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 579         elif 'TERM' in os.environ:
 580             self._write_string('\033]0;%s\007' % message, self._screen_file)
 581
 582     def save_console_title(self):
 583         if not self.params.get('consoletitle', False):
 584             return
 585         if self.params.get('simulate', False):
 586             return
 587         if compat_os_name != 'nt' and 'TERM' in os.environ:
 588             # Save the title on stack
 589             self._write_string('\033[22;0t', self._screen_file)
 590
 591     def restore_console_title(self):
 592         if not self.params.get('consoletitle', False):
 593             return
 594         if self.params.get('simulate', False):
 595             return
 596         if compat_os_name != 'nt' and 'TERM' in os.environ:
 597             # Restore the title from stack
 598             self._write_string('\033[23;0t', self._screen_file)
 599
 600     def __enter__(self):
 601         self.save_console_title()
 602         return self
 603
 604     def __exit__(self, *args):
 605         self.restore_console_title()
 606
 607         if self.params.get('cookiefile') is not None:
 608             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 609
 610     def trouble(self, message=None, tb=None):
 611         """Determine action to take when a download problem appears.
 612
 613         Depending on if the downloader has been configured to ignore
 614         download errors or not, this method may throw an exception or
 615         not when errors are found, after printing the message.
 616
 617         tb, if given, is additional traceback information.
 618         """
 619         if message is not None:
 620             self.to_stderr(message)
 621         if self.params.get('verbose'):
 622             if tb is None:
 623                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 624                     tb = ''
 625                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 626                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 627                     tb += encode_compat_str(traceback.format_exc())
 628                 else:
 629                     tb_data = traceback.format_list(traceback.extract_stack())
 630                     tb = ''.join(tb_data)
 631             self.to_stderr(tb)
 632         if not self.params.get('ignoreerrors', False):
 633             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 634                 exc_info = sys.exc_info()[1].exc_info
 635             else:
 636                 exc_info = sys.exc_info()
 637             raise DownloadError(message, exc_info)
 638         self._download_retcode = 1
 639
 640     def report_warning(self, message):
 641         '''
 642         Print the message to stderr, it will be prefixed with 'WARNING:'
 643         If stderr is a tty file the 'WARNING:' will be colored
 644         '''
 645         if self.params.get('logger') is not None:
 646             self.params['logger'].warning(message)
 647         else:
 648             if self.params.get('no_warnings'):
 649                 return
 650             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 651                 _msg_header = '\033[0;33mWARNING:\033[0m'
 652             else:
 653                 _msg_header = 'WARNING:'
 654             warning_message = '%s %s' % (_msg_header, message)
 655             self.to_stderr(warning_message)
 656
 657     def report_error(self, message, tb=None):
 658         '''
 659         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 660         in red if stderr is a tty file.
 661         '''
 662         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 663             _msg_header = '\033[0;31mERROR:\033[0m'
 664         else:
 665             _msg_header = 'ERROR:'
 666         error_message = '%s %s' % (_msg_header, message)
 667         self.trouble(error_message, tb)
 668
 669     def report_file_already_downloaded(self, file_name):
 670         """Report file has already been fully downloaded."""
 671         try:
 672             self.to_screen('[download] %s has already been downloaded' % file_name)
 673         except UnicodeEncodeError:
 674             self.to_screen('[download] The file has already been downloaded')
 675
 676     def prepare_filename(self, info_dict):
 677         """Generate the output filename."""
 678         try:
 679             template_dict = dict(info_dict)
 680
 681             template_dict['epoch'] = int(time.time())
 682             autonumber_size = self.params.get('autonumber_size')
 683             if autonumber_size is None:
 684                 autonumber_size = 5
 685             template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
 686             if template_dict.get('resolution') is None:
 687                 if template_dict.get('width') and template_dict.get('height'):
 688                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 689                 elif template_dict.get('height'):
 690                     template_dict['resolution'] = '%sp' % template_dict['height']
 691                 elif template_dict.get('width'):
 692                     template_dict['resolution'] = '%dx?' % template_dict['width']
 693
 694             sanitize = lambda k, v: sanitize_filename(
 695                 compat_str(v),
 696                 restricted=self.params.get('restrictfilenames'),
 697                 is_id=(k == 'id' or k.endswith('_id')))
 698             template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
 699                                  for k, v in template_dict.items()
 700                                  if v is not None and not isinstance(v, (list, tuple, dict)))
 701             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 702
 703             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
 704
 705             # For fields playlist_index and autonumber convert all occurrences
 706             # of %(field)s to %(field)0Nd for backward compatibility
 707             field_size_compat_map = {
 708                 'playlist_index': len(str(template_dict['n_entries'])),
 709                 'autonumber': autonumber_size,
 710             }
 711             FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
 712             mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
 713             if mobj:
 714                 outtmpl = re.sub(
 715                     FIELD_SIZE_COMPAT_RE,
 716                     r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
 717                     outtmpl)
 718
 719             # Missing numeric fields used together with integer presentation types
 720             # in format specification will break the argument substitution since
 721             # string 'NA' is returned for missing fields. We will patch output
 722             # template for missing fields to meet string presentation type.
 723             for numeric_field in self._NUMERIC_FIELDS:
 724                 if numeric_field not in template_dict:
 725                     # As of [1] format syntax is:
 726                     #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
 727                     # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
 728                     FORMAT_RE = r'''(?x)
 729                         (?<!%)
 730                         %
 731                         \({0}\)  # mapping key
 732                         (?:[#0\-+ ]+)?  # conversion flags (optional)
 733                         (?:\d+)?  # minimum field width (optional)
 734                         (?:\.\d+)?  # precision (optional)
 735                         [hlL]?  # length modifier (optional)
 736                         [diouxXeEfFgGcrs%]  # conversion type
 737                     '''
 738                     outtmpl = re.sub(
 739                         FORMAT_RE.format(numeric_field),
 740                         r'%({0})s'.format(numeric_field), outtmpl)
 741
 742             # expand_path translates '%%' into '%' and '$$' into '$'
 743             # correspondingly that is not what we want since we need to keep
 744             # '%%' intact for template dict substitution step. Working around
 745             # with boundary-alike separator hack.
 746             sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 747             outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 748
 749             # outtmpl should be expand_path'ed before template dict substitution
 750             # because meta fields may contain env variables we don't want to
 751             # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
 752             # title "Hello $PATH", we don't want `$PATH` to be expanded.
 753             filename = expand_path(outtmpl).replace(sep, '') % template_dict
 754
 755             # https://github.com/blackjack4494/youtube-dlc/issues/85
 756             trim_file_name = self.params.get('trim_file_name', False)
 757             if trim_file_name:
 758                 fn_groups = filename.rsplit('.')
 759                 ext = fn_groups[-1]
 760                 sub_ext = ''
 761                 if len(fn_groups) > 2:
 762                     sub_ext = fn_groups[-2]
 763                 filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
 764
 765             # Temporary fix for #4787
 766             # 'Treat' all problem characters by passing filename through preferredencoding
 767             # to workaround encoding issues with subprocess on python2 @ Windows
 768             if sys.version_info < (3, 0) and sys.platform == 'win32':
 769                 filename = encodeFilename(filename, True).decode(preferredencoding())
 770             return sanitize_path(filename)
 771         except ValueError as err:
 772             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 773             return None
 774
 775     def _match_entry(self, info_dict, incomplete):
 776         """ Returns None if the file should be downloaded """
 777
 778         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 779         if 'title' in info_dict:
 780             # This can happen when we're just evaluating the playlist
 781             title = info_dict['title']
 782             matchtitle = self.params.get('matchtitle', False)
 783             if matchtitle:
 784                 if not re.search(matchtitle, title, re.IGNORECASE):
 785                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 786             rejecttitle = self.params.get('rejecttitle', False)
 787             if rejecttitle:
 788                 if re.search(rejecttitle, title, re.IGNORECASE):
 789                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 790         date = info_dict.get('upload_date')
 791         if date is not None:
 792             dateRange = self.params.get('daterange', DateRange())
 793             if date not in dateRange:
 794                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 795         view_count = info_dict.get('view_count')
 796         if view_count is not None:
 797             min_views = self.params.get('min_views')
 798             if min_views is not None and view_count < min_views:
 799                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 800             max_views = self.params.get('max_views')
 801             if max_views is not None and view_count > max_views:
 802                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 803         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
 804             return 'Skipping "%s" because it is age restricted' % video_title
 805         if self.in_download_archive(info_dict):
 806             return '%s has already been recorded in archive' % video_title
 807
 808         if not incomplete:
 809             match_filter = self.params.get('match_filter')
 810             if match_filter is not None:
 811                 ret = match_filter(info_dict)
 812                 if ret is not None:
 813                     return ret
 814
 815         return None
 816
 817     @staticmethod
 818     def add_extra_info(info_dict, extra_info):
 819         '''Set the keys from extra_info in info dict if they are missing'''
 820         for key, value in extra_info.items():
 821             info_dict.setdefault(key, value)
 822
 823     def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info={},
 824                      process=True, force_generic_extractor=False):
 825         '''
 826         Returns a list with a dictionary for each video we find.
 827         If 'download', also downloads the videos.
 828         extra_info is a dict containing the extra values to add to each result
 829         '''
 830
 831         if not ie_key and force_generic_extractor:
 832             ie_key = 'Generic'
 833
 834         if ie_key:
 835             ies = [self.get_info_extractor(ie_key)]
 836         else:
 837             ies = self._ies
 838
 839         for ie in ies:
 840             if not ie.suitable(url):
 841                 continue
 842
 843             ie_key = ie.ie_key()
 844             ie = self.get_info_extractor(ie_key)
 845             if not ie.working():
 846                 self.report_warning('The program functionality for this site has been marked as broken, '
 847                                     'and will probably not work.')
 848
 849             try:
 850                 temp_id = ie.extract_id(url) if callable(getattr(ie, 'extract_id', None)) else ie._match_id(url)
 851             except (AssertionError, IndexError, AttributeError):
 852                 temp_id = None
 853             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
 854                 self.to_screen("[%s] %s: has already been recorded in archive" % (
 855                                ie_key, temp_id))
 856                 break
 857
 858             return self.__extract_info(url, ie, download, extra_info, process, info_dict)
 859
 860         else:
 861             self.report_error('no suitable InfoExtractor for URL %s' % url)
 862
 863     def __handle_extraction_exceptions(func):
 864         def wrapper(self, *args, **kwargs):
 865             try:
 866                 return func(self, *args, **kwargs)
 867             except GeoRestrictedError as e:
 868                 msg = e.msg
 869                 if e.countries:
 870                     msg += '\nThis video is available in %s.' % ', '.join(
 871                         map(ISO3166Utils.short2full, e.countries))
 872                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
 873                 self.report_error(msg)
 874             except ExtractorError as e:  # An error we somewhat expected
 875                 self.report_error(compat_str(e), e.format_traceback())
 876             except MaxDownloadsReached:
 877                 raise
 878             except Exception as e:
 879                 if self.params.get('ignoreerrors', False):
 880                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
 881                 else:
 882                     raise
 883         return wrapper
 884
 885     @__handle_extraction_exceptions
 886     def __extract_info(self, url, ie, download, extra_info, process, info_dict):
 887         ie_result = ie.extract(url)
 888         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
 889             return
 890         if isinstance(ie_result, list):
 891             # Backwards compatibility: old IE result format
 892             ie_result = {
 893                 '_type': 'compat_list',
 894                 'entries': ie_result,
 895             }
 896         if info_dict:
 897             if info_dict.get('id'):
 898                 ie_result['id'] = info_dict['id']
 899             if info_dict.get('title'):
 900                 ie_result['title'] = info_dict['title']
 901         self.add_default_extra_info(ie_result, ie, url)
 902         if process:
 903             return self.process_ie_result(ie_result, download, extra_info)
 904         else:
 905             return ie_result
 906
 907     def add_default_extra_info(self, ie_result, ie, url):
 908         self.add_extra_info(ie_result, {
 909             'extractor': ie.IE_NAME,
 910             'webpage_url': url,
 911             'webpage_url_basename': url_basename(url),
 912             'extractor_key': ie.ie_key(),
 913         })
 914
 915     def process_ie_result(self, ie_result, download=True, extra_info={}):
 916         """
 917         Take the result of the ie(may be modified) and resolve all unresolved
 918         references (URLs, playlist items).
 919
 920         It will also download the videos if 'download'.
 921         Returns the resolved ie_result.
 922         """
 923         result_type = ie_result.get('_type', 'video')
 924
 925         if result_type in ('url', 'url_transparent'):
 926             ie_result['url'] = sanitize_url(ie_result['url'])
 927             extract_flat = self.params.get('extract_flat', False)
 928             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
 929                     or extract_flat is True):
 930                 self.__forced_printings(
 931                     ie_result, self.prepare_filename(ie_result),
 932                     incomplete=True)
 933                 return ie_result
 934
 935         if result_type == 'video':
 936             self.add_extra_info(ie_result, extra_info)
 937             return self.process_video_result(ie_result, download=download)
 938         elif result_type == 'url':
 939             # We have to add extra_info to the results because it may be
 940             # contained in a playlist
 941             return self.extract_info(ie_result['url'],
 942                                      download, info_dict=ie_result,
 943                                      ie_key=ie_result.get('ie_key'),
 944                                      extra_info=extra_info)
 945         elif result_type == 'url_transparent':
 946             # Use the information from the embedding page
 947             info = self.extract_info(
 948                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 949                 extra_info=extra_info, download=False, process=False)
 950
 951             # extract_info may return None when ignoreerrors is enabled and
 952             # extraction failed with an error, don't crash and return early
 953             # in this case
 954             if not info:
 955                 return info
 956
 957             force_properties = dict(
 958                 (k, v) for k, v in ie_result.items() if v is not None)
 959             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
 960                 if f in force_properties:
 961                     del force_properties[f]
 962             new_result = info.copy()
 963             new_result.update(force_properties)
 964
 965             # Extracted info may not be a video result (i.e.
 966             # info.get('_type', 'video') != video) but rather an url or
 967             # url_transparent. In such cases outer metadata (from ie_result)
 968             # should be propagated to inner one (info). For this to happen
 969             # _type of info should be overridden with url_transparent. This
 970             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
 971             if new_result.get('_type') == 'url':
 972                 new_result['_type'] = 'url_transparent'
 973
 974             return self.process_ie_result(
 975                 new_result, download=download, extra_info=extra_info)
 976         elif result_type in ('playlist', 'multi_video'):
 977             # We process each entry in the playlist
 978             playlist = ie_result.get('title') or ie_result.get('id')
 979             self.to_screen('[download] Downloading playlist: %s' % playlist)
 980
 981             playlist_results = []
 982
 983             playliststart = self.params.get('playliststart', 1) - 1
 984             playlistend = self.params.get('playlistend')
 985             # For backwards compatibility, interpret -1 as whole list
 986             if playlistend == -1:
 987                 playlistend = None
 988
 989             playlistitems_str = self.params.get('playlist_items')
 990             playlistitems = None
 991             if playlistitems_str is not None:
 992                 def iter_playlistitems(format):
 993                     for string_segment in format.split(','):
 994                         if '-' in string_segment:
 995                             start, end = string_segment.split('-')
 996                             for item in range(int(start), int(end) + 1):
 997                                 yield int(item)
 998                         else:
 999                             yield int(string_segment)
1000                 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1001
1002             ie_entries = ie_result['entries']
1003
1004             def make_playlistitems_entries(list_ie_entries):
1005                 num_entries = len(list_ie_entries)
1006                 return [
1007                     list_ie_entries[i - 1] for i in playlistitems
1008                     if -num_entries <= i - 1 < num_entries]
1009
1010             def report_download(num_entries):
1011                 self.to_screen(
1012                     '[%s] playlist %s: Downloading %d videos' %
1013                     (ie_result['extractor'], playlist, num_entries))
1014
1015             if isinstance(ie_entries, list):
1016                 n_all_entries = len(ie_entries)
1017                 if playlistitems:
1018                     entries = make_playlistitems_entries(ie_entries)
1019                 else:
1020                     entries = ie_entries[playliststart:playlistend]
1021                 n_entries = len(entries)
1022                 self.to_screen(
1023                     '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
1024                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
1025             elif isinstance(ie_entries, PagedList):
1026                 if playlistitems:
1027                     entries = []
1028                     for item in playlistitems:
1029                         entries.extend(ie_entries.getslice(
1030                             item - 1, item
1031                         ))
1032                 else:
1033                     entries = ie_entries.getslice(
1034                         playliststart, playlistend)
1035                 n_entries = len(entries)
1036                 report_download(n_entries)
1037             else:  # iterable
1038                 if playlistitems:
1039                     entries = make_playlistitems_entries(list(itertools.islice(
1040                         ie_entries, 0, max(playlistitems))))
1041                 else:
1042                     entries = list(itertools.islice(
1043                         ie_entries, playliststart, playlistend))
1044                 n_entries = len(entries)
1045                 report_download(n_entries)
1046
1047             if self.params.get('playlistreverse', False):
1048                 entries = entries[::-1]
1049
1050             if self.params.get('playlistrandom', False):
1051                 random.shuffle(entries)
1052
1053             x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1054
1055             for i, entry in enumerate(entries, 1):
1056                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1057                 # This __x_forwarded_for_ip thing is a bit ugly but requires
1058                 # minimal changes
1059                 if x_forwarded_for:
1060                     entry['__x_forwarded_for_ip'] = x_forwarded_for
1061                 extra = {
1062                     'n_entries': n_entries,
1063                     'playlist': playlist,
1064                     'playlist_id': ie_result.get('id'),
1065                     'playlist_title': ie_result.get('title'),
1066                     'playlist_uploader': ie_result.get('uploader'),
1067                     'playlist_uploader_id': ie_result.get('uploader_id'),
1068                     'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
1069                     'extractor': ie_result['extractor'],
1070                     'webpage_url': ie_result['webpage_url'],
1071                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1072                     'extractor_key': ie_result['extractor_key'],
1073                 }
1074
1075                 reason = self._match_entry(entry, incomplete=True)
1076                 if reason is not None:
1077                     if reason.endswith('has already been recorded in the archive') and self.params.get('break_on_existing'):
1078                         print('[download] tried downloading a file that\'s already in the archive, stopping since --break-on-existing is set.')
1079                         break
1080                     else:
1081                         self.to_screen('[download] ' + reason)
1082                         continue
1083
1084                 entry_result = self.__process_iterable_entry(entry, download, extra)
1085                 # TODO: skip failed (empty) entries?
1086                 playlist_results.append(entry_result)
1087             ie_result['entries'] = playlist_results
1088             self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1089             return ie_result
1090         elif result_type == 'compat_list':
1091             self.report_warning(
1092                 'Extractor %s returned a compat_list result. '
1093                 'It needs to be updated.' % ie_result.get('extractor'))
1094
1095             def _fixup(r):
1096                 self.add_extra_info(
1097                     r,
1098                     {
1099                         'extractor': ie_result['extractor'],
1100                         'webpage_url': ie_result['webpage_url'],
1101                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
1102                         'extractor_key': ie_result['extractor_key'],
1103                     }
1104                 )
1105                 return r
1106             ie_result['entries'] = [
1107                 self.process_ie_result(_fixup(r), download, extra_info)
1108                 for r in ie_result['entries']
1109             ]
1110             return ie_result
1111         else:
1112             raise Exception('Invalid result type: %s' % result_type)
1113
1114     @__handle_extraction_exceptions
1115     def __process_iterable_entry(self, entry, download, extra_info):
1116         return self.process_ie_result(
1117             entry, download=download, extra_info=extra_info)
1118
1119     def _build_format_filter(self, filter_spec):
1120         " Returns a function to filter the formats according to the filter_spec "
1121
1122         OPERATORS = {
1123             '<': operator.lt,
1124             '<=': operator.le,
1125             '>': operator.gt,
1126             '>=': operator.ge,
1127             '=': operator.eq,
1128             '!=': operator.ne,
1129         }
1130         operator_rex = re.compile(r'''(?x)\s*
1131             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
1132             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1133             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
1134             $
1135             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1136         m = operator_rex.search(filter_spec)
1137         if m:
1138             try:
1139                 comparison_value = int(m.group('value'))
1140             except ValueError:
1141                 comparison_value = parse_filesize(m.group('value'))
1142                 if comparison_value is None:
1143                     comparison_value = parse_filesize(m.group('value') + 'B')
1144                 if comparison_value is None:
1145                     raise ValueError(
1146                         'Invalid value %r in format specification %r' % (
1147                             m.group('value'), filter_spec))
1148             op = OPERATORS[m.group('op')]
1149
1150         if not m:
1151             STR_OPERATORS = {
1152                 '=': operator.eq,
1153                 '^=': lambda attr, value: attr.startswith(value),
1154                 '$=': lambda attr, value: attr.endswith(value),
1155                 '*=': lambda attr, value: value in attr,
1156             }
1157             str_operator_rex = re.compile(r'''(?x)
1158                 \s*(?P<key>[a-zA-Z0-9._-]+)
1159                 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
1160                 \s*(?P<value>[a-zA-Z0-9._-]+)
1161                 \s*$
1162                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1163             m = str_operator_rex.search(filter_spec)
1164             if m:
1165                 comparison_value = m.group('value')
1166                 str_op = STR_OPERATORS[m.group('op')]
1167                 if m.group('negation'):
1168                     op = lambda attr, value: not str_op(attr, value)
1169                 else:
1170                     op = str_op
1171
1172         if not m:
1173             raise ValueError('Invalid filter specification %r' % filter_spec)
1174
1175         def _filter(f):
1176             actual_value = f.get(m.group('key'))
1177             if actual_value is None:
1178                 return m.group('none_inclusive')
1179             return op(actual_value, comparison_value)
1180         return _filter
1181
1182     def _default_format_spec(self, info_dict, download=True):
1183
1184         def can_merge():
1185             merger = FFmpegMergerPP(self)
1186             return merger.available and merger.can_merge()
1187
1188         def prefer_best():
1189             if self.params.get('simulate', False):
1190                 return False
1191             if not download:
1192                 return False
1193             if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
1194                 return True
1195             if info_dict.get('is_live'):
1196                 return True
1197             if not can_merge():
1198                 return True
1199             return False
1200
1201         req_format_list = ['bestvideo+bestaudio', 'best']
1202         if prefer_best():
1203             req_format_list.reverse()
1204         return '/'.join(req_format_list)
1205
1206     def build_format_selector(self, format_spec):
1207         def syntax_error(note, start):
1208             message = (
1209                 'Invalid format specification: '
1210                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1211             return SyntaxError(message)
1212
1213         PICKFIRST = 'PICKFIRST'
1214         MERGE = 'MERGE'
1215         SINGLE = 'SINGLE'
1216         GROUP = 'GROUP'
1217         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1218
1219         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', True),
1220                                   'video': self.params.get('allow_multiple_video_streams', True)}
1221
1222         def _parse_filter(tokens):
1223             filter_parts = []
1224             for type, string, start, _, _ in tokens:
1225                 if type == tokenize.OP and string == ']':
1226                     return ''.join(filter_parts)
1227                 else:
1228                     filter_parts.append(string)
1229
1230         def _remove_unused_ops(tokens):
1231             # Remove operators that we don't use and join them with the surrounding strings
1232             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1233             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1234             last_string, last_start, last_end, last_line = None, None, None, None
1235             for type, string, start, end, line in tokens:
1236                 if type == tokenize.OP and string == '[':
1237                     if last_string:
1238                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1239                         last_string = None
1240                     yield type, string, start, end, line
1241                     # everything inside brackets will be handled by _parse_filter
1242                     for type, string, start, end, line in tokens:
1243                         yield type, string, start, end, line
1244                         if type == tokenize.OP and string == ']':
1245                             break
1246                 elif type == tokenize.OP and string in ALLOWED_OPS:
1247                     if last_string:
1248                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1249                         last_string = None
1250                     yield type, string, start, end, line
1251                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1252                     if not last_string:
1253                         last_string = string
1254                         last_start = start
1255                         last_end = end
1256                     else:
1257                         last_string += string
1258             if last_string:
1259                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1260
        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            """Parse a token stream into a list of FormatSelector objects.

            Tokens are consumed until the stream is exhausted, an ENDMARKER is
            seen, or a delimiter that belongs to an enclosing construct is
            reached. The function calls itself for the right-hand side of "/"
            and "+" and for the contents of "(...)".

            tokens -- iterator of tokenize-style 5-tuples; it must also offer
                      restore_last_token() so that a delimiter can be handed
                      back to the calling level
            inside_merge -- parsing the right operand of "+": stop at "/" or ","
            inside_choice -- parsing the right operand of "/": stop at ","
            inside_group -- parsing inside "(...)": the closing ")" is consumed
                            here instead of being handed back

            Returns the (possibly empty) list of parsed selectors. Raises the
            exception built by syntax_error() on malformed input.
            """
            selectors = []
            # Selector currently being assembled; None until an atom, group or
            # filter has been seen since the last ",".
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    # A plain atom (format id, extension, best/worst, ...)
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        # End of the "+" operand; the outer level handles the delimiter
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        # End of the "/" operand; the outer level handles the ","
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        # A filter without a preceding atom applies to 'best'
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        if not current_selector:
                            raise syntax_error('Unexpected "+"', start)
                        selector_1 = current_selector
                        selector_2 = _parse_format_selection(tokens, inside_merge=True)
                        # The recursive call returns a list; empty means nothing followed "+"
                        if not selector_2:
                            raise syntax_error('Expected a selector', start)
                        current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            # Flush the selector that was still being built when parsing stopped
            if current_selector:
                selectors.append(current_selector)
            return selectors
1318
1319         def _build_selector_function(selector):
1320             if isinstance(selector, list):  # ,
1321                 fs = [_build_selector_function(s) for s in selector]
1322
1323                 def selector_function(ctx):
1324                     for f in fs:
1325                         for format in f(ctx):
1326                             yield format
1327                 return selector_function
1328
1329             elif selector.type == GROUP:  # ()
1330                 selector_function = _build_selector_function(selector.selector)
1331
1332             elif selector.type == PICKFIRST:  # /
1333                 fs = [_build_selector_function(s) for s in selector.selector]
1334
1335                 def selector_function(ctx):
1336                     for f in fs:
1337                         picked_formats = list(f(ctx))
1338                         if picked_formats:
1339                             return picked_formats
1340                     return []
1341
1342             elif selector.type == SINGLE:  # atom
1343                 format_spec = selector.selector if selector.selector is not None else 'best'
1344
1345                 if format_spec == 'all':
1346                     def selector_function(ctx):
1347                         formats = list(ctx['formats'])
1348                         if formats:
1349                             for f in formats:
1350                                 yield f
1351
1352                 else:
1353                     format_fallback = False
1354                     format_spec_obj = re.match(r'(best|worst|b|w)(video|audio|v|a)?(\*)?$', format_spec)
1355                     if format_spec_obj is not None:
1356                         format_idx = 0 if format_spec_obj.group(1)[0] == 'w' else -1
1357                         format_type = format_spec_obj.group(2)[0] if format_spec_obj.group(2) else False
1358                         not_format_type = 'v' if format_type == 'a' else 'a'
1359                         format_modified = format_spec_obj.group(3) is not None
1360
1361                         format_fallback = not format_type and not format_modified  # for b, w
1362                         filter_f = ((lambda f: f.get(format_type + 'codec') != 'none')
1363                                     if format_type and format_modified  # bv*, ba*, wv*, wa*
1364                                     else (lambda f: f.get(not_format_type + 'codec') == 'none')
1365                                     if format_type  # bv, ba, wv, wa
1366                                     else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
1367                                     if not format_modified  # b, w
1368                                     else None)  # b*, w*
1369                     else:
1370                         format_idx = -1
1371                         filter_f = ((lambda f: f.get('ext') == format_spec)
1372                                     if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
1373                                     else (lambda f: f.get('format_id') == format_spec))  # id
1374
1375                     def selector_function(ctx):
1376                         formats = list(ctx['formats'])
1377                         if not formats:
1378                             return
1379                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
1380                         if matches:
1381                             yield matches[format_idx]
1382                         elif format_fallback == 'force' or (format_fallback and ctx['incomplete_formats']):
1383                             # for extractors with incomplete formats (audio only (soundcloud)
1384                             # or video only (imgur)) best/worst will fallback to
1385                             # best/worst {video,audio}-only format
1386                             yield formats[format_idx]
1387
1388             elif selector.type == MERGE:        # +
1389                 def _merge(formats_pair):
1390                     format_1, format_2 = formats_pair
1391
1392                     formats_info = []
1393                     formats_info.extend(format_1.get('requested_formats', (format_1,)))
1394                     formats_info.extend(format_2.get('requested_formats', (format_2,)))
1395
1396                     if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1397                         get_no_more = {"video": False, "audio": False}
1398                         for (i, fmt_info) in enumerate(formats_info):
1399                             for aud_vid in ["audio", "video"]:
1400                                 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1401                                     if get_no_more[aud_vid]:
1402                                         formats_info.pop(i)
1403                                     get_no_more[aud_vid] = True
1404
1405                     if len(formats_info) == 1:
1406                         return formats_info[0]
1407
1408                     video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1409                     audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1410
1411                     the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1412                     the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1413
1414                     output_ext = self.params.get('merge_output_format')
1415                     if not output_ext:
1416                         if the_only_video:
1417                             output_ext = the_only_video['ext']
1418                         elif the_only_audio and not video_fmts:
1419                             output_ext = the_only_audio['ext']
1420                         else:
1421                             output_ext = 'mkv'
1422
1423                     new_dict = {
1424                         'requested_formats': formats_info,
1425                         'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
1426                         'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
1427                         'ext': output_ext,
1428                     }
1429
1430                     if the_only_video:
1431                         new_dict.update({
1432                             'width': the_only_video.get('width'),
1433                             'height': the_only_video.get('height'),
1434                             'resolution': the_only_video.get('resolution'),
1435                             'fps': the_only_video.get('fps'),
1436                             'vcodec': the_only_video.get('vcodec'),
1437                             'vbr': the_only_video.get('vbr'),
1438                             'stretched_ratio': the_only_video.get('stretched_ratio'),
1439                         })
1440
1441                     if the_only_audio:
1442                         new_dict.update({
1443                             'acodec': the_only_audio.get('acodec'),
1444                             'abr': the_only_audio.get('abr'),
1445                         })
1446
1447                     return new_dict
1448
1449                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
1450
1451                 def selector_function(ctx):
1452                     for pair in itertools.product(
1453                             selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
1454                         yield _merge(pair)
1455
1456             filters = [self._build_format_filter(f) for f in selector.filters]
1457
1458             def final_selector(ctx):
1459                 ctx_copy = copy.deepcopy(ctx)
1460                 for _filter in filters:
1461                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1462                 return selector_function(ctx_copy)
1463             return final_selector
1464
1465         stream = io.BytesIO(format_spec.encode('utf-8'))
1466         try:
1467             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1468         except tokenize.TokenError:
1469             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1470
1471         class TokenIterator(object):
1472             def __init__(self, tokens):
1473                 self.tokens = tokens
1474                 self.counter = 0
1475
1476             def __iter__(self):
1477                 return self
1478
1479             def __next__(self):
1480                 if self.counter >= len(self.tokens):
1481                     raise StopIteration()
1482                 value = self.tokens[self.counter]
1483                 self.counter += 1
1484                 return value
1485
1486             next = __next__
1487
1488             def restore_last_token(self):
1489                 self.counter -= 1
1490
1491         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1492         return _build_selector_function(parsed_selector)
1493
1494     def _calc_headers(self, info_dict):
1495         res = std_headers.copy()
1496
1497         add_headers = info_dict.get('http_headers')
1498         if add_headers:
1499             res.update(add_headers)
1500
1501         cookies = self._calc_cookies(info_dict)
1502         if cookies:
1503             res['Cookie'] = cookies
1504
1505         if 'X-Forwarded-For' not in res:
1506             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1507             if x_forwarded_for_ip:
1508                 res['X-Forwarded-For'] = x_forwarded_for_ip
1509
1510         return res
1511
1512     def _calc_cookies(self, info_dict):
1513         pr = sanitized_Request(info_dict['url'])
1514         self.cookiejar.add_cookie_header(pr)
1515         return pr.get_header('Cookie')
1516
1517     def process_video_result(self, info_dict, download=True):
1518         assert info_dict.get('_type', 'video') == 'video'
1519
1520         if 'id' not in info_dict:
1521             raise ExtractorError('Missing "id" field in extractor result')
1522         if 'title' not in info_dict:
1523             raise ExtractorError('Missing "title" field in extractor result')
1524
1525         def report_force_conversion(field, field_not, conversion):
1526             self.report_warning(
1527                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
1528                 % (field, field_not, conversion))
1529
1530         def sanitize_string_field(info, string_field):
1531             field = info.get(string_field)
1532             if field is None or isinstance(field, compat_str):
1533                 return
1534             report_force_conversion(string_field, 'a string', 'string')
1535             info[string_field] = compat_str(field)
1536
1537         def sanitize_numeric_fields(info):
1538             for numeric_field in self._NUMERIC_FIELDS:
1539                 field = info.get(numeric_field)
1540                 if field is None or isinstance(field, compat_numeric_types):
1541                     continue
1542                 report_force_conversion(numeric_field, 'numeric', 'int')
1543                 info[numeric_field] = int_or_none(field)
1544
1545         sanitize_string_field(info_dict, 'id')
1546         sanitize_numeric_fields(info_dict)
1547
1548         if 'playlist' not in info_dict:
1549             # It isn't part of a playlist
1550             info_dict['playlist'] = None
1551             info_dict['playlist_index'] = None
1552
1553         thumbnails = info_dict.get('thumbnails')
1554         if thumbnails is None:
1555             thumbnail = info_dict.get('thumbnail')
1556             if thumbnail:
1557                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1558         if thumbnails:
1559             thumbnails.sort(key=lambda t: (
1560                 t.get('preference') if t.get('preference') is not None else -1,
1561                 t.get('width') if t.get('width') is not None else -1,
1562                 t.get('height') if t.get('height') is not None else -1,
1563                 t.get('id') if t.get('id') is not None else '', t.get('url')))
1564             for i, t in enumerate(thumbnails):
1565                 t['url'] = sanitize_url(t['url'])
1566                 if t.get('width') and t.get('height'):
1567                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1568                 if t.get('id') is None:
1569                     t['id'] = '%d' % i
1570
1571         if self.params.get('list_thumbnails'):
1572             self.list_thumbnails(info_dict)
1573             return
1574
1575         thumbnail = info_dict.get('thumbnail')
1576         if thumbnail:
1577             info_dict['thumbnail'] = sanitize_url(thumbnail)
1578         elif thumbnails:
1579             info_dict['thumbnail'] = thumbnails[-1]['url']
1580
1581         if 'display_id' not in info_dict and 'id' in info_dict:
1582             info_dict['display_id'] = info_dict['id']
1583
1584         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1585             # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1586             # see http://bugs.python.org/issue1646728)
1587             try:
1588                 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1589                 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1590             except (ValueError, OverflowError, OSError):
1591                 pass
1592
1593         # Auto generate title fields corresponding to the *_number fields when missing
1594         # in order to always have clean titles. This is very common for TV series.
1595         for field in ('chapter', 'season', 'episode'):
1596             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1597                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
1598
1599         for cc_kind in ('subtitles', 'automatic_captions'):
1600             cc = info_dict.get(cc_kind)
1601             if cc:
1602                 for _, subtitle in cc.items():
1603                     for subtitle_format in subtitle:
1604                         if subtitle_format.get('url'):
1605                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
1606                         if subtitle_format.get('ext') is None:
1607                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1608
1609         automatic_captions = info_dict.get('automatic_captions')
1610         subtitles = info_dict.get('subtitles')
1611
1612         if self.params.get('listsubtitles', False):
1613             if 'automatic_captions' in info_dict:
1614                 self.list_subtitles(
1615                     info_dict['id'], automatic_captions, 'automatic captions')
1616             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1617             return
1618
1619         info_dict['requested_subtitles'] = self.process_subtitles(
1620             info_dict['id'], subtitles, automatic_captions)
1621
1622         # We now pick which formats have to be downloaded
1623         if info_dict.get('formats') is None:
1624             # There's only one format available
1625             formats = [info_dict]
1626         else:
1627             formats = info_dict['formats']
1628
1629         if not formats:
1630             raise ExtractorError('No video formats found!')
1631
1632         def is_wellformed(f):
1633             url = f.get('url')
1634             if not url:
1635                 self.report_warning(
1636                     '"url" field is missing or empty - skipping format, '
1637                     'there is an error in extractor')
1638                 return False
1639             if isinstance(url, bytes):
1640                 sanitize_string_field(f, 'url')
1641             return True
1642
1643         # Filter out malformed formats for better extraction robustness
1644         formats = list(filter(is_wellformed, formats))
1645
1646         formats_dict = {}
1647
1648         # We check that all the formats have the format and format_id fields
1649         for i, format in enumerate(formats):
1650             sanitize_string_field(format, 'format_id')
1651             sanitize_numeric_fields(format)
1652             format['url'] = sanitize_url(format['url'])
1653             if not format.get('format_id'):
1654                 format['format_id'] = compat_str(i)
1655             else:
1656                 # Sanitize format_id from characters used in format selector expression
1657                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
1658             format_id = format['format_id']
1659             if format_id not in formats_dict:
1660                 formats_dict[format_id] = []
1661             formats_dict[format_id].append(format)
1662
1663         # Make sure all formats have unique format_id
1664         for format_id, ambiguous_formats in formats_dict.items():
1665             if len(ambiguous_formats) > 1:
1666                 for i, format in enumerate(ambiguous_formats):
1667                     format['format_id'] = '%s-%d' % (format_id, i)
1668
1669         for i, format in enumerate(formats):
1670             if format.get('format') is None:
1671                 format['format'] = '{id} - {res}{note}'.format(
1672                     id=format['format_id'],
1673                     res=self.format_resolution(format),
1674                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1675                 )
1676             # Automatically determine file extension if missing
1677             if format.get('ext') is None:
1678                 format['ext'] = determine_ext(format['url']).lower()
1679             # Automatically determine protocol if missing (useful for format
1680             # selection purposes)
1681             if format.get('protocol') is None:
1682                 format['protocol'] = determine_protocol(format)
1683             # Add HTTP headers, so that external programs can use them from the
1684             # json output
1685             full_format_info = info_dict.copy()
1686             full_format_info.update(format)
1687             format['http_headers'] = self._calc_headers(full_format_info)
1688         # Remove private housekeeping stuff
1689         if '__x_forwarded_for_ip' in info_dict:
1690             del info_dict['__x_forwarded_for_ip']
1691
1692         # TODO Central sorting goes here
1693
1694         if formats[0] is not info_dict:
1695             # only set the 'formats' fields if the original info_dict list them
1696             # otherwise we end up with a circular reference, the first (and unique)
1697             # element in the 'formats' field in info_dict is info_dict itself,
1698             # which can't be exported to json
1699             info_dict['formats'] = formats
1700         if self.params.get('listformats'):
1701             self.list_formats(info_dict)
1702             return
1703
1704         req_format = self.params.get('format')
1705         if req_format is None:
1706             req_format = self._default_format_spec(info_dict, download=download)
1707             if self.params.get('verbose'):
1708                 self._write_string('[debug] Default format spec: %s\n' % req_format)
1709
1710         format_selector = self.build_format_selector(req_format)
1711
1712         # While in format selection we may need to have an access to the original
1713         # format set in order to calculate some metrics or do some processing.
1714         # For now we need to be able to guess whether original formats provided
1715         # by extractor are incomplete or not (i.e. whether extractor provides only
1716         # video-only or audio-only formats) for proper formats selection for
1717         # extractors with such incomplete formats (see
1718         # https://github.com/ytdl-org/youtube-dl/pull/5556).
1719         # Since formats may be filtered during format selection and may not match
1720         # the original formats the results may be incorrect. Thus original formats
1721         # or pre-calculated metrics should be passed to format selection routines
1722         # as well.
1723         # We will pass a context object containing all necessary additional data
1724         # instead of just formats.
1725         # This fixes incorrect format selection issue (see
1726         # https://github.com/ytdl-org/youtube-dl/issues/10083).
1727         incomplete_formats = (
1728             # All formats are video-only or
1729             all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
1730             # all formats are audio-only
1731             or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
1732
1733         ctx = {
1734             'formats': formats,
1735             'incomplete_formats': incomplete_formats,
1736         }
1737
1738         formats_to_download = list(format_selector(ctx))
1739         if not formats_to_download:
1740             raise ExtractorError('requested format not available',
1741                                  expected=True)
1742
1743         if download:
1744             self.to_screen('[info] Downloading format(s) %s' % ", ".join([f['format_id'] for f in formats_to_download]))
1745             if len(formats_to_download) > 1:
1746                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1747             for format in formats_to_download:
1748                 new_info = dict(info_dict)
1749                 new_info.update(format)
1750                 self.process_info(new_info)
1751         # We update the info dict with the best quality format (backwards compatibility)
1752         info_dict.update(formats_to_download[-1])
1753         return info_dict
1754
1755     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1756         """Select the requested subtitles and their format"""
1757         available_subs = {}
1758         if normal_subtitles and self.params.get('writesubtitles'):
1759             available_subs.update(normal_subtitles)
1760         if automatic_captions and self.params.get('writeautomaticsub'):
1761             for lang, cap_info in automatic_captions.items():
1762                 if lang not in available_subs:
1763                     available_subs[lang] = cap_info
1764
1765         if (not self.params.get('writesubtitles') and not
1766                 self.params.get('writeautomaticsub') or not
1767                 available_subs):
1768             return None
1769
1770         if self.params.get('allsubtitles', False):
1771             requested_langs = available_subs.keys()
1772         else:
1773             if self.params.get('subtitleslangs', False):
1774                 requested_langs = self.params.get('subtitleslangs')
1775             elif 'en' in available_subs:
1776                 requested_langs = ['en']
1777             else:
1778                 requested_langs = [list(available_subs.keys())[0]]
1779
1780         formats_query = self.params.get('subtitlesformat', 'best')
1781         formats_preference = formats_query.split('/') if formats_query else []
1782         subs = {}
1783         for lang in requested_langs:
1784             formats = available_subs.get(lang)
1785             if formats is None:
1786                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1787                 continue
1788             for ext in formats_preference:
1789                 if ext == 'best':
1790                     f = formats[-1]
1791                     break
1792                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1793                 if matches:
1794                     f = matches[-1]
1795                     break
1796             else:
1797                 f = formats[-1]
1798                 self.report_warning(
1799                     'No subtitle format found matching "%s" for language %s, '
1800                     'using %s' % (formats_query, lang, f['ext']))
1801             subs[lang] = f
1802         return subs
1803
1804     def __forced_printings(self, info_dict, filename, incomplete):
1805         def print_mandatory(field):
1806             if (self.params.get('force%s' % field, False)
1807                     and (not incomplete or info_dict.get(field) is not None)):
1808                 self.to_stdout(info_dict[field])
1809
1810         def print_optional(field):
1811             if (self.params.get('force%s' % field, False)
1812                     and info_dict.get(field) is not None):
1813                 self.to_stdout(info_dict[field])
1814
1815         print_mandatory('title')
1816         print_mandatory('id')
1817         if self.params.get('forceurl', False) and not incomplete:
1818             if info_dict.get('requested_formats') is not None:
1819                 for f in info_dict['requested_formats']:
1820                     self.to_stdout(f['url'] + f.get('play_path', ''))
1821             else:
1822                 # For RTMP URLs, also include the playpath
1823                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1824         print_optional('thumbnail')
1825         print_optional('description')
1826         if self.params.get('forcefilename', False) and filename is not None:
1827             self.to_stdout(filename)
1828         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1829             self.to_stdout(formatSeconds(info_dict['duration']))
1830         print_mandatory('format')
1831         if self.params.get('forcejson', False):
1832             self.to_stdout(json.dumps(info_dict))
1833
1834     def process_info(self, info_dict):
1835         """Process a single resolved IE result."""
1836
1837         assert info_dict.get('_type', 'video') == 'video'
1838
1839         max_downloads = self.params.get('max_downloads')
1840         if max_downloads is not None:
1841             if self._num_downloads >= int(max_downloads):
1842                 raise MaxDownloadsReached()
1843
1844         # TODO: backward compatibility, to be removed
1845         info_dict['fulltitle'] = info_dict['title']
1846
1847         if 'format' not in info_dict:
1848             info_dict['format'] = info_dict['ext']
1849
1850         reason = self._match_entry(info_dict, incomplete=False)
1851         if reason is not None:
1852             self.to_screen('[download] ' + reason)
1853             return
1854
1855         self._num_downloads += 1
1856
1857         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1858
1859         # Forced printings
1860         self.__forced_printings(info_dict, filename, incomplete=False)
1861
1862         if self.params.get('simulate', False):
1863             if self.params.get('force_write_download_archive', False):
1864                 self.record_download_archive(info_dict)
1865
1866             # Do nothing else if in simulate mode
1867             return
1868
1869         if filename is None:
1870             return
1871
1872         def ensure_dir_exists(path):
1873             try:
1874                 dn = os.path.dirname(path)
1875                 if dn and not os.path.exists(dn):
1876                     os.makedirs(dn)
1877                 return True
1878             except (OSError, IOError) as err:
1879                 self.report_error('unable to create directory ' + error_to_compat_str(err))
1880                 return False
1881
1882         if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
1883             return
1884
1885         if self.params.get('writedescription', False):
1886             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1887             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1888                 self.to_screen('[info] Video description is already present')
1889             elif info_dict.get('description') is None:
1890                 self.report_warning('There\'s no description to write.')
1891             else:
1892                 try:
1893                     self.to_screen('[info] Writing video description to: ' + descfn)
1894                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1895                         descfile.write(info_dict['description'])
1896                 except (OSError, IOError):
1897                     self.report_error('Cannot write description file ' + descfn)
1898                     return
1899
1900         if self.params.get('writeannotations', False):
1901             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1902             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1903                 self.to_screen('[info] Video annotations are already present')
1904             elif not info_dict.get('annotations'):
1905                 self.report_warning('There are no annotations to write.')
1906             else:
1907                 try:
1908                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1909                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1910                         annofile.write(info_dict['annotations'])
1911                 except (KeyError, TypeError):
1912                     self.report_warning('There are no annotations to write.')
1913                 except (OSError, IOError):
1914                     self.report_error('Cannot write annotations file: ' + annofn)
1915                     return
1916
1917         def dl(name, info, subtitle=False):
1918             fd = get_suitable_downloader(info, self.params)(self, self.params)
1919             for ph in self._progress_hooks:
1920                 fd.add_progress_hook(ph)
1921             if self.params.get('verbose'):
1922                 self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
1923             return fd.download(name, info, subtitle)
1924
1925         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1926                                        self.params.get('writeautomaticsub')])
1927
1928         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1929             # subtitles download errors are already managed as troubles in relevant IE
1930             # that way it will silently go on when used with unsupporting IE
1931             subtitles = info_dict['requested_subtitles']
1932             # ie = self.get_info_extractor(info_dict['extractor_key'])
1933             for sub_lang, sub_info in subtitles.items():
1934                 sub_format = sub_info['ext']
1935                 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
1936                 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1937                     self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
1938                 else:
1939                     self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1940                     if sub_info.get('data') is not None:
1941                         try:
1942                             # Use newline='' to prevent conversion of newline characters
1943                             # See https://github.com/ytdl-org/youtube-dl/issues/10268
1944                             with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
1945                                 subfile.write(sub_info['data'])
1946                         except (OSError, IOError):
1947                             self.report_error('Cannot write subtitles file ' + sub_filename)
1948                             return
1949                     else:
1950                         try:
1951                             dl(sub_filename, sub_info, subtitle=True)
1952                             '''
1953                             if self.params.get('sleep_interval_subtitles', False):
1954                                 dl(sub_filename, sub_info)
1955                             else:
1956                                 sub_data = ie._request_webpage(
1957                                     sub_info['url'], info_dict['id'], note=False).read()
1958                                 with io.open(encodeFilename(sub_filename), 'wb') as subfile:
1959                                     subfile.write(sub_data)
1960                             '''
1961                         except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1962                             self.report_warning('Unable to download subtitle for "%s": %s' %
1963                                                 (sub_lang, error_to_compat_str(err)))
1964                             continue
1965
1966         if self.params.get('skip_download', False):
1967             if self.params.get('convertsubtitles', False):
1968                 subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
1969                 filename_real_ext = os.path.splitext(filename)[1][1:]
1970                 filename_wo_ext = (
1971                     os.path.splitext(filename)[0]
1972                     if filename_real_ext == info_dict['ext']
1973                     else filename)
1974                 afilename = '%s.%s' % (filename_wo_ext, self.params.get('convertsubtitles'))
1975                 if subconv.available:
1976                     info_dict.setdefault('__postprocessors', [])
1977                     # info_dict['__postprocessors'].append(subconv)
1978                 if os.path.exists(encodeFilename(afilename)):
1979                     self.to_screen(
1980                         '[download] %s has already been downloaded and '
1981                         'converted' % afilename)
1982                 else:
1983                     try:
1984                         self.post_process(filename, info_dict)
1985                     except (PostProcessingError) as err:
1986                         self.report_error('postprocessing: %s' % str(err))
1987                         return
1988
1989         if self.params.get('writeinfojson', False):
1990             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1991             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1992                 self.to_screen('[info] Video description metadata is already present')
1993             else:
1994                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1995                 try:
1996                     write_json_file(self.filter_requested_info(info_dict), infofn)
1997                 except (OSError, IOError):
1998                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1999                     return
2000
2001         self._write_thumbnails(info_dict, filename)
2002
2003         # Write internet shortcut files
2004         url_link = webloc_link = desktop_link = False
2005         if self.params.get('writelink', False):
2006             if sys.platform == "darwin":  # macOS.
2007                 webloc_link = True
2008             elif sys.platform.startswith("linux"):
2009                 desktop_link = True
2010             else:  # if sys.platform in ['win32', 'cygwin']:
2011                 url_link = True
2012         if self.params.get('writeurllink', False):
2013             url_link = True
2014         if self.params.get('writewebloclink', False):
2015             webloc_link = True
2016         if self.params.get('writedesktoplink', False):
2017             desktop_link = True
2018
2019         if url_link or webloc_link or desktop_link:
2020             if 'webpage_url' not in info_dict:
2021                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2022                 return
2023             ascii_url = iri_to_uri(info_dict['webpage_url'])
2024
2025         def _write_link_file(extension, template, newline, embed_filename):
2026             linkfn = replace_extension(filename, extension, info_dict.get('ext'))
2027             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(linkfn)):
2028                 self.to_screen('[info] Internet shortcut is already present')
2029             else:
2030                 try:
2031                     self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2032                     with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2033                         template_vars = {'url': ascii_url}
2034                         if embed_filename:
2035                             template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2036                         linkfile.write(template % template_vars)
2037                 except (OSError, IOError):
2038                     self.report_error('Cannot write internet shortcut ' + linkfn)
2039                     return False
2040             return True
2041
2042         if url_link:
2043             if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2044                 return
2045         if webloc_link:
2046             if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2047                 return
2048         if desktop_link:
2049             if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2050                 return
2051
2052         # Download
2053         must_record_download_archive = False
2054         if not self.params.get('skip_download', False):
2055             try:
2056                 if info_dict.get('requested_formats') is not None:
2057                     downloaded = []
2058                     success = True
2059                     merger = FFmpegMergerPP(self)
2060                     if not merger.available:
2061                         postprocessors = []
2062                         self.report_warning('You have requested multiple '
2063                                             'formats but ffmpeg or avconv are not installed.'
2064                                             ' The formats won\'t be merged.')
2065                     else:
2066                         postprocessors = [merger]
2067
2068                     def compatible_formats(formats):
2069                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2070                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2071                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2072                         if len(video_formats) > 2 or len(audio_formats) > 2:
2073                             return False
2074
2075                         # Check extension
2076                         exts = set(format.get('ext') for format in formats)
2077                         COMPATIBLE_EXTS = (
2078                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2079                             set(('webm',)),
2080                         )
2081                         for ext_sets in COMPATIBLE_EXTS:
2082                             if ext_sets.issuperset(exts):
2083                                 return True
2084                         # TODO: Check acodec/vcodec
2085                         return False
2086
2087                     filename_real_ext = os.path.splitext(filename)[1][1:]
2088                     filename_wo_ext = (
2089                         os.path.splitext(filename)[0]
2090                         if filename_real_ext == info_dict['ext']
2091                         else filename)
2092                     requested_formats = info_dict['requested_formats']
2093                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
2094                         info_dict['ext'] = 'mkv'
2095                         self.report_warning(
2096                             'Requested formats are incompatible for merge and will be merged into mkv.')
2097                     # Ensure filename always has a correct extension for successful merge
2098                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
2099                     if os.path.exists(encodeFilename(filename)):
2100                         self.to_screen(
2101                             '[download] %s has already been downloaded and '
2102                             'merged' % filename)
2103                     else:
2104                         for f in requested_formats:
2105                             new_info = dict(info_dict)
2106                             new_info.update(f)
2107                             fname = prepend_extension(
2108                                 self.prepare_filename(new_info),
2109                                 'f%s' % f['format_id'], new_info['ext'])
2110                             if not ensure_dir_exists(fname):
2111                                 return
2112                             downloaded.append(fname)
2113                             partial_success, real_download = dl(fname, new_info)
2114                             success = success and partial_success
2115                         info_dict['__postprocessors'] = postprocessors
2116                         info_dict['__files_to_merge'] = downloaded
2117                         # Even if there were no downloads, it is being merged only now
2118                         info_dict['__real_download'] = True
2119                 else:
2120                     # Just a single file
2121                     success, real_download = dl(filename, info_dict)
2122                     info_dict['__real_download'] = real_download
2123             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2124                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2125                 return
2126             except (OSError, IOError) as err:
2127                 raise UnavailableVideoError(err)
2128             except (ContentTooShortError, ) as err:
2129                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2130                 return
2131
2132             if success and filename != '-':
2133                 # Fixup content
2134                 fixup_policy = self.params.get('fixup')
2135                 if fixup_policy is None:
2136                     fixup_policy = 'detect_or_warn'
2137
2138                 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
2139
2140                 stretched_ratio = info_dict.get('stretched_ratio')
2141                 if stretched_ratio is not None and stretched_ratio != 1:
2142                     if fixup_policy == 'warn':
2143                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2144                             info_dict['id'], stretched_ratio))
2145                     elif fixup_policy == 'detect_or_warn':
2146                         stretched_pp = FFmpegFixupStretchedPP(self)
2147                         if stretched_pp.available:
2148                             info_dict.setdefault('__postprocessors', [])
2149                             info_dict['__postprocessors'].append(stretched_pp)
2150                         else:
2151                             self.report_warning(
2152                                 '%s: Non-uniform pixel ratio (%s). %s'
2153                                 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
2154                     else:
2155                         assert fixup_policy in ('ignore', 'never')
2156
2157                 if (info_dict.get('requested_formats') is None
2158                         and info_dict.get('container') == 'm4a_dash'):
2159                     if fixup_policy == 'warn':
2160                         self.report_warning(
2161                             '%s: writing DASH m4a. '
2162                             'Only some players support this container.'
2163                             % info_dict['id'])
2164                     elif fixup_policy == 'detect_or_warn':
2165                         fixup_pp = FFmpegFixupM4aPP(self)
2166                         if fixup_pp.available:
2167                             info_dict.setdefault('__postprocessors', [])
2168                             info_dict['__postprocessors'].append(fixup_pp)
2169                         else:
2170                             self.report_warning(
2171                                 '%s: writing DASH m4a. '
2172                                 'Only some players support this container. %s'
2173                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2174                     else:
2175                         assert fixup_policy in ('ignore', 'never')
2176
2177                 if (info_dict.get('protocol') == 'm3u8_native'
2178                         or info_dict.get('protocol') == 'm3u8'
2179                         and self.params.get('hls_prefer_native')):
2180                     if fixup_policy == 'warn':
2181                         self.report_warning('%s: malformed AAC bitstream detected.' % (
2182                             info_dict['id']))
2183                     elif fixup_policy == 'detect_or_warn':
2184                         fixup_pp = FFmpegFixupM3u8PP(self)
2185                         if fixup_pp.available:
2186                             info_dict.setdefault('__postprocessors', [])
2187                             info_dict['__postprocessors'].append(fixup_pp)
2188                         else:
2189                             self.report_warning(
2190                                 '%s: malformed AAC bitstream detected. %s'
2191                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2192                     else:
2193                         assert fixup_policy in ('ignore', 'never')
2194
2195                 try:
2196                     self.post_process(filename, info_dict)
2197                 except (PostProcessingError) as err:
2198                     self.report_error('postprocessing: %s' % str(err))
2199                     return
2200                 must_record_download_archive = True
2201
2202         if must_record_download_archive or self.params.get('force_write_download_archive', False):
2203             self.record_download_archive(info_dict)
2204
2205     def download(self, url_list):
2206         """Download a given list of URLs."""
2207         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
2208         if (len(url_list) > 1
2209                 and outtmpl != '-'
2210                 and '%' not in outtmpl
2211                 and self.params.get('max_downloads') != 1):
2212             raise SameFileError(outtmpl)
2213
2214         for url in url_list:
2215             try:
2216                 # It also downloads the videos
2217                 res = self.extract_info(
2218                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2219             except UnavailableVideoError:
2220                 self.report_error('unable to download video')
2221             except MaxDownloadsReached:
2222                 self.to_screen('[info] Maximum number of downloaded files reached.')
2223                 raise
2224             else:
2225                 if self.params.get('dump_single_json', False):
2226                     self.to_stdout(json.dumps(res))
2227
2228         return self._download_retcode
2229
2230     def download_with_info_file(self, info_filename):
2231         with contextlib.closing(fileinput.FileInput(
2232                 [info_filename], mode='r',
2233                 openhook=fileinput.hook_encoded('utf-8'))) as f:
2234             # FileInput doesn't have a read method, we can't call json.load
2235             info = self.filter_requested_info(json.loads('\n'.join(f)))
2236         try:
2237             self.process_ie_result(info, download=True)
2238         except DownloadError:
2239             webpage_url = info.get('webpage_url')
2240             if webpage_url is not None:
2241                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2242                 return self.download([webpage_url])
2243             else:
2244                 raise
2245         return self._download_retcode
2246
2247     @staticmethod
2248     def filter_requested_info(info_dict):
2249         return dict(
2250             (k, v) for k, v in info_dict.items()
2251             if k not in ['requested_formats', 'requested_subtitles'])
2252
2253     def post_process(self, filename, ie_info):
2254         """Run all the postprocessors on the given file."""
2255         info = dict(ie_info)
2256         info['filepath'] = filename
2257         pps_chain = []
2258         if ie_info.get('__postprocessors') is not None:
2259             pps_chain.extend(ie_info['__postprocessors'])
2260         pps_chain.extend(self._pps)
2261         for pp in pps_chain:
2262             files_to_delete = []
2263             try:
2264                 files_to_delete, info = pp.run(info)
2265             except PostProcessingError as e:
2266                 self.report_error(e.msg)
2267             if files_to_delete and not self.params.get('keepvideo', False):
2268                 for old_filename in set(files_to_delete):
2269                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2270                     try:
2271                         os.remove(encodeFilename(old_filename))
2272                     except (IOError, OSError):
2273                         self.report_warning('Unable to remove downloaded original file')
2274
2275     def _make_archive_id(self, info_dict):
2276         video_id = info_dict.get('id')
2277         if not video_id:
2278             return
2279         # Future-proof against any change in case
2280         # and backwards compatibility with prior versions
2281         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
2282         if extractor is None:
2283             url = str_or_none(info_dict.get('url'))
2284             if not url:
2285                 return
2286             # Try to find matching extractor for the URL and take its ie_key
2287             for ie in self._ies:
2288                 if ie.suitable(url):
2289                     extractor = ie.ie_key()
2290                     break
2291             else:
2292                 return
2293         return extractor.lower() + ' ' + video_id
2294
2295     def in_download_archive(self, info_dict):
2296         fn = self.params.get('download_archive')
2297         if fn is None:
2298             return False
2299
2300         vid_id = self._make_archive_id(info_dict)
2301         if not vid_id:
2302             return False  # Incomplete video information
2303
2304         return vid_id in self.archive
2305
2306     def record_download_archive(self, info_dict):
2307         fn = self.params.get('download_archive')
2308         if fn is None:
2309             return
2310         vid_id = self._make_archive_id(info_dict)
2311         assert vid_id
2312         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2313             archive_file.write(vid_id + '\n')
2314         self.archive.add(vid_id)
2315
2316     @staticmethod
2317     def format_resolution(format, default='unknown'):
2318         if format.get('vcodec') == 'none':
2319             return 'audio only'
2320         if format.get('resolution') is not None:
2321             return format['resolution']
2322         if format.get('height') is not None:
2323             if format.get('width') is not None:
2324                 res = '%sx%s' % (format['width'], format['height'])
2325             else:
2326                 res = '%sp' % format['height']
2327         elif format.get('width') is not None:
2328             res = '%dx?' % format['width']
2329         else:
2330             res = default
2331         return res
2332
2333     def _format_note(self, fdict):
2334         res = ''
2335         if fdict.get('ext') in ['f4f', 'f4m']:
2336             res += '(unsupported) '
2337         if fdict.get('language'):
2338             if res:
2339                 res += ' '
2340             res += '[%s] ' % fdict['language']
2341         if fdict.get('format_note') is not None:
2342             res += fdict['format_note'] + ' '
2343         if fdict.get('tbr') is not None:
2344             res += '%4dk ' % fdict['tbr']
2345         if fdict.get('container') is not None:
2346             if res:
2347                 res += ', '
2348             res += '%s container' % fdict['container']
2349         if (fdict.get('vcodec') is not None
2350                 and fdict.get('vcodec') != 'none'):
2351             if res:
2352                 res += ', '
2353             res += fdict['vcodec']
2354             if fdict.get('vbr') is not None:
2355                 res += '@'
2356         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2357             res += 'video@'
2358         if fdict.get('vbr') is not None:
2359             res += '%4dk' % fdict['vbr']
2360         if fdict.get('fps') is not None:
2361             if res:
2362                 res += ', '
2363             res += '%sfps' % fdict['fps']
2364         if fdict.get('acodec') is not None:
2365             if res:
2366                 res += ', '
2367             if fdict['acodec'] == 'none':
2368                 res += 'video only'
2369             else:
2370                 res += '%-5s' % fdict['acodec']
2371         elif fdict.get('abr') is not None:
2372             if res:
2373                 res += ', '
2374             res += 'audio'
2375         if fdict.get('abr') is not None:
2376             res += '@%3dk' % fdict['abr']
2377         if fdict.get('asr') is not None:
2378             res += ' (%5dHz)' % fdict['asr']
2379         if fdict.get('filesize') is not None:
2380             if res:
2381                 res += ', '
2382             res += format_bytes(fdict['filesize'])
2383         elif fdict.get('filesize_approx') is not None:
2384             if res:
2385                 res += ', '
2386             res += '~' + format_bytes(fdict['filesize_approx'])
2387         return res
2388
2389     def _format_note_table(self, f):
2390         def join_fields(*vargs):
2391             return ', '.join((val for val in vargs if val != ''))
2392
2393         return join_fields(
2394             'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
2395             format_field(f, 'language', '[%s]'),
2396             format_field(f, 'format_note'),
2397             format_field(f, 'container', ignore=(None, f.get('ext'))),
2398             format_field(f, 'asr', '%5dHz'))
2399
2400     def list_formats(self, info_dict):
2401         formats = info_dict.get('formats', [info_dict])
2402         new_format = self.params.get('listformats_table', False)
2403         if new_format:
2404             table = [
2405                 [
2406                     format_field(f, 'format_id'),
2407                     format_field(f, 'ext'),
2408                     self.format_resolution(f),
2409                     format_field(f, 'fps', '%d'),
2410                     '|',
2411                     format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
2412                     format_field(f, 'tbr', '%4dk'),
2413                     f.get('protocol').replace('http_dash_segments', 'dash').replace("native", "n"),
2414                     '|',
2415                     format_field(f, 'vcodec', default='unknown').replace('none', ''),
2416                     format_field(f, 'vbr', '%4dk'),
2417                     format_field(f, 'acodec', default='unknown').replace('none', ''),
2418                     format_field(f, 'abr', '%3dk'),
2419                     format_field(f, 'asr', '%5dHz'),
2420                     self._format_note_table(f)]
2421                 for f in formats
2422                 if f.get('preference') is None or f['preference'] >= -1000]
2423             header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', '  TBR', 'PROTO',
2424                            '|', 'VCODEC', '  VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
2425         else:
2426             table = [
2427                 [
2428                     format_field(f, 'format_id'),
2429                     format_field(f, 'ext'),
2430                     self.format_resolution(f),
2431                     self._format_note(f)]
2432                 for f in formats
2433                 if f.get('preference') is None or f['preference'] >= -1000]
2434             header_line = ['format code', 'extension', 'resolution', 'note']
2435
2436         # if len(formats) > 1:
2437         #     table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
2438         self.to_screen(
2439             '[info] Available formats for %s:\n%s' % (info_dict['id'], render_table(
2440                 header_line,
2441                 table,
2442                 delim=new_format,
2443                 extraGap=(0 if new_format else 1),
2444                 hideEmpty=new_format)))
2445
2446     def list_thumbnails(self, info_dict):
2447         thumbnails = info_dict.get('thumbnails')
2448         if not thumbnails:
2449             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
2450             return
2451
2452         self.to_screen(
2453             '[info] Thumbnails for %s:' % info_dict['id'])
2454         self.to_screen(render_table(
2455             ['ID', 'width', 'height', 'URL'],
2456             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
2457
2458     def list_subtitles(self, video_id, subtitles, name='subtitles'):
2459         if not subtitles:
2460             self.to_screen('%s has no %s' % (video_id, name))
2461             return
2462         self.to_screen(
2463             'Available %s for %s:' % (name, video_id))
2464         self.to_screen(render_table(
2465             ['Language', 'formats'],
2466             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
2467                 for lang, formats in subtitles.items()]))
2468
2469     def urlopen(self, req):
2470         """ Start an HTTP download """
2471         if isinstance(req, compat_basestring):
2472             req = sanitized_Request(req)
2473         return self._opener.open(req, timeout=self._socket_timeout)
2474
2475     def print_debug_header(self):
2476         if not self.params.get('verbose'):
2477             return
2478
2479         if type('') is not compat_str:
2480             # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
2481             self.report_warning(
2482                 'Your Python is broken! Update to a newer and supported version')
2483
2484         stdout_encoding = getattr(
2485             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
2486         encoding_str = (
2487             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
2488                 locale.getpreferredencoding(),
2489                 sys.getfilesystemencoding(),
2490                 stdout_encoding,
2491                 self.get_encoding()))
2492         write_string(encoding_str, encoding=None)
2493
2494         self._write_string('[debug] youtube-dlc version ' + __version__ + '\n')
2495         if _LAZY_LOADER:
2496             self._write_string('[debug] Lazy loading extractors enabled' + '\n')
2497         try:
2498             sp = subprocess.Popen(
2499                 ['git', 'rev-parse', '--short', 'HEAD'],
2500                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
2501                 cwd=os.path.dirname(os.path.abspath(__file__)))
2502             out, err = sp.communicate()
2503             out = out.decode().strip()
2504             if re.match('[0-9a-f]+', out):
2505                 self._write_string('[debug] Git HEAD: ' + out + '\n')
2506         except Exception:
2507             try:
2508                 sys.exc_clear()
2509             except Exception:
2510                 pass
2511
2512         def python_implementation():
2513             impl_name = platform.python_implementation()
2514             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
2515                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
2516             return impl_name
2517
2518         self._write_string('[debug] Python version %s (%s) - %s\n' % (
2519             platform.python_version(), python_implementation(),
2520             platform_name()))
2521
2522         exe_versions = FFmpegPostProcessor.get_versions(self)
2523         exe_versions['rtmpdump'] = rtmpdump_version()
2524         exe_versions['phantomjs'] = PhantomJSwrapper._version()
2525         exe_str = ', '.join(
2526             '%s %s' % (exe, v)
2527             for exe, v in sorted(exe_versions.items())
2528             if v
2529         )
2530         if not exe_str:
2531             exe_str = 'none'
2532         self._write_string('[debug] exe versions: %s\n' % exe_str)
2533
2534         proxy_map = {}
2535         for handler in self._opener.handlers:
2536             if hasattr(handler, 'proxies'):
2537                 proxy_map.update(handler.proxies)
2538         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
2539
2540         if self.params.get('call_home', False):
2541             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
2542             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
2543             latest_version = self.urlopen(
2544                 'https://yt-dl.org/latest/version').read().decode('utf-8')
2545             if version_tuple(latest_version) > version_tuple(__version__):
2546                 self.report_warning(
2547                     'You are using an outdated version (newest version: %s)! '
2548                     'See https://yt-dl.org/update if you need help updating.' %
2549                     latest_version)
2550
2551     def _setup_opener(self):
2552         timeout_val = self.params.get('socket_timeout')
2553         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
2554
2555         opts_cookiefile = self.params.get('cookiefile')
2556         opts_proxy = self.params.get('proxy')
2557
2558         if opts_cookiefile is None:
2559             self.cookiejar = compat_cookiejar.CookieJar()
2560         else:
2561             opts_cookiefile = expand_path(opts_cookiefile)
2562             self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
2563             if os.access(opts_cookiefile, os.R_OK):
2564                 self.cookiejar.load(ignore_discard=True, ignore_expires=True)
2565
2566         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
2567         if opts_proxy is not None:
2568             if opts_proxy == '':
2569                 proxies = {}
2570             else:
2571                 proxies = {'http': opts_proxy, 'https': opts_proxy}
2572         else:
2573             proxies = compat_urllib_request.getproxies()
2574             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
2575             if 'http' in proxies and 'https' not in proxies:
2576                 proxies['https'] = proxies['http']
2577         proxy_handler = PerRequestProxyHandler(proxies)
2578
2579         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
2580         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
2581         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
2582         redirect_handler = YoutubeDLRedirectHandler()
2583         data_handler = compat_urllib_request_DataHandler()
2584
2585         # When passing our own FileHandler instance, build_opener won't add the
2586         # default FileHandler and allows us to disable the file protocol, which
2587         # can be used for malicious purposes (see
2588         # https://github.com/ytdl-org/youtube-dl/issues/8227)
2589         file_handler = compat_urllib_request.FileHandler()
2590
2591         def file_open(*args, **kwargs):
2592             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dlc for security reasons')
2593         file_handler.file_open = file_open
2594
2595         opener = compat_urllib_request.build_opener(
2596             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2597
2598         # Delete the default user-agent header, which would otherwise apply in
2599         # cases where our custom HTTP handler doesn't come into play
2600         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
2601         opener.addheaders = []
2602         self._opener = opener
2603
2604     def encode(self, s):
2605         if isinstance(s, bytes):
2606             return s  # Already encoded
2607
2608         try:
2609             return s.encode(self.get_encoding())
2610         except UnicodeEncodeError as err:
2611             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2612             raise
2613
2614     def get_encoding(self):
2615         encoding = self.params.get('encoding')
2616         if encoding is None:
2617             encoding = preferredencoding()
2618         return encoding
2619
2620     def _write_thumbnails(self, info_dict, filename):
2621         if self.params.get('writethumbnail', False):
2622             thumbnails = info_dict.get('thumbnails')
2623             if thumbnails:
2624                 thumbnails = [thumbnails[-1]]
2625         elif self.params.get('write_all_thumbnails', False):
2626             thumbnails = info_dict.get('thumbnails')
2627         else:
2628             return
2629
2630         if not thumbnails:
2631             # No thumbnails present, so return immediately
2632             return
2633
2634         for t in thumbnails:
2635             thumb_ext = determine_ext(t['url'], 'jpg')
2636             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2637             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2638             t['filename'] = thumb_filename = replace_extension(filename + suffix, thumb_ext, info_dict.get('ext'))
2639
2640             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2641                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2642                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2643             else:
2644                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2645                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2646                 try:
2647                     uf = self.urlopen(t['url'])
2648                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2649                         shutil.copyfileobj(uf, thumbf)
2650                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2651                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2652                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2653                     self.report_warning('Unable to download thumbnail "%s": %s' %
2654                                         (t['url'], error_to_compat_str(err)))