]> jfr.im git - yt-dlp.git/blob - youtube_dlc/YoutubeDL.py
97e4f451f1e74bc8b7b1f9c95023adf5dc6e945a
[yt-dlp.git] / youtube_dlc / YoutubeDL.py
1 #!/usr/bin/env python
2 # coding: utf-8
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import copy
9 import datetime
10 import errno
11 import fileinput
12 import io
13 import itertools
14 import json
15 import locale
16 import operator
17 import os
18 import platform
19 import re
20 import shutil
21 import subprocess
22 import socket
23 import sys
24 import time
25 import tokenize
26 import traceback
27 import random
28
29 from string import ascii_letters
30
31 from .compat import (
32 compat_basestring,
33 compat_cookiejar,
34 compat_get_terminal_size,
35 compat_http_client,
36 compat_kwargs,
37 compat_numeric_types,
38 compat_os_name,
39 compat_str,
40 compat_tokenize_tokenize,
41 compat_urllib_error,
42 compat_urllib_request,
43 compat_urllib_request_DataHandler,
44 )
45 from .utils import (
46 age_restricted,
47 args_to_str,
48 ContentTooShortError,
49 date_from_str,
50 DateRange,
51 DEFAULT_OUTTMPL,
52 determine_ext,
53 determine_protocol,
54 DOT_DESKTOP_LINK_TEMPLATE,
55 DOT_URL_LINK_TEMPLATE,
56 DOT_WEBLOC_LINK_TEMPLATE,
57 DownloadError,
58 encode_compat_str,
59 encodeFilename,
60 error_to_compat_str,
61 expand_path,
62 ExtractorError,
63 format_bytes,
64 formatSeconds,
65 GeoRestrictedError,
66 int_or_none,
67 iri_to_uri,
68 ISO3166Utils,
69 locked_file,
70 make_HTTPS_handler,
71 MaxDownloadsReached,
72 orderedSet,
73 PagedList,
74 parse_filesize,
75 PerRequestProxyHandler,
76 platform_name,
77 PostProcessingError,
78 preferredencoding,
79 prepend_extension,
80 register_socks_protocols,
81 render_table,
82 replace_extension,
83 SameFileError,
84 sanitize_filename,
85 sanitize_path,
86 sanitize_url,
87 sanitized_Request,
88 std_headers,
89 str_or_none,
90 subtitles_filename,
91 to_high_limit_path,
92 UnavailableVideoError,
93 url_basename,
94 version_tuple,
95 write_json_file,
96 write_string,
97 YoutubeDLCookieJar,
98 YoutubeDLCookieProcessor,
99 YoutubeDLHandler,
100 YoutubeDLRedirectHandler,
101 )
102 from .cache import Cache
103 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
104 from .extractor.openload import PhantomJSwrapper
105 from .downloader import get_suitable_downloader
106 from .downloader.rtmp import rtmpdump_version
107 from .postprocessor import (
108 FFmpegFixupM3u8PP,
109 FFmpegFixupM4aPP,
110 FFmpegFixupStretchedPP,
111 FFmpegMergerPP,
112 FFmpegPostProcessor,
113 FFmpegSubtitlesConvertorPP,
114 get_postprocessor,
115 )
116 from .version import __version__
117
118 if compat_os_name == 'nt':
119 import ctypes
120
121
122 class YoutubeDL(object):
123 """YoutubeDL class.
124
125 YoutubeDL objects are the ones responsible of downloading the
126 actual video file and writing it to disk if the user has requested
127 it, among some other tasks. In most cases there should be one per
128 program. As, given a video URL, the downloader doesn't know how to
129 extract all the needed information, task that InfoExtractors do, it
130 has to pass the URL to one of them.
131
132 For this, YoutubeDL objects have a method that allows
133 InfoExtractors to be registered in a given order. When it is passed
134 a URL, the YoutubeDL object handles it to the first InfoExtractor it
135 finds that reports being able to handle it. The InfoExtractor extracts
136 all the information about the video or videos the URL refers to, and
137 YoutubeDL process the extracted information, possibly using a File
138 Downloader to download the video.
139
140 YoutubeDL objects accept a lot of parameters. In order not to saturate
141 the object constructor with arguments, it receives a dictionary of
142 options instead. These options are available through the params
143 attribute for the InfoExtractors to use. The YoutubeDL also
144 registers itself as the downloader in charge for the InfoExtractors
145 that are added to it, so this is a "mutual registration".
146
147 Available options:
148
149 username: Username for authentication purposes.
150 password: Password for authentication purposes.
151 videopassword: Password for accessing a video.
152 ap_mso: Adobe Pass multiple-system operator identifier.
153 ap_username: Multiple-system operator account username.
154 ap_password: Multiple-system operator account password.
155 usenetrc: Use netrc for authentication instead.
156 verbose: Print additional info to stdout.
157 quiet: Do not print messages to stdout.
158 no_warnings: Do not print out anything for warnings.
159 forceurl: Force printing final URL.
160 forcetitle: Force printing title.
161 forceid: Force printing ID.
162 forcethumbnail: Force printing thumbnail URL.
163 forcedescription: Force printing description.
164 forcefilename: Force printing final filename.
165 forceduration: Force printing duration.
166 forcejson: Force printing info_dict as JSON.
167 dump_single_json: Force printing the info_dict of the whole playlist
168 (or video) as a single JSON line.
169 simulate: Do not download the video files.
170 format: Video format code. see "FORMAT SELECTION" for more details.
171 format_sort: How to sort the video formats. see "Sorting Formats" for more details.
172 format_sort_force: Force the given format_sort. see "Sorting Formats" for more details.
173 allow_multiple_video_streams: Allow multiple video streams to be merged into a single file
174 allow_multiple_audio_streams: Allow multiple audio streams to be merged into a single file
175 outtmpl: Template for output names.
176 restrictfilenames: Do not allow "&" and spaces in file names.
177 trim_file_name: Limit length of filename (extension excluded).
178 ignoreerrors: Do not stop on download errors.
179 force_generic_extractor: Force downloader to use the generic extractor
180 nooverwrites: Prevent overwriting files.
181 playliststart: Playlist item to start at.
182 playlistend: Playlist item to end at.
183 playlist_items: Specific indices of playlist to download.
184 playlistreverse: Download playlist items in reverse order.
185 playlistrandom: Download playlist items in random order.
186 matchtitle: Download only matching titles.
187 rejecttitle: Reject downloads for matching titles.
188 logger: Log messages to a logging.Logger instance.
189 logtostderr: Log messages to stderr instead of stdout.
190 writedescription: Write the video description to a .description file
191 writeinfojson: Write the video description to a .info.json file
192 writeannotations: Write the video annotations to a .annotations.xml file
193 writethumbnail: Write the thumbnail image to a file
194 write_all_thumbnails: Write all thumbnail formats to files
195 writelink: Write an internet shortcut file, depending on the
196 current platform (.url/.webloc/.desktop)
197 writeurllink: Write a Windows internet shortcut file (.url)
198 writewebloclink: Write a macOS internet shortcut file (.webloc)
199 writedesktoplink: Write a Linux internet shortcut file (.desktop)
200 writesubtitles: Write the video subtitles to a file
201 writeautomaticsub: Write the automatically generated subtitles to a file
202 allsubtitles: Downloads all the subtitles of the video
203 (requires writesubtitles or writeautomaticsub)
204 listsubtitles: Lists all available subtitles for the video
205 subtitlesformat: The format code for subtitles
206 subtitleslangs: List of languages of the subtitles to download
207 keepvideo: Keep the video file after post-processing
208 daterange: A DateRange object, download only if the upload_date is in the range.
209 skip_download: Skip the actual download of the video file
210 cachedir: Location of the cache files in the filesystem.
211 False to disable filesystem cache.
212 noplaylist: Download single video instead of a playlist if in doubt.
213 age_limit: An integer representing the user's age in years.
214 Unsuitable videos for the given age are skipped.
215 min_views: An integer representing the minimum view count the video
216 must have in order to not be skipped.
217 Videos without view count information are always
218 downloaded. None for no limit.
219 max_views: An integer representing the maximum view count.
220 Videos that are more popular than that are not
221 downloaded.
222 Videos without view count information are always
223 downloaded. None for no limit.
224 download_archive: File name of a file where all downloads are recorded.
225 Videos already present in the file are not downloaded
226 again.
227 break_on_existing: Stop the download process after attempting to download a file that's
228 in the archive.
229 cookiefile: File name where cookies should be read from and dumped to.
230 nocheckcertificate:Do not verify SSL certificates
231 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
232 At the moment, this is only supported by YouTube.
233 proxy: URL of the proxy server to use
234 geo_verification_proxy: URL of the proxy to use for IP address verification
235 on geo-restricted sites.
236 socket_timeout: Time to wait for unresponsive hosts, in seconds
237 bidi_workaround: Work around buggy terminals without bidirectional text
238 support, using fridibi
239 debug_printtraffic:Print out sent and received HTTP traffic
240 include_ads: Download ads as well
241 default_search: Prepend this string if an input url is not valid.
242 'auto' for elaborate guessing
243 encoding: Use this encoding instead of the system-specified.
244 extract_flat: Do not resolve URLs, return the immediate result.
245 Pass in 'in_playlist' to only show this behavior for
246 playlist items.
247 postprocessors: A list of dictionaries, each with an entry
248 * key: The name of the postprocessor. See
249 youtube_dlc/postprocessor/__init__.py for a list.
250 as well as any further keyword arguments for the
251 postprocessor.
252 progress_hooks: A list of functions that get called on download
253 progress, with a dictionary with the entries
254 * status: One of "downloading", "error", or "finished".
255 Check this first and ignore unknown values.
256
257 If status is one of "downloading", or "finished", the
258 following properties may also be present:
259 * filename: The final filename (always present)
260 * tmpfilename: The filename we're currently writing to
261 * downloaded_bytes: Bytes on disk
262 * total_bytes: Size of the whole file, None if unknown
263 * total_bytes_estimate: Guess of the eventual file size,
264 None if unavailable.
265 * elapsed: The number of seconds since download started.
266 * eta: The estimated time in seconds, None if unknown
267 * speed: The download speed in bytes/second, None if
268 unknown
269 * fragment_index: The counter of the currently
270 downloaded video fragment.
271 * fragment_count: The number of fragments (= individual
272 files that will be merged)
273
274 Progress hooks are guaranteed to be called at least once
275 (with status "finished") if the download is successful.
276 merge_output_format: Extension to use when merging formats.
277 fixup: Automatically correct known faults of the file.
278 One of:
279 - "never": do nothing
280 - "warn": only emit a warning
281 - "detect_or_warn": check whether we can do anything
282 about it, warn otherwise (default)
283 source_address: Client-side IP address to bind to.
284 call_home: Boolean, true iff we are allowed to contact the
285 youtube-dlc servers for debugging.
286 sleep_interval: Number of seconds to sleep before each download when
287 used alone or a lower bound of a range for randomized
288 sleep before each download (minimum possible number
289 of seconds to sleep) when used along with
290 max_sleep_interval.
291 max_sleep_interval:Upper bound of a range for randomized sleep before each
292 download (maximum possible number of seconds to sleep).
293 Must only be used along with sleep_interval.
294 Actual sleep time will be a random float from range
295 [sleep_interval; max_sleep_interval].
296 listformats: Print an overview of available video formats and exit.
297 list_thumbnails: Print a table of all thumbnails and exit.
298 match_filter: A function that gets called with the info_dict of
299 every video.
300 If it returns a message, the video is ignored.
301 If it returns None, the video is downloaded.
302 match_filter_func in utils.py is one example for this.
303 no_color: Do not emit color codes in output.
304 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
305 HTTP header
306 geo_bypass_country:
307 Two-letter ISO 3166-2 country code that will be used for
308 explicit geographic restriction bypassing via faking
309 X-Forwarded-For HTTP header
310 geo_bypass_ip_block:
311 IP range in CIDR notation that will be used similarly to
312 geo_bypass_country
313
314 The following options determine which downloader is picked:
315 external_downloader: Executable of the external downloader to call.
316 None or unset for standard (built-in) downloader.
317 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
318 if True, otherwise use ffmpeg/avconv if False, otherwise
319 use downloader suggested by extractor if None.
320
321 The following parameters are not used by YoutubeDL itself, they are used by
322 the downloader (see youtube_dlc/downloader/common.py):
323 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
324 noresizebuffer, retries, continuedl, noprogress, consoletitle,
325 xattr_set_filesize, external_downloader_args, hls_use_mpegts,
326 http_chunk_size.
327
328 The following options are used by the post processors:
329 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
330 otherwise prefer ffmpeg.
331 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
332 to the binary or its containing directory.
333 postprocessor_args: A list of additional command-line arguments for the
334 postprocessor.
335
336 The following options are used by the Youtube extractor:
337 youtube_include_dash_manifest: If True (default), DASH manifests and related
338 data will be downloaded and processed by extractor.
339 You can reduce network I/O by disabling it if you don't
340 care about DASH.
341 """
342
    # Metadata fields treated as numeric during output-template substitution.
    # prepare_filename() leaves values of these fields unsanitized (numbers
    # need no filename escaping) and rewrites %(field)<spec>d-style template
    # references to %(field)s when the field is missing, so the 'NA'
    # placeholder string can be substituted without a TypeError.
    _NUMERIC_FIELDS = set((
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'upload_year', 'upload_month', 'upload_day',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
        'playlist_index',
    ))

    # Class-level placeholders; every one of these is shadowed by an
    # instance attribute assigned in __init__.
    params = None               # dict of user options (see class docstring)
    _ies = []                   # registered InfoExtractors, in priority order
    _pps = []                   # registered PostProcessors, in chain order
    _download_retcode = None    # exit code accumulated across downloads
    _num_downloads = None       # number of files downloaded this session
    _screen_file = None         # stream used for normal (non-error) output
360
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.

        params: dict of options (see the class docstring); defaults to {}.
        auto_init: when True, print the debug header and register the
        default info extractors immediately.
        """
        if params is None:
            params = {}
        self._ies = []
        self._ies_instances = {}
        self._pps = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Route normal output to stderr when logtostderr is set (bool
        # indexes the two-element list as 0/1).
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = {
            # Default parameters
            'nocheckcertificate': False,
        }
        self.params.update(params)
        self.cache = Cache(self)
        self.archive = set()

        # NOTE(review): the string below is a free-standing expression, not a
        # docstring for the function that follows (it precedes the def).
        """Preload the archive, if any is specified"""
        # Local helper: load previously-downloaded IDs from the archive file
        # into self.archive. Defined with an explicit `self` parameter and
        # called as preload_download_archive(self) below.
        def preload_download_archive(self):
            fn = self.params.get('download_archive')
            if fn is None:
                return False
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        self.archive.add(line.strip())
            except IOError as ioe:
                # A missing archive file is fine; anything else is re-raised.
                if ioe.errno != errno.ENOENT:
                    raise
                return False
            return True

        # Local helper: warn about a deprecated option; returns True when
        # the deprecated option was actually supplied.
        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning(
                    '%s is deprecated. Use %s instead.' % (option, suggestion))
                return True
            return False

        if self.params.get('verbose'):
            # Printed even when no archive is configured (value will be None).
            self.to_stdout('[debug] Loading archive file %r' % self.params.get('download_archive'))

        preload_download_archive(self)

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')

        if params.get('bidi_workaround', False):
            try:
                import pty
                # Spawn bidiv (or fribidi as a fallback) connected to a pty;
                # _bidi_workaround() pipes messages through it.
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if isinstance(params.get('outtmpl'), bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')

        # Build the urllib opener chain (proxies, cookies, TLS settings).
        self._setup_opener()

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        # Instantiate configured postprocessors; 'key' selects the class,
        # remaining entries become constructor keyword arguments.
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)
            del pp_def['key']
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)

        register_socks_protocols()
475
476 def warn_if_short_id(self, argv):
477 # short YouTube ID starting with dash?
478 idxs = [
479 i for i, a in enumerate(argv)
480 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
481 if idxs:
482 correct_argv = (
483 ['youtube-dlc']
484 + [a for i, a in enumerate(argv) if i not in idxs]
485 + ['--'] + [argv[i] for i in idxs]
486 )
487 self.report_warning(
488 'Long argument string detected. '
489 'Use -- to separate parameters and URLs, like this:\n%s\n' %
490 args_to_str(correct_argv))
491
492 def add_info_extractor(self, ie):
493 """Add an InfoExtractor object to the end of the list."""
494 self._ies.append(ie)
495 if not isinstance(ie, type):
496 self._ies_instances[ie.ie_key()] = ie
497 ie.set_downloader(self)
498
499 def get_info_extractor(self, ie_key):
500 """
501 Get an instance of an IE with name ie_key, it will try to get one from
502 the _ies list, if there's no instance it will create a new one and add
503 it to the extractor list.
504 """
505 ie = self._ies_instances.get(ie_key)
506 if ie is None:
507 ie = get_info_extractor(ie_key)()
508 self.add_info_extractor(ie)
509 return ie
510
511 def add_default_info_extractors(self):
512 """
513 Add the InfoExtractors returned by gen_extractors to the end of the list
514 """
515 for ie in gen_extractor_classes():
516 self.add_info_extractor(ie)
517
518 def add_post_processor(self, pp):
519 """Add a PostProcessor object to the end of the chain."""
520 self._pps.append(pp)
521 pp.set_downloader(self)
522
523 def add_progress_hook(self, ph):
524 """Add the progress hook (currently only for the file downloader)"""
525 self._progress_hooks.append(ph)
526
527 def _bidi_workaround(self, message):
528 if not hasattr(self, '_output_channel'):
529 return message
530
531 assert hasattr(self, '_output_process')
532 assert isinstance(message, compat_str)
533 line_count = message.count('\n') + 1
534 self._output_process.stdin.write((message + '\n').encode('utf-8'))
535 self._output_process.stdin.flush()
536 res = ''.join(self._output_channel.readline().decode('utf-8')
537 for _ in range(line_count))
538 return res[:-len('\n')]
539
540 def to_screen(self, message, skip_eol=False):
541 """Print message to stdout if not in quiet mode."""
542 return self.to_stdout(message, skip_eol, check_quiet=True)
543
544 def _write_string(self, s, out=None):
545 write_string(s, out=out, encoding=self.params.get('encoding'))
546
547 def to_stdout(self, message, skip_eol=False, check_quiet=False):
548 """Print message to stdout if not in quiet mode."""
549 if self.params.get('logger'):
550 self.params['logger'].debug(message)
551 elif not check_quiet or not self.params.get('quiet', False):
552 message = self._bidi_workaround(message)
553 terminator = ['\n', ''][skip_eol]
554 output = message + terminator
555
556 self._write_string(output, self._screen_file)
557
558 def to_stderr(self, message):
559 """Print message to stderr."""
560 assert isinstance(message, compat_str)
561 if self.params.get('logger'):
562 self.params['logger'].error(message)
563 else:
564 message = self._bidi_workaround(message)
565 output = message + '\n'
566 self._write_string(output, self._err_file)
567
568 def to_console_title(self, message):
569 if not self.params.get('consoletitle', False):
570 return
571 if compat_os_name == 'nt':
572 if ctypes.windll.kernel32.GetConsoleWindow():
573 # c_wchar_p() might not be necessary if `message` is
574 # already of type unicode()
575 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
576 elif 'TERM' in os.environ:
577 self._write_string('\033]0;%s\007' % message, self._screen_file)
578
579 def save_console_title(self):
580 if not self.params.get('consoletitle', False):
581 return
582 if self.params.get('simulate', False):
583 return
584 if compat_os_name != 'nt' and 'TERM' in os.environ:
585 # Save the title on stack
586 self._write_string('\033[22;0t', self._screen_file)
587
588 def restore_console_title(self):
589 if not self.params.get('consoletitle', False):
590 return
591 if self.params.get('simulate', False):
592 return
593 if compat_os_name != 'nt' and 'TERM' in os.environ:
594 # Restore the title from stack
595 self._write_string('\033[23;0t', self._screen_file)
596
597 def __enter__(self):
598 self.save_console_title()
599 return self
600
601 def __exit__(self, *args):
602 self.restore_console_title()
603
604 if self.params.get('cookiefile') is not None:
605 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
606
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # A wrapped ExtractorError-style exception carries the
                    # original traceback in its .exc_info attribute; include
                    # it before the current one.
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: show the current call stack.
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Prefer the wrapped exception's exc_info when available so the
            # DownloadError points at the root cause.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        # ignoreerrors: record the failure in the process return code instead.
        self._download_retcode = 1
636
637 def report_warning(self, message):
638 '''
639 Print the message to stderr, it will be prefixed with 'WARNING:'
640 If stderr is a tty file the 'WARNING:' will be colored
641 '''
642 if self.params.get('logger') is not None:
643 self.params['logger'].warning(message)
644 else:
645 if self.params.get('no_warnings'):
646 return
647 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
648 _msg_header = '\033[0;33mWARNING:\033[0m'
649 else:
650 _msg_header = 'WARNING:'
651 warning_message = '%s %s' % (_msg_header, message)
652 self.to_stderr(warning_message)
653
654 def report_error(self, message, tb=None):
655 '''
656 Do the same as trouble, but prefixes the message with 'ERROR:', colored
657 in red if stderr is a tty file.
658 '''
659 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
660 _msg_header = '\033[0;31mERROR:\033[0m'
661 else:
662 _msg_header = 'ERROR:'
663 error_message = '%s %s' % (_msg_header, message)
664 self.trouble(error_message, tb)
665
666 def report_file_already_downloaded(self, file_name):
667 """Report file has already been fully downloaded."""
668 try:
669 self.to_screen('[download] %s has already been downloaded' % file_name)
670 except UnicodeEncodeError:
671 self.to_screen('[download] The file has already been downloaded')
672
    def prepare_filename(self, info_dict):
        """Generate the output filename.

        Expands the configured output template (outtmpl) with the metadata
        in info_dict, sanitizing values for filesystem use. Returns the
        sanitized path, or None when the template is invalid.
        """
        try:
            template_dict = dict(info_dict)

            # Synthesized template fields not present in the metadata.
            template_dict['epoch'] = int(time.time())
            autonumber_size = self.params.get('autonumber_size')
            if autonumber_size is None:
                autonumber_size = 5
            template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
            if template_dict.get('resolution') is None:
                if template_dict.get('width') and template_dict.get('height'):
                    template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
                elif template_dict.get('height'):
                    template_dict['resolution'] = '%sp' % template_dict['height']
                elif template_dict.get('width'):
                    template_dict['resolution'] = '%dx?' % template_dict['width']

            # Sanitize string values for filename use; numeric values are
            # passed through untouched, container values are dropped.
            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id' or k.endswith('_id')))
            template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
                                 for k, v in template_dict.items()
                                 if v is not None and not isinstance(v, (list, tuple, dict)))
            # Missing fields substitute as the literal string 'NA'.
            template_dict = collections.defaultdict(lambda: 'NA', template_dict)

            outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)

            # For fields playlist_index and autonumber convert all occurrences
            # of %(field)s to %(field)0Nd for backward compatibility
            field_size_compat_map = {
                'playlist_index': len(str(template_dict['n_entries'])),
                'autonumber': autonumber_size,
            }
            FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
            mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
            if mobj:
                outtmpl = re.sub(
                    FIELD_SIZE_COMPAT_RE,
                    r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
                    outtmpl)

            # Missing numeric fields used together with integer presentation types
            # in format specification will break the argument substitution since
            # string 'NA' is returned for missing fields. We will patch output
            # template for missing fields to meet string presentation type.
            for numeric_field in self._NUMERIC_FIELDS:
                if numeric_field not in template_dict:
                    # As of [1] format syntax is:
                    # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
                    # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
                    FORMAT_RE = r'''(?x)
                        (?<!%)
                        %
                        \({0}\)  # mapping key
                        (?:[#0\-+ ]+)?  # conversion flags (optional)
                        (?:\d+)?  # minimum field width (optional)
                        (?:\.\d+)?  # precision (optional)
                        [hlL]?  # length modifier (optional)
                        [diouxXeEfFgGcrs%]  # conversion type
                    '''
                    outtmpl = re.sub(
                        FORMAT_RE.format(numeric_field),
                        r'%({0})s'.format(numeric_field), outtmpl)

            # expand_path translates '%%' into '%' and '$$' into '$'
            # correspondingly that is not what we want since we need to keep
            # '%%' intact for template dict substitution step. Working around
            # with boundary-alike separator hack.
            sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
            outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

            # outtmpl should be expand_path'ed before template dict substitution
            # because meta fields may contain env variables we don't want to
            # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
            # title "Hello $PATH", we don't want `$PATH` to be expanded.
            filename = expand_path(outtmpl).replace(sep, '') % template_dict

            # https://github.com/blackjack4494/youtube-dlc/issues/85
            # trim_file_name is an int limit on the base-name length; keep the
            # extension (and a secondary extension such as '.info') intact.
            trim_file_name = self.params.get('trim_file_name', False)
            if trim_file_name:
                fn_groups = filename.rsplit('.')
                ext = fn_groups[-1]
                sub_ext = ''
                if len(fn_groups) > 2:
                    sub_ext = fn_groups[-2]
                filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))

            # Temporary fix for #4787
            # 'Treat' all problem characters by passing filename through preferredencoding
            # to workaround encoding issues with subprocess on python2 @ Windows
            if sys.version_info < (3, 0) and sys.platform == 'win32':
                filename = encodeFilename(filename, True).decode(preferredencoding())
            return sanitize_path(filename)
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None
771
772 def _match_entry(self, info_dict, incomplete):
773 """ Returns None if the file should be downloaded """
774
775 video_title = info_dict.get('title', info_dict.get('id', 'video'))
776 if 'title' in info_dict:
777 # This can happen when we're just evaluating the playlist
778 title = info_dict['title']
779 matchtitle = self.params.get('matchtitle', False)
780 if matchtitle:
781 if not re.search(matchtitle, title, re.IGNORECASE):
782 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
783 rejecttitle = self.params.get('rejecttitle', False)
784 if rejecttitle:
785 if re.search(rejecttitle, title, re.IGNORECASE):
786 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
787 date = info_dict.get('upload_date')
788 if date is not None:
789 dateRange = self.params.get('daterange', DateRange())
790 if date not in dateRange:
791 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
792 view_count = info_dict.get('view_count')
793 if view_count is not None:
794 min_views = self.params.get('min_views')
795 if min_views is not None and view_count < min_views:
796 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
797 max_views = self.params.get('max_views')
798 if max_views is not None and view_count > max_views:
799 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
800 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
801 return 'Skipping "%s" because it is age restricted' % video_title
802 if self.in_download_archive(info_dict):
803 return '%s has already been recorded in archive' % video_title
804
805 if not incomplete:
806 match_filter = self.params.get('match_filter')
807 if match_filter is not None:
808 ret = match_filter(info_dict)
809 if ret is not None:
810 return ret
811
812 return None
813
814 @staticmethod
815 def add_extra_info(info_dict, extra_info):
816 '''Set the keys from extra_info in info dict if they are missing'''
817 for key, value in extra_info.items():
818 info_dict.setdefault(key, value)
819
    def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info={},
                     process=True, force_generic_extractor=False):
        '''
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.

        ie_key                  -- use only the extractor with this key
        info_dict               -- metadata of an outer result ('id'/'title'
                                   are propagated by __extract_info)
        extra_info              -- a dict containing the extra values to add to each result
        process                 -- if False, return the raw extractor result
                                   without resolving it via process_ie_result
        force_generic_extractor -- force usage of the generic extractor
        '''

        if not ie_key and force_generic_extractor:
            ie_key = 'Generic'

        if ie_key:
            ies = [self.get_info_extractor(ie_key)]
        else:
            ies = self._ies

        # Try each extractor in order; the first suitable one wins.
        for ie in ies:
            if not ie.suitable(url):
                continue

            ie_key = ie.ie_key()
            ie = self.get_info_extractor(ie_key)
            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            # Cheaply derive the video id from the URL alone so the archive
            # check can short-circuit before any network request is made.
            # Not every extractor supports this, hence the broad except.
            try:
                temp_id = ie.extract_id(url) if callable(getattr(ie, 'extract_id', None)) else ie._match_id(url)
            except (AssertionError, IndexError, AttributeError):
                temp_id = None
            if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
                self.to_screen("[%s] %s: has already been recorded in archive" % (
                    ie_key, temp_id))
                break

            return self.__extract_info(url, ie, download, extra_info, process, info_dict)

        # for/else: only reached when no extractor was suitable (the archive
        # short-circuit above breaks out without triggering this).
        else:
            self.report_error('no suitable InfoExtractor for URL %s' % url)
859
860 def __handle_extraction_exceptions(func):
861 def wrapper(self, *args, **kwargs):
862 try:
863 return func(self, *args, **kwargs)
864 except GeoRestrictedError as e:
865 msg = e.msg
866 if e.countries:
867 msg += '\nThis video is available in %s.' % ', '.join(
868 map(ISO3166Utils.short2full, e.countries))
869 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
870 self.report_error(msg)
871 except ExtractorError as e: # An error we somewhat expected
872 self.report_error(compat_str(e), e.format_traceback())
873 except MaxDownloadsReached:
874 raise
875 except Exception as e:
876 if self.params.get('ignoreerrors', False):
877 self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
878 else:
879 raise
880 return wrapper
881
882 @__handle_extraction_exceptions
883 def __extract_info(self, url, ie, download, extra_info, process, info_dict):
884 ie_result = ie.extract(url)
885 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
886 return
887 if isinstance(ie_result, list):
888 # Backwards compatibility: old IE result format
889 ie_result = {
890 '_type': 'compat_list',
891 'entries': ie_result,
892 }
893 if info_dict:
894 if info_dict.get('id'):
895 ie_result['id'] = info_dict['id']
896 if info_dict.get('title'):
897 ie_result['title'] = info_dict['title']
898 self.add_default_extra_info(ie_result, ie, url)
899 if process:
900 return self.process_ie_result(ie_result, download, extra_info)
901 else:
902 return ie_result
903
904 def add_default_extra_info(self, ie_result, ie, url):
905 self.add_extra_info(ie_result, {
906 'extractor': ie.IE_NAME,
907 'webpage_url': url,
908 'webpage_url_basename': url_basename(url),
909 'extractor_key': ie.ie_key(),
910 })
911
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie (may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.

        Dispatches on ie_result['_type']: 'video' goes straight to
        process_video_result; 'url'/'url_transparent' are re-extracted
        (recursively); 'playlist'/'multi_video' iterate their entries; and
        the legacy 'compat_list' is processed entry by entry.
        """
        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            ie_result['url'] = sanitize_url(ie_result['url'])
            extract_flat = self.params.get('extract_flat', False)
            # With --flat-playlist, playlist members are not resolved further.
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                    or extract_flat is True):
                self.__forced_printings(
                    ie_result, self.prepare_filename(ie_result),
                    incomplete=True)
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download, info_dict=ie_result,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # extract_info may return None when ignoreerrors is enabled and
            # extraction failed with an error, don't crash and return early
            # in this case
            if not info:
                return info

            # Non-None outer metadata wins over the inner (embedded) result,
            # except for the identity/bookkeeping fields listed below.
            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            # Extracted info may not be a video result (i.e.
            # info.get('_type', 'video') != video) but rather an url or
            # url_transparent. In such cases outer metadata (from ie_result)
            # should be propagated to inner one (info). For this to happen
            # _type of info should be overridden with url_transparent. This
            # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
            if new_result.get('_type') == 'url':
                new_result['_type'] = 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type in ('playlist', 'multi_video'):
            # We process each entry in the playlist
            playlist = ie_result.get('title') or ie_result.get('id')
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # --playlist-start is 1-based on the CLI; convert to 0-based.
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend')
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                playlistend = None

            playlistitems_str = self.params.get('playlist_items')
            playlistitems = None
            if playlistitems_str is not None:
                # Expand '1-3,7' into the 1-based indices [1, 2, 3, 7].
                def iter_playlistitems(format):
                    for string_segment in format.split(','):
                        if '-' in string_segment:
                            start, end = string_segment.split('-')
                            for item in range(int(start), int(end) + 1):
                                yield int(item)
                        else:
                            yield int(string_segment)
                playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

            ie_entries = ie_result['entries']

            # Select entries by their 1-based indices, silently dropping
            # indices that fall outside the list.
            def make_playlistitems_entries(list_ie_entries):
                num_entries = len(list_ie_entries)
                return [
                    list_ie_entries[i - 1] for i in playlistitems
                    if -num_entries <= i - 1 < num_entries]

            def report_download(num_entries):
                self.to_screen(
                    '[%s] playlist %s: Downloading %d videos' %
                    (ie_result['extractor'], playlist, num_entries))

            # The entries may be a concrete list, a lazily-fetched PagedList,
            # or an arbitrary iterable (e.g. a generator).
            if isinstance(ie_entries, list):
                n_all_entries = len(ie_entries)
                if playlistitems:
                    entries = make_playlistitems_entries(ie_entries)
                else:
                    entries = ie_entries[playliststart:playlistend]
                n_entries = len(entries)
                self.to_screen(
                    '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            elif isinstance(ie_entries, PagedList):
                if playlistitems:
                    entries = []
                    for item in playlistitems:
                        entries.extend(ie_entries.getslice(
                            item - 1, item
                        ))
                else:
                    entries = ie_entries.getslice(
                        playliststart, playlistend)
                n_entries = len(entries)
                report_download(n_entries)
            else:  # iterable
                if playlistitems:
                    # Materialize only up to the largest requested index.
                    entries = make_playlistitems_entries(list(itertools.islice(
                        ie_entries, 0, max(playlistitems))))
                else:
                    entries = list(itertools.islice(
                        ie_entries, playliststart, playlistend))
                n_entries = len(entries)
                report_download(n_entries)

            if self.params.get('playlistreverse', False):
                entries = entries[::-1]

            if self.params.get('playlistrandom', False):
                random.shuffle(entries)

            x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
                # This __x_forwarded_for_ip thing is a bit ugly but requires
                # minimal changes
                if x_forwarded_for:
                    entry['__x_forwarded_for_ip'] = x_forwarded_for
                # Playlist context propagated into each entry's result.
                extra = {
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_id': ie_result.get('id'),
                    'playlist_title': ie_result.get('title'),
                    'playlist_uploader': ie_result.get('uploader'),
                    'playlist_uploader_id': ie_result.get('uploader_id'),
                    'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }

                reason = self._match_entry(entry, incomplete=True)
                if reason is not None:
                    # NOTE(review): this suffix must agree with the exact
                    # message _match_entry produces for archived entries —
                    # verify they match, otherwise --break-on-existing never
                    # triggers.  Also uses print() instead of self.to_screen;
                    # confirm that is intentional.
                    if reason.endswith('has already been recorded in the archive') and self.params.get('break_on_existing'):
                        print('[download] tried downloading a file that\'s already in the archive, stopping since --break-on-existing is set.')
                        break
                    else:
                        self.to_screen('[download] ' + reason)
                        continue

                entry_result = self.__process_iterable_entry(entry, download, extra)
                # TODO: skip failed (empty) entries?
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            self.to_screen('[download] Finished downloading playlist: %s' % playlist)
            return ie_result
        elif result_type == 'compat_list':
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))

            def _fixup(r):
                self.add_extra_info(
                    r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    }
                )
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
1110
1111 @__handle_extraction_exceptions
1112 def __process_iterable_entry(self, entry, download, extra_info):
1113 return self.process_ie_result(
1114 entry, download=download, extra_info=extra_info)
1115
1116 def _build_format_filter(self, filter_spec):
1117 " Returns a function to filter the formats according to the filter_spec "
1118
1119 OPERATORS = {
1120 '<': operator.lt,
1121 '<=': operator.le,
1122 '>': operator.gt,
1123 '>=': operator.ge,
1124 '=': operator.eq,
1125 '!=': operator.ne,
1126 }
1127 operator_rex = re.compile(r'''(?x)\s*
1128 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
1129 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1130 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
1131 $
1132 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1133 m = operator_rex.search(filter_spec)
1134 if m:
1135 try:
1136 comparison_value = int(m.group('value'))
1137 except ValueError:
1138 comparison_value = parse_filesize(m.group('value'))
1139 if comparison_value is None:
1140 comparison_value = parse_filesize(m.group('value') + 'B')
1141 if comparison_value is None:
1142 raise ValueError(
1143 'Invalid value %r in format specification %r' % (
1144 m.group('value'), filter_spec))
1145 op = OPERATORS[m.group('op')]
1146
1147 if not m:
1148 STR_OPERATORS = {
1149 '=': operator.eq,
1150 '^=': lambda attr, value: attr.startswith(value),
1151 '$=': lambda attr, value: attr.endswith(value),
1152 '*=': lambda attr, value: value in attr,
1153 }
1154 str_operator_rex = re.compile(r'''(?x)
1155 \s*(?P<key>[a-zA-Z0-9._-]+)
1156 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
1157 \s*(?P<value>[a-zA-Z0-9._-]+)
1158 \s*$
1159 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1160 m = str_operator_rex.search(filter_spec)
1161 if m:
1162 comparison_value = m.group('value')
1163 str_op = STR_OPERATORS[m.group('op')]
1164 if m.group('negation'):
1165 op = lambda attr, value: not str_op(attr, value)
1166 else:
1167 op = str_op
1168
1169 if not m:
1170 raise ValueError('Invalid filter specification %r' % filter_spec)
1171
1172 def _filter(f):
1173 actual_value = f.get(m.group('key'))
1174 if actual_value is None:
1175 return m.group('none_inclusive')
1176 return op(actual_value, comparison_value)
1177 return _filter
1178
1179 def _default_format_spec(self, info_dict, download=True):
1180
1181 def can_merge():
1182 merger = FFmpegMergerPP(self)
1183 return merger.available and merger.can_merge()
1184
1185 def prefer_best():
1186 if self.params.get('simulate', False):
1187 return False
1188 if not download:
1189 return False
1190 if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
1191 return True
1192 if info_dict.get('is_live'):
1193 return True
1194 if not can_merge():
1195 return True
1196 return False
1197
1198 req_format_list = ['bestvideo+bestaudio', 'best']
1199 if prefer_best():
1200 req_format_list.reverse()
1201 return '/'.join(req_format_list)
1202
    def build_format_selector(self, format_spec):
        """Compile a --format specification string into a selector function.

        The spec is tokenized with the stdlib tokenizer, parsed into a tree of
        FormatSelector nodes (SINGLE atoms, '/' PICKFIRST alternatives,
        '+' MERGE pairs, '(...)' GROUPs and '[...]' filters) and then compiled
        into a function mapping a context dict ({'formats', 'incomplete_formats'})
        to an iterable of selected formats.
        Raises SyntaxError for a malformed spec.
        """
        def syntax_error(note, start):
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        # Node type tags for the parse tree.
        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', True),
                                  'video': self.params.get('allow_multiple_video_streams', True)}

        # Collect everything up to the closing ']' as one raw filter string.
        def _parse_filter(tokens):
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    # Fuse adjacent name/number/op tokens into one identifier.
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        # Recursive-descent parser over the (pre-processed) token stream.
        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        if not current_selector:
                            raise syntax_error('Unexpected "+"', start)
                        selector_1 = current_selector
                        selector_2 = _parse_format_selection(tokens, inside_merge=True)
                        if not selector_2:
                            raise syntax_error('Expected a selector', start)
                        current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        # Compile a parse tree node into a ctx -> formats function.
        def _build_selector_function(selector):
            if isinstance(selector, list):  # ,
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    for f in fs:
                        for format in f(ctx):
                            yield format
                return selector_function

            elif selector.type == GROUP:  # ()
                selector_function = _build_selector_function(selector.selector)

            elif selector.type == PICKFIRST:  # /
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []

            elif selector.type == SINGLE:  # atom
                format_spec = selector.selector if selector.selector is not None else 'best'

                if format_spec == 'all':
                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        if formats:
                            for f in formats:
                                yield f

                else:
                    format_fallback = False
                    # best/worst (b/w), optionally restricted to video/audio
                    # (v/a) and/or modified with '*'.
                    format_spec_obj = re.match(r'(best|worst|b|w)(video|audio|v|a)?(\*)?$', format_spec)
                    if format_spec_obj is not None:
                        format_idx = 0 if format_spec_obj.group(1)[0] == 'w' else -1
                        format_type = format_spec_obj.group(2)[0] if format_spec_obj.group(2) else False
                        not_format_type = 'v' if format_type == 'a' else 'a'
                        format_modified = format_spec_obj.group(3) is not None

                        format_fallback = not format_type and not format_modified  # for b, w
                        filter_f = ((lambda f: f.get(format_type + 'codec') != 'none')
                                    if format_type and format_modified  # bv*, ba*, wv*, wa*
                                    else (lambda f: f.get(not_format_type + 'codec') == 'none')
                                    if format_type  # bv, ba, wv, wa
                                    else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                                    if not format_modified  # b, w
                                    else None)  # b*, w*
                    else:
                        # Otherwise the atom is an extension or a format_id.
                        format_idx = -1
                        filter_f = ((lambda f: f.get('ext') == format_spec)
                                    if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
                                    else (lambda f: f.get('format_id') == format_spec))  # id

                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        if not formats:
                            return
                        matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                        if matches:
                            yield matches[format_idx]
                        # NOTE(review): format_fallback is only ever a bool in
                        # this function, so the == 'force' comparison looks
                        # unreachable here — verify before relying on it.
                        elif format_fallback == 'force' or (format_fallback and ctx['incomplete_formats']):
                            # for extractors with incomplete formats (audio only (soundcloud)
                            # or video only (imgur)) best/worst will fallback to
                            # best/worst {video,audio}-only format
                            yield formats[format_idx]

            elif selector.type == MERGE:  # +
                def _merge(formats_pair):
                    format_1, format_2 = formats_pair

                    formats_info = []
                    formats_info.extend(format_1.get('requested_formats', (format_1,)))
                    formats_info.extend(format_2.get('requested_formats', (format_2,)))

                    # Drop surplus audio/video streams when multiple streams
                    # of that kind are not allowed.
                    # NOTE(review): this pops from formats_info while
                    # enumerating it, which skips the element following each
                    # removal — verify whether that is intended.
                    if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
                        get_no_more = {"video": False, "audio": False}
                        for (i, fmt_info) in enumerate(formats_info):
                            for aud_vid in ["audio", "video"]:
                                if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                                    if get_no_more[aud_vid]:
                                        formats_info.pop(i)
                                    get_no_more[aud_vid] = True

                    if len(formats_info) == 1:
                        return formats_info[0]

                    video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
                    audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

                    the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
                    the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

                    output_ext = self.params.get('merge_output_format')
                    if not output_ext:
                        if the_only_video:
                            output_ext = the_only_video['ext']
                        elif the_only_audio and not video_fmts:
                            output_ext = the_only_audio['ext']
                        else:
                            output_ext = 'mkv'

                    new_dict = {
                        'requested_formats': formats_info,
                        'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
                        'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
                        'ext': output_ext,
                    }

                    if the_only_video:
                        new_dict.update({
                            'width': the_only_video.get('width'),
                            'height': the_only_video.get('height'),
                            'resolution': the_only_video.get('resolution'),
                            'fps': the_only_video.get('fps'),
                            'vcodec': the_only_video.get('vcodec'),
                            'vbr': the_only_video.get('vbr'),
                            'stretched_ratio': the_only_video.get('stretched_ratio'),
                        })

                    if the_only_audio:
                        new_dict.update({
                            'acodec': the_only_audio.get('acodec'),
                            'abr': the_only_audio.get('abr'),
                        })

                    return new_dict

                selector_1, selector_2 = map(_build_selector_function, selector.selector)

                def selector_function(ctx):
                    for pair in itertools.product(
                            selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                        yield _merge(pair)

            # Apply the node's '[...]' filters to a deep copy of the context
            # so sibling selectors still see the unfiltered formats.
            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                ctx_copy = copy.deepcopy(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector

        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        # Token stream with one-token lookback, needed by the parser above.
        class TokenIterator(object):
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            next = __next__

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
1490
1491 def _calc_headers(self, info_dict):
1492 res = std_headers.copy()
1493
1494 add_headers = info_dict.get('http_headers')
1495 if add_headers:
1496 res.update(add_headers)
1497
1498 cookies = self._calc_cookies(info_dict)
1499 if cookies:
1500 res['Cookie'] = cookies
1501
1502 if 'X-Forwarded-For' not in res:
1503 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1504 if x_forwarded_for_ip:
1505 res['X-Forwarded-For'] = x_forwarded_for_ip
1506
1507 return res
1508
1509 def _calc_cookies(self, info_dict):
1510 pr = sanitized_Request(info_dict['url'])
1511 self.cookiejar.add_cookie_header(pr)
1512 return pr.get_header('Cookie')
1513
1514 def process_video_result(self, info_dict, download=True):
1515 assert info_dict.get('_type', 'video') == 'video'
1516
1517 if 'id' not in info_dict:
1518 raise ExtractorError('Missing "id" field in extractor result')
1519 if 'title' not in info_dict:
1520 raise ExtractorError('Missing "title" field in extractor result')
1521
1522 def report_force_conversion(field, field_not, conversion):
1523 self.report_warning(
1524 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
1525 % (field, field_not, conversion))
1526
1527 def sanitize_string_field(info, string_field):
1528 field = info.get(string_field)
1529 if field is None or isinstance(field, compat_str):
1530 return
1531 report_force_conversion(string_field, 'a string', 'string')
1532 info[string_field] = compat_str(field)
1533
1534 def sanitize_numeric_fields(info):
1535 for numeric_field in self._NUMERIC_FIELDS:
1536 field = info.get(numeric_field)
1537 if field is None or isinstance(field, compat_numeric_types):
1538 continue
1539 report_force_conversion(numeric_field, 'numeric', 'int')
1540 info[numeric_field] = int_or_none(field)
1541
1542 sanitize_string_field(info_dict, 'id')
1543 sanitize_numeric_fields(info_dict)
1544
1545 if 'playlist' not in info_dict:
1546 # It isn't part of a playlist
1547 info_dict['playlist'] = None
1548 info_dict['playlist_index'] = None
1549
1550 thumbnails = info_dict.get('thumbnails')
1551 if thumbnails is None:
1552 thumbnail = info_dict.get('thumbnail')
1553 if thumbnail:
1554 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1555 if thumbnails:
1556 thumbnails.sort(key=lambda t: (
1557 t.get('preference') if t.get('preference') is not None else -1,
1558 t.get('width') if t.get('width') is not None else -1,
1559 t.get('height') if t.get('height') is not None else -1,
1560 t.get('id') if t.get('id') is not None else '', t.get('url')))
1561 for i, t in enumerate(thumbnails):
1562 t['url'] = sanitize_url(t['url'])
1563 if t.get('width') and t.get('height'):
1564 t['resolution'] = '%dx%d' % (t['width'], t['height'])
1565 if t.get('id') is None:
1566 t['id'] = '%d' % i
1567
1568 if self.params.get('list_thumbnails'):
1569 self.list_thumbnails(info_dict)
1570 return
1571
1572 thumbnail = info_dict.get('thumbnail')
1573 if thumbnail:
1574 info_dict['thumbnail'] = sanitize_url(thumbnail)
1575 elif thumbnails:
1576 info_dict['thumbnail'] = thumbnails[-1]['url']
1577
1578 if 'display_id' not in info_dict and 'id' in info_dict:
1579 info_dict['display_id'] = info_dict['id']
1580
1581 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1582 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1583 # see http://bugs.python.org/issue1646728)
1584 try:
1585 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1586 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1587 except (ValueError, OverflowError, OSError):
1588 pass
1589
1590 # Auto generate title fields corresponding to the *_number fields when missing
1591 # in order to always have clean titles. This is very common for TV series.
1592 for field in ('chapter', 'season', 'episode'):
1593 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1594 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
1595
1596 for cc_kind in ('subtitles', 'automatic_captions'):
1597 cc = info_dict.get(cc_kind)
1598 if cc:
1599 for _, subtitle in cc.items():
1600 for subtitle_format in subtitle:
1601 if subtitle_format.get('url'):
1602 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
1603 if subtitle_format.get('ext') is None:
1604 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1605
1606 automatic_captions = info_dict.get('automatic_captions')
1607 subtitles = info_dict.get('subtitles')
1608
1609 if self.params.get('listsubtitles', False):
1610 if 'automatic_captions' in info_dict:
1611 self.list_subtitles(
1612 info_dict['id'], automatic_captions, 'automatic captions')
1613 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1614 return
1615
1616 info_dict['requested_subtitles'] = self.process_subtitles(
1617 info_dict['id'], subtitles, automatic_captions)
1618
1619 # We now pick which formats have to be downloaded
1620 if info_dict.get('formats') is None:
1621 # There's only one format available
1622 formats = [info_dict]
1623 else:
1624 formats = info_dict['formats']
1625
1626 if not formats:
1627 raise ExtractorError('No video formats found!')
1628
1629 def is_wellformed(f):
1630 url = f.get('url')
1631 if not url:
1632 self.report_warning(
1633 '"url" field is missing or empty - skipping format, '
1634 'there is an error in extractor')
1635 return False
1636 if isinstance(url, bytes):
1637 sanitize_string_field(f, 'url')
1638 return True
1639
1640 # Filter out malformed formats for better extraction robustness
1641 formats = list(filter(is_wellformed, formats))
1642
1643 formats_dict = {}
1644
1645 # We check that all the formats have the format and format_id fields
1646 for i, format in enumerate(formats):
1647 sanitize_string_field(format, 'format_id')
1648 sanitize_numeric_fields(format)
1649 format['url'] = sanitize_url(format['url'])
1650 if not format.get('format_id'):
1651 format['format_id'] = compat_str(i)
1652 else:
1653 # Sanitize format_id from characters used in format selector expression
1654 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
1655 format_id = format['format_id']
1656 if format_id not in formats_dict:
1657 formats_dict[format_id] = []
1658 formats_dict[format_id].append(format)
1659
1660 # Make sure all formats have unique format_id
1661 for format_id, ambiguous_formats in formats_dict.items():
1662 if len(ambiguous_formats) > 1:
1663 for i, format in enumerate(ambiguous_formats):
1664 format['format_id'] = '%s-%d' % (format_id, i)
1665
1666 for i, format in enumerate(formats):
1667 if format.get('format') is None:
1668 format['format'] = '{id} - {res}{note}'.format(
1669 id=format['format_id'],
1670 res=self.format_resolution(format),
1671 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1672 )
1673 # Automatically determine file extension if missing
1674 if format.get('ext') is None:
1675 format['ext'] = determine_ext(format['url']).lower()
1676 # Automatically determine protocol if missing (useful for format
1677 # selection purposes)
1678 if format.get('protocol') is None:
1679 format['protocol'] = determine_protocol(format)
1680 # Add HTTP headers, so that external programs can use them from the
1681 # json output
1682 full_format_info = info_dict.copy()
1683 full_format_info.update(format)
1684 format['http_headers'] = self._calc_headers(full_format_info)
1685 # Remove private housekeeping stuff
1686 if '__x_forwarded_for_ip' in info_dict:
1687 del info_dict['__x_forwarded_for_ip']
1688
1689 # TODO Central sorting goes here
1690
1691 if formats[0] is not info_dict:
1692 # only set the 'formats' fields if the original info_dict list them
1693 # otherwise we end up with a circular reference, the first (and unique)
1694 # element in the 'formats' field in info_dict is info_dict itself,
1695 # which can't be exported to json
1696 info_dict['formats'] = formats
1697 if self.params.get('listformats'):
1698 self.list_formats(info_dict)
1699 return
1700
1701 req_format = self.params.get('format')
1702 if req_format is None:
1703 req_format = self._default_format_spec(info_dict, download=download)
1704 if self.params.get('verbose'):
1705 self.to_stdout('[debug] Default format spec: %s' % req_format)
1706
1707 format_selector = self.build_format_selector(req_format)
1708
1709 # While in format selection we may need to have an access to the original
1710 # format set in order to calculate some metrics or do some processing.
1711 # For now we need to be able to guess whether original formats provided
1712 # by extractor are incomplete or not (i.e. whether extractor provides only
1713 # video-only or audio-only formats) for proper formats selection for
1714 # extractors with such incomplete formats (see
1715 # https://github.com/ytdl-org/youtube-dl/pull/5556).
1716 # Since formats may be filtered during format selection and may not match
1717 # the original formats the results may be incorrect. Thus original formats
1718 # or pre-calculated metrics should be passed to format selection routines
1719 # as well.
1720 # We will pass a context object containing all necessary additional data
1721 # instead of just formats.
1722 # This fixes incorrect format selection issue (see
1723 # https://github.com/ytdl-org/youtube-dl/issues/10083).
1724 incomplete_formats = (
1725 # All formats are video-only or
1726 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
1727 # all formats are audio-only
1728 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
1729
1730 ctx = {
1731 'formats': formats,
1732 'incomplete_formats': incomplete_formats,
1733 }
1734
1735 formats_to_download = list(format_selector(ctx))
1736 if not formats_to_download:
1737 raise ExtractorError('requested format not available',
1738 expected=True)
1739
1740 if download:
1741 self.to_screen('[info] Downloading format(s) %s' % ", ".join([f['format_id'] for f in formats_to_download]))
1742 if len(formats_to_download) > 1:
1743 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1744 for format in formats_to_download:
1745 new_info = dict(info_dict)
1746 new_info.update(format)
1747 self.process_info(new_info)
1748 # We update the info dict with the best quality format (backwards compatibility)
1749 info_dict.update(formats_to_download[-1])
1750 return info_dict
1751
1752 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1753 """Select the requested subtitles and their format"""
1754 available_subs = {}
1755 if normal_subtitles and self.params.get('writesubtitles'):
1756 available_subs.update(normal_subtitles)
1757 if automatic_captions and self.params.get('writeautomaticsub'):
1758 for lang, cap_info in automatic_captions.items():
1759 if lang not in available_subs:
1760 available_subs[lang] = cap_info
1761
1762 if (not self.params.get('writesubtitles') and not
1763 self.params.get('writeautomaticsub') or not
1764 available_subs):
1765 return None
1766
1767 if self.params.get('allsubtitles', False):
1768 requested_langs = available_subs.keys()
1769 else:
1770 if self.params.get('subtitleslangs', False):
1771 requested_langs = self.params.get('subtitleslangs')
1772 elif 'en' in available_subs:
1773 requested_langs = ['en']
1774 else:
1775 requested_langs = [list(available_subs.keys())[0]]
1776
1777 formats_query = self.params.get('subtitlesformat', 'best')
1778 formats_preference = formats_query.split('/') if formats_query else []
1779 subs = {}
1780 for lang in requested_langs:
1781 formats = available_subs.get(lang)
1782 if formats is None:
1783 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1784 continue
1785 for ext in formats_preference:
1786 if ext == 'best':
1787 f = formats[-1]
1788 break
1789 matches = list(filter(lambda f: f['ext'] == ext, formats))
1790 if matches:
1791 f = matches[-1]
1792 break
1793 else:
1794 f = formats[-1]
1795 self.report_warning(
1796 'No subtitle format found matching "%s" for language %s, '
1797 'using %s' % (formats_query, lang, f['ext']))
1798 subs[lang] = f
1799 return subs
1800
1801 def __forced_printings(self, info_dict, filename, incomplete):
1802 def print_mandatory(field):
1803 if (self.params.get('force%s' % field, False)
1804 and (not incomplete or info_dict.get(field) is not None)):
1805 self.to_stdout(info_dict[field])
1806
1807 def print_optional(field):
1808 if (self.params.get('force%s' % field, False)
1809 and info_dict.get(field) is not None):
1810 self.to_stdout(info_dict[field])
1811
1812 print_mandatory('title')
1813 print_mandatory('id')
1814 if self.params.get('forceurl', False) and not incomplete:
1815 if info_dict.get('requested_formats') is not None:
1816 for f in info_dict['requested_formats']:
1817 self.to_stdout(f['url'] + f.get('play_path', ''))
1818 else:
1819 # For RTMP URLs, also include the playpath
1820 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1821 print_optional('thumbnail')
1822 print_optional('description')
1823 if self.params.get('forcefilename', False) and filename is not None:
1824 self.to_stdout(filename)
1825 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1826 self.to_stdout(formatSeconds(info_dict['duration']))
1827 print_mandatory('format')
1828 if self.params.get('forcejson', False):
1829 self.to_stdout(json.dumps(info_dict))
1830
    def process_info(self, info_dict):
        """Process a single resolved IE result.

        Writes the requested side files (description, annotations,
        subtitles, info JSON, thumbnails, internet shortcuts), downloads
        the media itself (merging multiple requested formats when needed),
        queues fixup postprocessors, runs postprocessing and finally
        records the download in the archive.
        """

        assert info_dict.get('_type', 'video') == 'video'

        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads >= int(max_downloads):
                raise MaxDownloadsReached()

        # TODO: backward compatibility, to be removed
        info_dict['fulltitle'] = info_dict['title']

        if 'format' not in info_dict:
            info_dict['format'] = info_dict['ext']

        # Apply --match-filter / date range / etc.; a non-None reason means skip
        reason = self._match_entry(info_dict, incomplete=False)
        if reason is not None:
            self.to_screen('[download] ' + reason)
            return

        self._num_downloads += 1

        info_dict['_filename'] = filename = self.prepare_filename(info_dict)

        # Forced printings
        self.__forced_printings(info_dict, filename, incomplete=False)

        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):
            return

        if filename is None:
            return

        def ensure_dir_exists(path):
            # Create the parent directory of *path* if needed; False on failure
            try:
                dn = os.path.dirname(path)
                if dn and not os.path.exists(dn):
                    os.makedirs(dn)
                return True
            except (OSError, IOError) as err:
                self.report_error('unable to create directory ' + error_to_compat_str(err))
                return False

        if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
            return

        if self.params.get('writedescription', False):
            descfn = replace_extension(filename, 'description', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Video description is already present')
            elif info_dict.get('description') is None:
                self.report_warning('There\'s no description to write.')
            else:
                try:
                    self.to_screen('[info] Writing video description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(info_dict['description'])
                except (OSError, IOError):
                    self.report_error('Cannot write description file ' + descfn)
                    return

        if self.params.get('writeannotations', False):
            annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
                self.to_screen('[info] Video annotations are already present')
            elif not info_dict.get('annotations'):
                self.report_warning('There are no annotations to write.')
            else:
                try:
                    self.to_screen('[info] Writing video annotations to: ' + annofn)
                    with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                        annofile.write(info_dict['annotations'])
                except (KeyError, TypeError):
                    self.report_warning('There are no annotations to write.')
                except (OSError, IOError):
                    self.report_error('Cannot write annotations file: ' + annofn)
                    return

        def dl(name, info, subtitle=False):
            # Pick a suitable FileDownloader for *info* and run it on *name*
            fd = get_suitable_downloader(info, self.params)(self, self.params)
            for ph in self._progress_hooks:
                fd.add_progress_hook(ph)
            if self.params.get('verbose'):
                self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
            return fd.download(name, info, subtitle)

        subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                       self.params.get('writeautomaticsub')])

        if subtitles_are_requested and info_dict.get('requested_subtitles'):
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            subtitles = info_dict['requested_subtitles']
            # ie = self.get_info_extractor(info_dict['extractor_key'])
            for sub_lang, sub_info in subtitles.items():
                sub_format = sub_info['ext']
                sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                    self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
                else:
                    self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                    # Inline subtitle data is written directly; otherwise the
                    # subtitle is fetched through a regular downloader.
                    if sub_info.get('data') is not None:
                        try:
                            # Use newline='' to prevent conversion of newline characters
                            # See https://github.com/ytdl-org/youtube-dl/issues/10268
                            with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
                                subfile.write(sub_info['data'])
                        except (OSError, IOError):
                            self.report_error('Cannot write subtitles file ' + sub_filename)
                            return
                    else:
                        try:
                            dl(sub_filename, sub_info, subtitle=True)
                            '''
                            if self.params.get('sleep_interval_subtitles', False):
                                dl(sub_filename, sub_info)
                            else:
                                sub_data = ie._request_webpage(
                                    sub_info['url'], info_dict['id'], note=False).read()
                                with io.open(encodeFilename(sub_filename), 'wb') as subfile:
                                    subfile.write(sub_data)
                            '''
                        except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                            self.report_warning('Unable to download subtitle for "%s": %s' %
                                                (sub_lang, error_to_compat_str(err)))
                            continue

        if self.params.get('skip_download', False):
            # Even with --skip-download, subtitle conversion may still apply
            if self.params.get('convertsubtitles', False):
                subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
                filename_real_ext = os.path.splitext(filename)[1][1:]
                filename_wo_ext = (
                    os.path.splitext(filename)[0]
                    if filename_real_ext == info_dict['ext']
                    else filename)
                afilename = '%s.%s' % (filename_wo_ext, self.params.get('convertsubtitles'))
                if subconv.available:
                    info_dict.setdefault('__postprocessors', [])
                    # info_dict['__postprocessors'].append(subconv)
                if os.path.exists(encodeFilename(afilename)):
                    self.to_screen(
                        '[download] %s has already been downloaded and '
                        'converted' % afilename)
                else:
                    try:
                        self.post_process(filename, info_dict)
                    except (PostProcessingError) as err:
                        self.report_error('postprocessing: %s' % str(err))
                        return

        if self.params.get('writeinfojson', False):
            infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Video description metadata is already present')
            else:
                self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
                try:
                    # requested_* keys are stripped: volatile and not JSON-safe
                    write_json_file(self.filter_requested_info(info_dict), infofn)
                except (OSError, IOError):
                    self.report_error('Cannot write metadata to JSON file ' + infofn)
                    return

        self._write_thumbnails(info_dict, filename)

        # Write internet shortcut files
        url_link = webloc_link = desktop_link = False
        if self.params.get('writelink', False):
            # --write-link picks the platform-native shortcut format
            if sys.platform == "darwin":  # macOS.
                webloc_link = True
            elif sys.platform.startswith("linux"):
                desktop_link = True
            else:  # if sys.platform in ['win32', 'cygwin']:
                url_link = True
        if self.params.get('writeurllink', False):
            url_link = True
        if self.params.get('writewebloclink', False):
            webloc_link = True
        if self.params.get('writedesktoplink', False):
            desktop_link = True

        if url_link or webloc_link or desktop_link:
            if 'webpage_url' not in info_dict:
                self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
                return
            ascii_url = iri_to_uri(info_dict['webpage_url'])

        def _write_link_file(extension, template, newline, embed_filename):
            # Render one shortcut file from *template*; False aborts process_info
            linkfn = replace_extension(filename, extension, info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(linkfn)):
                self.to_screen('[info] Internet shortcut is already present')
            else:
                try:
                    self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
                    with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
                        template_vars = {'url': ascii_url}
                        if embed_filename:
                            # .desktop templates need the target name without extension
                            template_vars['filename'] = linkfn[:-(len(extension) + 1)]
                        linkfile.write(template % template_vars)
                except (OSError, IOError):
                    self.report_error('Cannot write internet shortcut ' + linkfn)
                    return False
            return True

        if url_link:
            if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
                return
        if webloc_link:
            if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
                return
        if desktop_link:
            if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
                return

        # Download
        # NOTE(review): must_record_download_archive is assigned but never
        # read in this version — leftover from upstream; confirm before removal.
        must_record_download_archive = False
        if not self.params.get('skip_download', False):
            try:
                if info_dict.get('requested_formats') is not None:
                    # Multiple formats requested: download each part, then merge
                    downloaded = []
                    success = True
                    merger = FFmpegMergerPP(self)
                    if not merger.available:
                        postprocessors = []
                        self.report_warning('You have requested multiple '
                                            'formats but ffmpeg or avconv are not installed.'
                                            ' The formats won\'t be merged.')
                    else:
                        postprocessors = [merger]

                    def compatible_formats(formats):
                        # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
                        video_formats = [format for format in formats if format.get('vcodec') != 'none']
                        audio_formats = [format for format in formats if format.get('acodec') != 'none']
                        if len(video_formats) > 2 or len(audio_formats) > 2:
                            return False

                        # Check extension
                        exts = set(format.get('ext') for format in formats)
                        COMPATIBLE_EXTS = (
                            set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
                            set(('webm',)),
                        )
                        for ext_sets in COMPATIBLE_EXTS:
                            if ext_sets.issuperset(exts):
                                return True
                        # TODO: Check acodec/vcodec
                        return False

                    filename_real_ext = os.path.splitext(filename)[1][1:]
                    filename_wo_ext = (
                        os.path.splitext(filename)[0]
                        if filename_real_ext == info_dict['ext']
                        else filename)
                    requested_formats = info_dict['requested_formats']
                    if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'Requested formats are incompatible for merge and will be merged into mkv.')
                        # Ensure filename always has a correct extension for successful merge
                        filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
                    if os.path.exists(encodeFilename(filename)):
                        self.to_screen(
                            '[download] %s has already been downloaded and '
                            'merged' % filename)
                    else:
                        for f in requested_formats:
                            new_info = dict(info_dict)
                            new_info.update(f)
                            fname = prepend_extension(
                                self.prepare_filename(new_info),
                                'f%s' % f['format_id'], new_info['ext'])
                            if not ensure_dir_exists(fname):
                                return
                            downloaded.append(fname)
                            partial_success = dl(fname, new_info)
                            success = success and partial_success
                        info_dict['__postprocessors'] = postprocessors
                        info_dict['__files_to_merge'] = downloaded
                else:
                    # Just a single file
                    success = dl(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error('unable to download video data: %s' % error_to_compat_str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

            if success and filename != '-':
                # Fixup content
                fixup_policy = self.params.get('fixup')
                if fixup_policy is None:
                    fixup_policy = 'detect_or_warn'

                INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'

                stretched_ratio = info_dict.get('stretched_ratio')
                if stretched_ratio is not None and stretched_ratio != 1:
                    if fixup_policy == 'warn':
                        self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                            info_dict['id'], stretched_ratio))
                    elif fixup_policy == 'detect_or_warn':
                        stretched_pp = FFmpegFixupStretchedPP(self)
                        if stretched_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(stretched_pp)
                        else:
                            self.report_warning(
                                '%s: Non-uniform pixel ratio (%s). %s'
                                % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
                    else:
                        assert fixup_policy in ('ignore', 'never')

                if (info_dict.get('requested_formats') is None
                        and info_dict.get('container') == 'm4a_dash'):
                    if fixup_policy == 'warn':
                        self.report_warning(
                            '%s: writing DASH m4a. '
                            'Only some players support this container.'
                            % info_dict['id'])
                    elif fixup_policy == 'detect_or_warn':
                        fixup_pp = FFmpegFixupM4aPP(self)
                        if fixup_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(fixup_pp)
                        else:
                            self.report_warning(
                                '%s: writing DASH m4a. '
                                'Only some players support this container. %s'
                                % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                    else:
                        assert fixup_policy in ('ignore', 'never')

                if (info_dict.get('protocol') == 'm3u8_native'
                        or info_dict.get('protocol') == 'm3u8'
                        and self.params.get('hls_prefer_native')):
                    if fixup_policy == 'warn':
                        self.report_warning('%s: malformed AAC bitstream detected.' % (
                            info_dict['id']))
                    elif fixup_policy == 'detect_or_warn':
                        fixup_pp = FFmpegFixupM3u8PP(self)
                        if fixup_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(fixup_pp)
                        else:
                            self.report_warning(
                                '%s: malformed AAC bitstream detected. %s'
                                % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                    else:
                        assert fixup_policy in ('ignore', 'never')

                try:
                    self.post_process(filename, info_dict)
                except (PostProcessingError) as err:
                    self.report_error('postprocessing: %s' % str(err))
                    return
        self.record_download_archive(info_dict)
2192
2193 def download(self, url_list):
2194 """Download a given list of URLs."""
2195 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
2196 if (len(url_list) > 1
2197 and outtmpl != '-'
2198 and '%' not in outtmpl
2199 and self.params.get('max_downloads') != 1):
2200 raise SameFileError(outtmpl)
2201
2202 for url in url_list:
2203 try:
2204 # It also downloads the videos
2205 res = self.extract_info(
2206 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2207 except UnavailableVideoError:
2208 self.report_error('unable to download video')
2209 except MaxDownloadsReached:
2210 self.to_screen('[info] Maximum number of downloaded files reached.')
2211 raise
2212 else:
2213 if self.params.get('dump_single_json', False):
2214 self.to_stdout(json.dumps(res))
2215
2216 return self._download_retcode
2217
2218 def download_with_info_file(self, info_filename):
2219 with contextlib.closing(fileinput.FileInput(
2220 [info_filename], mode='r',
2221 openhook=fileinput.hook_encoded('utf-8'))) as f:
2222 # FileInput doesn't have a read method, we can't call json.load
2223 info = self.filter_requested_info(json.loads('\n'.join(f)))
2224 try:
2225 self.process_ie_result(info, download=True)
2226 except DownloadError:
2227 webpage_url = info.get('webpage_url')
2228 if webpage_url is not None:
2229 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2230 return self.download([webpage_url])
2231 else:
2232 raise
2233 return self._download_retcode
2234
2235 @staticmethod
2236 def filter_requested_info(info_dict):
2237 return dict(
2238 (k, v) for k, v in info_dict.items()
2239 if k not in ['requested_formats', 'requested_subtitles'])
2240
2241 def post_process(self, filename, ie_info):
2242 """Run all the postprocessors on the given file."""
2243 info = dict(ie_info)
2244 info['filepath'] = filename
2245 pps_chain = []
2246 if ie_info.get('__postprocessors') is not None:
2247 pps_chain.extend(ie_info['__postprocessors'])
2248 pps_chain.extend(self._pps)
2249 for pp in pps_chain:
2250 files_to_delete = []
2251 try:
2252 files_to_delete, info = pp.run(info)
2253 except PostProcessingError as e:
2254 self.report_error(e.msg)
2255 if files_to_delete and not self.params.get('keepvideo', False):
2256 for old_filename in set(files_to_delete):
2257 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2258 try:
2259 os.remove(encodeFilename(old_filename))
2260 except (IOError, OSError):
2261 self.report_warning('Unable to remove downloaded original file')
2262
2263 def _make_archive_id(self, info_dict):
2264 video_id = info_dict.get('id')
2265 if not video_id:
2266 return
2267 # Future-proof against any change in case
2268 # and backwards compatibility with prior versions
2269 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
2270 if extractor is None:
2271 url = str_or_none(info_dict.get('url'))
2272 if not url:
2273 return
2274 # Try to find matching extractor for the URL and take its ie_key
2275 for ie in self._ies:
2276 if ie.suitable(url):
2277 extractor = ie.ie_key()
2278 break
2279 else:
2280 return
2281 return extractor.lower() + ' ' + video_id
2282
2283 def in_download_archive(self, info_dict):
2284 fn = self.params.get('download_archive')
2285 if fn is None:
2286 return False
2287
2288 vid_id = self._make_archive_id(info_dict)
2289 if not vid_id:
2290 return False # Incomplete video information
2291
2292 return vid_id in self.archive
2293
2294 def record_download_archive(self, info_dict):
2295 fn = self.params.get('download_archive')
2296 if fn is None:
2297 return
2298 vid_id = self._make_archive_id(info_dict)
2299 assert vid_id
2300 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2301 archive_file.write(vid_id + '\n')
2302 self.archive.add(vid_id)
2303
2304 @staticmethod
2305 def format_resolution(format, default='unknown'):
2306 if format.get('vcodec') == 'none':
2307 return 'audio only'
2308 if format.get('resolution') is not None:
2309 return format['resolution']
2310 if format.get('height') is not None:
2311 if format.get('width') is not None:
2312 res = '%sx%s' % (format['width'], format['height'])
2313 else:
2314 res = '%sp' % format['height']
2315 elif format.get('width') is not None:
2316 res = '%dx?' % format['width']
2317 else:
2318 res = default
2319 return res
2320
    def _format_note(self, fdict):
        """Build the human-readable 'note' column for one format dict.

        Pieces (language, format note, bitrates, container, codecs, fps,
        sample rate, filesize) are appended in a fixed order; each section
        comma-separates itself from whatever was appended before.
        """
        res = ''
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('language'):
            if res:
                res += ' '
            res += '[%s] ' % fdict['language']
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            if res:
                res += ', '
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None
                and fdict.get('vcodec') != 'none'):
            if res:
                res += ', '
            res += fdict['vcodec']
            # '@' joins the codec name with the video bitrate printed below
            if fdict.get('vbr') is not None:
                res += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            # Bitrates known but codec unknown: label the vbr explicitly
            res += 'video@'
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            if res:
                res += ', '
            res += '%sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            if res:
                res += ', '
            if fdict['acodec'] == 'none':
                res += 'video only'
            else:
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += ', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            if res:
                res += ', '
            # '~' marks an estimated size
            res += '~' + format_bytes(fdict['filesize_approx'])
        return res
2376
2377 def list_formats(self, info_dict):
2378 formats = info_dict.get('formats', [info_dict])
2379 table = [
2380 [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
2381 for f in formats
2382 if f.get('preference') is None or f['preference'] >= -1000]
2383 # if len(formats) > 1:
2384 # table[-1][-1] += (' ' if table[-1][-1] else '') + '(best*)'
2385
2386 header_line = ['format code', 'extension', 'resolution', 'note']
2387 self.to_screen(
2388 '[info] Available formats for %s:\n%s' %
2389 (info_dict['id'], render_table(header_line, table)))
2390
2391 def list_thumbnails(self, info_dict):
2392 thumbnails = info_dict.get('thumbnails')
2393 if not thumbnails:
2394 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
2395 return
2396
2397 self.to_screen(
2398 '[info] Thumbnails for %s:' % info_dict['id'])
2399 self.to_screen(render_table(
2400 ['ID', 'width', 'height', 'URL'],
2401 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
2402
2403 def list_subtitles(self, video_id, subtitles, name='subtitles'):
2404 if not subtitles:
2405 self.to_screen('%s has no %s' % (video_id, name))
2406 return
2407 self.to_screen(
2408 'Available %s for %s:' % (name, video_id))
2409 self.to_screen(render_table(
2410 ['Language', 'formats'],
2411 [[lang, ', '.join(f['ext'] for f in reversed(formats))]
2412 for lang, formats in subtitles.items()]))
2413
2414 def urlopen(self, req):
2415 """ Start an HTTP download """
2416 if isinstance(req, compat_basestring):
2417 req = sanitized_Request(req)
2418 return self._opener.open(req, timeout=self._socket_timeout)
2419
    def print_debug_header(self):
        """Write the '[debug] ...' diagnostic header (verbose mode only).

        Reports encodings, the youtube-dlc version, the git HEAD when run
        from a checkout, Python/platform details, helper-program versions,
        the proxy map, and — with 'call_home' — the public IP plus an
        update check.
        """
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dlc version ' + __version__ + '\n')
        if _LAZY_LOADER:
            self._write_string('[debug] Lazy loading extractors enabled' + '\n')
        try:
            # Best effort: report the git commit when running from a checkout
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        except Exception:
            try:
                # Python 2 only: clear the current exception state
                sys.exc_clear()
            except Exception:
                pass

        def python_implementation():
            # Append the PyPy version triple when running under PyPy
            impl_name = platform.python_implementation()
            if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
                return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
            return impl_name

        self._write_string('[debug] Python version %s (%s) - %s\n' % (
            platform.python_version(), python_implementation(),
            platform_name()))

        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Collect the effective proxy mapping from all opener handlers
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            # Network requests below run only when the user opted in via call_home
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
2495
    def _setup_opener(self):
        """Build the urllib opener used by self.urlopen and store it in
        self._opener (cookies, proxies, HTTPS, data: support, file: disabled).
        Also sets self._socket_timeout and self.cookiejar."""
        timeout_val = self.params.get('socket_timeout')
        # Default socket timeout: 600 seconds
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # In-memory jar only; nothing is persisted to disk
            self.cookiejar = compat_cookiejar.CookieJar()
        else:
            opts_cookiefile = expand_path(opts_cookiefile)
            self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load(ignore_discard=True, ignore_expires=True)

        cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
        if opts_proxy is not None:
            if opts_proxy == '':
                # Empty string explicitly disables all proxies
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            # Fall back to the environment (HTTP_PROXY etc.)
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = PerRequestProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        redirect_handler = YoutubeDLRedirectHandler()
        data_handler = compat_urllib_request_DataHandler()

        # When passing our own FileHandler instance, build_opener won't add the
        # default FileHandler and allows us to disable the file protocol, which
        # can be used for malicious purposes (see
        # https://github.com/ytdl-org/youtube-dl/issues/8227)
        file_handler = compat_urllib_request.FileHandler()

        def file_open(*args, **kwargs):
            raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dlc for security reasons')
        file_handler.file_open = file_open

        opener = compat_urllib_request.build_opener(
            proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)

        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
2548
2549 def encode(self, s):
2550 if isinstance(s, bytes):
2551 return s # Already encoded
2552
2553 try:
2554 return s.encode(self.get_encoding())
2555 except UnicodeEncodeError as err:
2556 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2557 raise
2558
2559 def get_encoding(self):
2560 encoding = self.params.get('encoding')
2561 if encoding is None:
2562 encoding = preferredencoding()
2563 return encoding
2564
2565 def _write_thumbnails(self, info_dict, filename):
2566 if self.params.get('writethumbnail', False):
2567 thumbnails = info_dict.get('thumbnails')
2568 if thumbnails:
2569 thumbnails = [thumbnails[-1]]
2570 elif self.params.get('write_all_thumbnails', False):
2571 thumbnails = info_dict.get('thumbnails')
2572 else:
2573 return
2574
2575 if not thumbnails:
2576 # No thumbnails present, so return immediately
2577 return
2578
2579 for t in thumbnails:
2580 thumb_ext = determine_ext(t['url'], 'jpg')
2581 suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2582 thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2583 t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
2584
2585 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2586 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2587 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2588 else:
2589 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2590 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2591 try:
2592 uf = self.urlopen(t['url'])
2593 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2594 shutil.copyfileobj(uf, thumbf)
2595 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2596 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2597 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2598 self.report_warning('Unable to download thumbnail "%s": %s' %
2599 (t['url'], error_to_compat_str(err)))