4 from __future__
import absolute_import
, unicode_literals
29 from string
import ascii_letters
34 compat_get_terminal_size
,
40 compat_tokenize_tokenize
,
42 compat_urllib_request
,
43 compat_urllib_request_DataHandler
,
54 DOT_DESKTOP_LINK_TEMPLATE
,
55 DOT_URL_LINK_TEMPLATE
,
56 DOT_WEBLOC_LINK_TEMPLATE
,
77 PerRequestProxyHandler
,
82 register_socks_protocols
,
95 UnavailableVideoError
,
101 YoutubeDLCookieProcessor
,
103 YoutubeDLRedirectHandler
,
104 process_communicate_or_kill
,
106 from .cache
import Cache
107 from .extractor
import get_info_extractor
, gen_extractor_classes
, _LAZY_LOADER
108 from .extractor
.openload
import PhantomJSwrapper
109 from .downloader
import get_suitable_downloader
110 from .downloader
.rtmp
import rtmpdump_version
111 from .postprocessor
import (
114 FFmpegFixupStretchedPP
,
117 FFmpegSubtitlesConvertorPP
,
120 from .version
import __version__
122 if compat_os_name
== 'nt':
126 class YoutubeDL(object):
129 YoutubeDL objects are the ones responsible of downloading the
130 actual video file and writing it to disk if the user has requested
131 it, among some other tasks. In most cases there should be one per
132 program. As, given a video URL, the downloader doesn't know how to
133 extract all the needed information — a task that InfoExtractors handle — it
134 has to pass the URL to one of them.
136 For this, YoutubeDL objects have a method that allows
137 InfoExtractors to be registered in a given order. When it is passed
138 a URL, the YoutubeDL object hands it to the first InfoExtractor it
139 finds that reports being able to handle it. The InfoExtractor extracts
140 all the information about the video or videos the URL refers to, and
141 YoutubeDL process the extracted information, possibly using a File
142 Downloader to download the video.
144 YoutubeDL objects accept a lot of parameters. In order not to saturate
145 the object constructor with arguments, it receives a dictionary of
146 options instead. These options are available through the params
147 attribute for the InfoExtractors to use. The YoutubeDL also
148 registers itself as the downloader in charge for the InfoExtractors
149 that are added to it, so this is a "mutual registration".
153 username: Username for authentication purposes.
154 password: Password for authentication purposes.
155 videopassword: Password for accessing a video.
156 ap_mso: Adobe Pass multiple-system operator identifier.
157 ap_username: Multiple-system operator account username.
158 ap_password: Multiple-system operator account password.
159 usenetrc: Use netrc for authentication instead.
160 verbose: Print additional info to stdout.
161 quiet: Do not print messages to stdout.
162 no_warnings: Do not print out anything for warnings.
163 forceurl: Force printing final URL.
164 forcetitle: Force printing title.
165 forceid: Force printing ID.
166 forcethumbnail: Force printing thumbnail URL.
167 forcedescription: Force printing description.
168 forcefilename: Force printing final filename.
169 forceduration: Force printing duration.
170 forcejson: Force printing info_dict as JSON.
171 dump_single_json: Force printing the info_dict of the whole playlist
172 (or video) as a single JSON line.
173 force_write_download_archive: Force writing download archive regardless of
174 'skip_download' or 'simulate'.
175 simulate: Do not download the video files.
176 format: Video format code. see "FORMAT SELECTION" for more details.
177 format_sort: How to sort the video formats. see "Sorting Formats" for more details.
178 format_sort_force: Force the given format_sort. see "Sorting Formats" for more details.
179 allow_multiple_video_streams: Allow multiple video streams to be merged into a single file
180 allow_multiple_audio_streams: Allow multiple audio streams to be merged into a single file
181 outtmpl: Template for output names.
182 restrictfilenames: Do not allow "&" and spaces in file names.
183 trim_file_name: Limit length of filename (extension excluded).
184 ignoreerrors: Do not stop on download errors. (Default True when running youtube-dlc, but False when directly accessing YoutubeDL class)
185 force_generic_extractor: Force downloader to use the generic extractor
186 overwrites: Overwrite all video and metadata files if True,
187 overwrite only non-video files if None
188 and don't overwrite any file if False
189 playliststart: Playlist item to start at.
190 playlistend: Playlist item to end at.
191 playlist_items: Specific indices of playlist to download.
192 playlistreverse: Download playlist items in reverse order.
193 playlistrandom: Download playlist items in random order.
194 matchtitle: Download only matching titles.
195 rejecttitle: Reject downloads for matching titles.
196 logger: Log messages to a logging.Logger instance.
197 logtostderr: Log messages to stderr instead of stdout.
198 writedescription: Write the video description to a .description file
199 writeinfojson: Write the video description to a .info.json file
200 writeannotations: Write the video annotations to a .annotations.xml file
201 writethumbnail: Write the thumbnail image to a file
202 write_all_thumbnails: Write all thumbnail formats to files
203 writelink: Write an internet shortcut file, depending on the
204 current platform (.url/.webloc/.desktop)
205 writeurllink: Write a Windows internet shortcut file (.url)
206 writewebloclink: Write a macOS internet shortcut file (.webloc)
207 writedesktoplink: Write a Linux internet shortcut file (.desktop)
208 writesubtitles: Write the video subtitles to a file
209 writeautomaticsub: Write the automatically generated subtitles to a file
210 allsubtitles: Downloads all the subtitles of the video
211 (requires writesubtitles or writeautomaticsub)
212 listsubtitles: Lists all available subtitles for the video
213 subtitlesformat: The format code for subtitles
214 subtitleslangs: List of languages of the subtitles to download
215 keepvideo: Keep the video file after post-processing
216 daterange: A DateRange object, download only if the upload_date is in the range.
217 skip_download: Skip the actual download of the video file
218 cachedir: Location of the cache files in the filesystem.
219 False to disable filesystem cache.
220 noplaylist: Download single video instead of a playlist if in doubt.
221 age_limit: An integer representing the user's age in years.
222 Unsuitable videos for the given age are skipped.
223 min_views: An integer representing the minimum view count the video
224 must have in order to not be skipped.
225 Videos without view count information are always
226 downloaded. None for no limit.
227 max_views: An integer representing the maximum view count.
228 Videos that are more popular than that are not
230 Videos without view count information are always
231 downloaded. None for no limit.
232 download_archive: File name of a file where all downloads are recorded.
233 Videos already present in the file are not downloaded
235 break_on_existing: Stop the download process after attempting to download a
236 file that is in the archive.
237 break_on_reject: Stop the download process when encountering a video that
238 has been filtered out.
239 cookiefile: File name where cookies should be read from and dumped to
240 nocheckcertificate:Do not verify SSL certificates
241 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
242 At the moment, this is only supported by YouTube.
243 proxy: URL of the proxy server to use
244 geo_verification_proxy: URL of the proxy to use for IP address verification
245 on geo-restricted sites.
246 socket_timeout: Time to wait for unresponsive hosts, in seconds
247 bidi_workaround: Work around buggy terminals without bidirectional text
248 support, using fribidi
249 debug_printtraffic:Print out sent and received HTTP traffic
250 include_ads: Download ads as well
251 default_search: Prepend this string if an input url is not valid.
252 'auto' for elaborate guessing
253 encoding: Use this encoding instead of the system-specified.
254 extract_flat: Do not resolve URLs, return the immediate result.
255 Pass in 'in_playlist' to only show this behavior for
257 postprocessors: A list of dictionaries, each with an entry
258 * key: The name of the postprocessor. See
259 youtube_dlc/postprocessor/__init__.py for a list.
260 as well as any further keyword arguments for the
262 post_hooks: A list of functions that get called as the final step
263 for each video file, after all postprocessors have been
264 called. The filename will be passed as the only argument.
265 progress_hooks: A list of functions that get called on download
266 progress, with a dictionary with the entries
267 * status: One of "downloading", "error", or "finished".
268 Check this first and ignore unknown values.
270 If status is one of "downloading", or "finished", the
271 following properties may also be present:
272 * filename: The final filename (always present)
273 * tmpfilename: The filename we're currently writing to
274 * downloaded_bytes: Bytes on disk
275 * total_bytes: Size of the whole file, None if unknown
276 * total_bytes_estimate: Guess of the eventual file size,
278 * elapsed: The number of seconds since download started.
279 * eta: The estimated time in seconds, None if unknown
280 * speed: The download speed in bytes/second, None if
282 * fragment_index: The counter of the currently
283 downloaded video fragment.
284 * fragment_count: The number of fragments (= individual
285 files that will be merged)
287 Progress hooks are guaranteed to be called at least once
288 (with status "finished") if the download is successful.
289 merge_output_format: Extension to use when merging formats.
290 fixup: Automatically correct known faults of the file.
292 - "never": do nothing
293 - "warn": only emit a warning
294 - "detect_or_warn": check whether we can do anything
295 about it, warn otherwise (default)
296 source_address: Client-side IP address to bind to.
297 call_home: Boolean, true iff we are allowed to contact the
298 youtube-dlc servers for debugging.
299 sleep_interval: Number of seconds to sleep before each download when
300 used alone or a lower bound of a range for randomized
301 sleep before each download (minimum possible number
302 of seconds to sleep) when used along with
304 max_sleep_interval:Upper bound of a range for randomized sleep before each
305 download (maximum possible number of seconds to sleep).
306 Must only be used along with sleep_interval.
307 Actual sleep time will be a random float from range
308 [sleep_interval; max_sleep_interval].
309 listformats: Print an overview of available video formats and exit.
310 list_thumbnails: Print a table of all thumbnails and exit.
311 match_filter: A function that gets called with the info_dict of
313 If it returns a message, the video is ignored.
314 If it returns None, the video is downloaded.
315 match_filter_func in utils.py is one example for this.
316 no_color: Do not emit color codes in output.
317 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
320 Two-letter ISO 3166-2 country code that will be used for
321 explicit geographic restriction bypassing via faking
322 X-Forwarded-For HTTP header
324 IP range in CIDR notation that will be used similarly to
327 The following options determine which downloader is picked:
328 external_downloader: Executable of the external downloader to call.
329 None or unset for standard (built-in) downloader.
330 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
331 if True, otherwise use ffmpeg/avconv if False, otherwise
332 use downloader suggested by extractor if None.
334 The following parameters are not used by YoutubeDL itself, they are used by
335 the downloader (see youtube_dlc/downloader/common.py):
336 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
337 noresizebuffer, retries, continuedl, noprogress, consoletitle,
338 xattr_set_filesize, external_downloader_args, hls_use_mpegts,
341 The following options are used by the post processors:
342 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
343 otherwise prefer ffmpeg.
344 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
345 to the binary or its containing directory.
346 postprocessor_args: A dictionary of postprocessor names (in lower case) and a list
347 of additional command-line arguments for the postprocessor.
348 Use 'default' as the name for arguments to passed to all PP.
350 The following options are used by the Youtube extractor:
351 youtube_include_dash_manifest: If True (default), DASH manifests and related
352 data will be downloaded and processed by extractor.
353 You can reduce network I/O by disabling it if you don't
357 _NUMERIC_FIELDS
= set((
358 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
359 'timestamp', 'upload_year', 'upload_month', 'upload_day',
360 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
361 'average_rating', 'comment_count', 'age_limit',
362 'start_time', 'end_time',
363 'chapter_number', 'season_number', 'episode_number',
364 'track_number', 'disc_number', 'release_year',
371 _download_retcode
= None
372 _num_downloads
= None
374 _playlist_urls
= set()
377 def __init__(self
, params
=None, auto_init
=True):
378 """Create a FileDownloader object with the given options."""
382 self
._ies
_instances
= {}
384 self
._post
_hooks
= []
385 self
._progress
_hooks
= []
386 self
._download
_retcode
= 0
387 self
._num
_downloads
= 0
388 self
._screen
_file
= [sys
.stdout
, sys
.stderr
][params
.get('logtostderr', False)]
389 self
._err
_file
= sys
.stderr
392 'nocheckcertificate': False,
394 self
.params
.update(params
)
395 self
.cache
= Cache(self
)
398 """Preload the archive, if any is specified"""
399 def preload_download_archive(self
):
400 fn
= self
.params
.get('download_archive')
404 with locked_file(fn
, 'r', encoding
='utf-8') as archive_file
:
405 for line
in archive_file
:
406 self
.archive
.add(line
.strip())
407 except IOError as ioe
:
408 if ioe
.errno
!= errno
.ENOENT
:
413 def check_deprecated(param
, option
, suggestion
):
414 if self
.params
.get(param
) is not None:
416 '%s is deprecated. Use %s instead.' % (option
, suggestion
))
420 if self
.params
.get('verbose'):
421 self
.to_stdout('[debug] Loading archive file %r' % self
.params
.get('download_archive'))
423 preload_download_archive(self
)
425 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
426 if self
.params
.get('geo_verification_proxy') is None:
427 self
.params
['geo_verification_proxy'] = self
.params
['cn_verification_proxy']
429 check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
430 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
431 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
433 if params
.get('bidi_workaround', False):
436 master
, slave
= pty
.openpty()
437 width
= compat_get_terminal_size().columns
441 width_args
= ['-w', str(width
)]
443 stdin
=subprocess
.PIPE
,
445 stderr
=self
._err
_file
)
447 self
._output
_process
= subprocess
.Popen(
448 ['bidiv'] + width_args
, **sp_kwargs
451 self
._output
_process
= subprocess
.Popen(
452 ['fribidi', '-c', 'UTF-8'] + width_args
, **sp_kwargs
)
453 self
._output
_channel
= os
.fdopen(master
, 'rb')
454 except OSError as ose
:
455 if ose
.errno
== errno
.ENOENT
:
456 self
.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
460 if (sys
.platform
!= 'win32'
461 and sys
.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
462 and not params
.get('restrictfilenames', False)):
463 # Unicode filesystem API will throw errors (#1474, #13027)
465 'Assuming --restrict-filenames since file system encoding '
466 'cannot encode all characters. '
467 'Set the LC_ALL environment variable to fix this.')
468 self
.params
['restrictfilenames'] = True
470 if isinstance(params
.get('outtmpl'), bytes):
472 'Parameter outtmpl is bytes, but should be a unicode string. '
473 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
478 self
.print_debug_header()
479 self
.add_default_info_extractors()
481 for pp_def_raw
in self
.params
.get('postprocessors', []):
482 pp_class
= get_postprocessor(pp_def_raw
['key'])
483 pp_def
= dict(pp_def_raw
)
485 pp
= pp_class(self
, **compat_kwargs(pp_def
))
486 self
.add_post_processor(pp
)
488 for ph
in self
.params
.get('post_hooks', []):
489 self
.add_post_hook(ph
)
491 for ph
in self
.params
.get('progress_hooks', []):
492 self
.add_progress_hook(ph
)
494 register_socks_protocols()
496 def warn_if_short_id(self
, argv
):
497 # short YouTube ID starting with dash?
499 i
for i
, a
in enumerate(argv
)
500 if re
.match(r
'^-[0-9A-Za-z_-]{10}$', a
)]
504 + [a
for i
, a
in enumerate(argv
) if i
not in idxs
]
505 + ['--'] + [argv
[i
] for i
in idxs
]
508 'Long argument string detected. '
509 'Use -- to separate parameters and URLs, like this:\n%s\n' %
510 args_to_str(correct_argv
))
# NOTE(review): extraction artifact — statements are fragmented across lines and
# carry their original line numbers. Original line 514 (which presumably appends
# `ie` to the list the docstring mentions) is absent from this extract — TODO
# confirm against the full source before relying on this block.
512 def add_info_extractor(self
, ie
):
513 """Add an InfoExtractor object to the end of the list."""
# Only concrete extractor instances (not IE classes) are indexed by their
# ie_key and bound back to this downloader ("mutual registration").
515 if not isinstance(ie
, type):
516 self
._ies
_instances
[ie
.ie_key()] = ie
517 ie
.set_downloader(self
)
519 def get_info_extractor(self
, ie_key
):
521 Get an instance of an IE with name ie_key, it will try to get one from
522 the _ies list, if there's no instance it will create a new one and add
523 it to the extractor list.
525 ie
= self
._ies
_instances
.get(ie_key
)
527 ie
= get_info_extractor(ie_key
)()
528 self
.add_info_extractor(ie
)
def add_default_info_extractors(self):
    """
    Add the InfoExtractors returned by gen_extractors to the end of the list
    """
    for extractor_cls in gen_extractor_classes():
        self.add_info_extractor(extractor_cls)
# NOTE(review): extraction artifact — original line 540 (presumably the append
# of `pp` onto the postprocessor chain named in the docstring) is absent from
# this extract; only the downloader binding is visible. TODO confirm.
538 def add_post_processor(self
, pp
):
539 """Add a PostProcessor object to the end of the chain."""
541 pp
.set_downloader(self
)
543 def add_post_hook(self
, ph
):
544 """Add the post hook"""
545 self
._post
_hooks
.append(ph
)
547 def add_progress_hook(self
, ph
):
548 """Add the progress hook (currently only for the file downloader)"""
549 self
._progress
_hooks
.append(ph
)
551 def _bidi_workaround(self
, message
):
552 if not hasattr(self
, '_output_channel'):
555 assert hasattr(self
, '_output_process')
556 assert isinstance(message
, compat_str
)
557 line_count
= message
.count('\n') + 1
558 self
._output
_process
.stdin
.write((message
+ '\n').encode('utf-8'))
559 self
._output
_process
.stdin
.flush()
560 res
= ''.join(self
._output
_channel
.readline().decode('utf-8')
561 for _
in range(line_count
))
562 return res
[:-len('\n')]
564 def to_screen(self
, message
, skip_eol
=False):
565 """Print message to stdout if not in quiet mode."""
566 return self
.to_stdout(message
, skip_eol
, check_quiet
=True)
def _write_string(self, s, out=None):
    """Write *s* to *out* using the user-configured output encoding."""
    encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=encoding)
571 def to_stdout(self
, message
, skip_eol
=False, check_quiet
=False):
572 """Print message to stdout if not in quiet mode."""
573 if self
.params
.get('logger'):
574 self
.params
['logger'].debug(message
)
575 elif not check_quiet
or not self
.params
.get('quiet', False):
576 message
= self
._bidi
_workaround
(message
)
577 terminator
= ['\n', ''][skip_eol
]
578 output
= message
+ terminator
580 self
._write
_string
(output
, self
._screen
_file
)
def to_stderr(self, message):
    """Print message to stderr."""
    assert isinstance(message, compat_str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
    else:
        # Same bidi treatment as stdout output, but aimed at the error file.
        rendered = self._bidi_workaround(message)
        self._write_string(rendered + '\n', self._err_file)
592 def to_console_title(self
, message
):
593 if not self
.params
.get('consoletitle', False):
595 if compat_os_name
== 'nt':
596 if ctypes
.windll
.kernel32
.GetConsoleWindow():
597 # c_wchar_p() might not be necessary if `message` is
598 # already of type unicode()
599 ctypes
.windll
.kernel32
.SetConsoleTitleW(ctypes
.c_wchar_p(message
))
600 elif 'TERM' in os
.environ
:
601 self
._write
_string
('\033[0;%s\007' % message
, self
._screen
_file
)
603 def save_console_title(self
):
604 if not self
.params
.get('consoletitle', False):
606 if self
.params
.get('simulate', False):
608 if compat_os_name
!= 'nt' and 'TERM' in os
.environ
:
609 # Save the title on stack
610 self
._write
_string
('\033[22;0t', self
._screen
_file
)
612 def restore_console_title(self
):
613 if not self
.params
.get('consoletitle', False):
615 if self
.params
.get('simulate', False):
617 if compat_os_name
!= 'nt' and 'TERM' in os
.environ
:
618 # Restore the title from stack
619 self
._write
_string
('\033[23;0t', self
._screen
_file
)
622 self
.save_console_title()
625 def __exit__(self
, *args
):
626 self
.restore_console_title()
628 if self
.params
.get('cookiefile') is not None:
629 self
.cookiejar
.save(ignore_discard
=True, ignore_expires
=True)
631 def trouble(self
, message
=None, tb
=None):
632 """Determine action to take when a download problem appears.
634 Depending on if the downloader has been configured to ignore
635 download errors or not, this method may throw an exception or
636 not when errors are found, after printing the message.
638 tb, if given, is additional traceback information.
640 if message
is not None:
641 self
.to_stderr(message
)
642 if self
.params
.get('verbose'):
644 if sys
.exc_info()[0]: # if .trouble has been called from an except block
646 if hasattr(sys
.exc_info()[1], 'exc_info') and sys
.exc_info()[1].exc_info
[0]:
647 tb
+= ''.join(traceback
.format_exception(*sys
.exc_info()[1].exc_info
))
648 tb
+= encode_compat_str(traceback
.format_exc())
650 tb_data
= traceback
.format_list(traceback
.extract_stack())
651 tb
= ''.join(tb_data
)
653 if not self
.params
.get('ignoreerrors', False):
654 if sys
.exc_info()[0] and hasattr(sys
.exc_info()[1], 'exc_info') and sys
.exc_info()[1].exc_info
[0]:
655 exc_info
= sys
.exc_info()[1].exc_info
657 exc_info
= sys
.exc_info()
658 raise DownloadError(message
, exc_info
)
659 self
._download
_retcode
= 1
661 def report_warning(self
, message
):
663 Print the message to stderr, it will be prefixed with 'WARNING:'
664 If stderr is a tty file the 'WARNING:' will be colored
666 if self
.params
.get('logger') is not None:
667 self
.params
['logger'].warning(message
)
669 if self
.params
.get('no_warnings'):
671 if not self
.params
.get('no_color') and self
._err
_file
.isatty() and compat_os_name
!= 'nt':
672 _msg_header
= '\033[0;33mWARNING:\033[0m'
674 _msg_header
= 'WARNING:'
675 warning_message
= '%s %s' % (_msg_header
, message
)
676 self
.to_stderr(warning_message
)
678 def report_error(self
, message
, tb
=None):
680 Do the same as trouble, but prefixes the message with 'ERROR:', colored
681 in red if stderr is a tty file.
683 if not self
.params
.get('no_color') and self
._err
_file
.isatty() and compat_os_name
!= 'nt':
684 _msg_header
= '\033[0;31mERROR:\033[0m'
686 _msg_header
= 'ERROR:'
687 error_message
= '%s %s' % (_msg_header
, message
)
688 self
.trouble(error_message
, tb
)
690 def report_file_already_downloaded(self
, file_name
):
691 """Report file has already been fully downloaded."""
693 self
.to_screen('[download] %s has already been downloaded' % file_name
)
694 except UnicodeEncodeError:
695 self
.to_screen('[download] The file has already been downloaded')
697 def report_file_delete(self
, file_name
):
698 """Report that existing file will be deleted."""
700 self
.to_screen('Deleting already existent file %s' % file_name
)
701 except UnicodeEncodeError:
702 self
.to_screen('Deleting already existent file')
704 def prepare_filename(self
, info_dict
):
705 """Generate the output filename."""
707 template_dict
= dict(info_dict
)
709 template_dict
['epoch'] = int(time
.time())
710 autonumber_size
= self
.params
.get('autonumber_size')
711 if autonumber_size
is None:
713 template_dict
['autonumber'] = self
.params
.get('autonumber_start', 1) - 1 + self
._num
_downloads
714 if template_dict
.get('resolution') is None:
715 if template_dict
.get('width') and template_dict
.get('height'):
716 template_dict
['resolution'] = '%dx%d' % (template_dict
['width'], template_dict
['height'])
717 elif template_dict
.get('height'):
718 template_dict
['resolution'] = '%sp' % template_dict
['height']
719 elif template_dict
.get('width'):
720 template_dict
['resolution'] = '%dx?' % template_dict
['width']
722 sanitize
= lambda k
, v
: sanitize_filename(
724 restricted
=self
.params
.get('restrictfilenames'),
725 is_id
=(k
== 'id' or k
.endswith('_id')))
726 template_dict
= dict((k
, v
if isinstance(v
, compat_numeric_types
) else sanitize(k
, v
))
727 for k
, v
in template_dict
.items()
728 if v
is not None and not isinstance(v
, (list, tuple, dict)))
729 template_dict
= collections
.defaultdict(lambda: 'NA', template_dict
)
731 outtmpl
= self
.params
.get('outtmpl', DEFAULT_OUTTMPL
)
733 # For fields playlist_index and autonumber convert all occurrences
734 # of %(field)s to %(field)0Nd for backward compatibility
735 field_size_compat_map
= {
736 'playlist_index': len(str(template_dict
['n_entries'])),
737 'autonumber': autonumber_size
,
739 FIELD_SIZE_COMPAT_RE
= r
'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
740 mobj
= re
.search(FIELD_SIZE_COMPAT_RE
, outtmpl
)
743 FIELD_SIZE_COMPAT_RE
,
744 r
'%%(\1)0%dd' % field_size_compat_map
[mobj
.group('field')],
747 # Missing numeric fields used together with integer presentation types
748 # in format specification will break the argument substitution since
749 # string 'NA' is returned for missing fields. We will patch output
750 # template for missing fields to meet string presentation type.
751 for numeric_field
in self
._NUMERIC
_FIELDS
:
752 if numeric_field
not in template_dict
:
753 # As of [1] format syntax is:
754 # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
755 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
759 \({0}\) # mapping key
760 (?:[#0\-+ ]+)? # conversion flags (optional)
761 (?:\d+)? # minimum field width (optional)
762 (?:\.\d+)? # precision (optional)
763 [hlL]? # length modifier (optional)
764 [diouxXeEfFgGcrs%] # conversion type
767 FORMAT_RE
.format(numeric_field
),
768 r
'%({0})s'.format(numeric_field
), outtmpl
)
770 # expand_path translates '%%' into '%' and '$$' into '$'
771 # correspondingly that is not what we want since we need to keep
772 # '%%' intact for template dict substitution step. Working around
773 # with boundary-alike separator hack.
774 sep
= ''.join([random
.choice(ascii_letters
) for _
in range(32)])
775 outtmpl
= outtmpl
.replace('%%', '%{0}%'.format(sep
)).replace('$$', '${0}$'.format(sep
))
777 # outtmpl should be expand_path'ed before template dict substitution
778 # because meta fields may contain env variables we don't want to
779 # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
780 # title "Hello $PATH", we don't want `$PATH` to be expanded.
781 filename
= expand_path(outtmpl
).replace(sep
, '') % template_dict
783 # https://github.com/blackjack4494/youtube-dlc/issues/85
784 trim_file_name
= self
.params
.get('trim_file_name', False)
786 fn_groups
= filename
.rsplit('.')
789 if len(fn_groups
) > 2:
790 sub_ext
= fn_groups
[-2]
791 filename
= '.'.join(filter(None, [fn_groups
[0][:trim_file_name
], sub_ext
, ext
]))
793 # Temporary fix for #4787
794 # 'Treat' all problem characters by passing filename through preferredencoding
795 # to workaround encoding issues with subprocess on python2 @ Windows
796 if sys
.version_info
< (3, 0) and sys
.platform
== 'win32':
797 filename
= encodeFilename(filename
, True).decode(preferredencoding())
798 return sanitize_path(filename
)
799 except ValueError as err
:
800 self
.report_error('Error in output template: ' + str(err
) + ' (encoding: ' + repr(preferredencoding()) + ')')
803 def _match_entry(self
, info_dict
, incomplete
):
804 """ Returns None if the file should be downloaded """
807 video_title
= info_dict
.get('title', info_dict
.get('id', 'video'))
808 if 'title' in info_dict
:
809 # This can happen when we're just evaluating the playlist
810 title
= info_dict
['title']
811 matchtitle
= self
.params
.get('matchtitle', False)
813 if not re
.search(matchtitle
, title
, re
.IGNORECASE
):
814 return '"' + title
+ '" title did not match pattern "' + matchtitle
+ '"'
815 rejecttitle
= self
.params
.get('rejecttitle', False)
817 if re
.search(rejecttitle
, title
, re
.IGNORECASE
):
818 return '"' + title
+ '" title matched reject pattern "' + rejecttitle
+ '"'
819 date
= info_dict
.get('upload_date')
821 dateRange
= self
.params
.get('daterange', DateRange())
822 if date
not in dateRange
:
823 return '%s upload date is not in range %s' % (date_from_str(date
).isoformat(), dateRange
)
824 view_count
= info_dict
.get('view_count')
825 if view_count
is not None:
826 min_views
= self
.params
.get('min_views')
827 if min_views
is not None and view_count
< min_views
:
828 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title
, view_count
, min_views
)
829 max_views
= self
.params
.get('max_views')
830 if max_views
is not None and view_count
> max_views
:
831 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title
, view_count
, max_views
)
832 if age_restricted(info_dict
.get('age_limit'), self
.params
.get('age_limit')):
833 return 'Skipping "%s" because it is age restricted' % video_title
834 if self
.in_download_archive(info_dict
):
835 return '%s has already been recorded in archive' % video_title
838 match_filter
= self
.params
.get('match_filter')
839 if match_filter
is not None:
840 ret
= match_filter(info_dict
)
845 reason
= check_filter()
846 if reason
is not None:
847 self
.to_screen('[download] ' + reason
)
848 if reason
.endswith('has already been recorded in the archive') and self
.params
.get('break_on_existing', False):
849 raise ExistingVideoReached()
850 elif self
.params
.get('break_on_reject', False):
851 raise RejectedVideoReached()
855 def add_extra_info(info_dict
, extra_info
):
856 '''Set the keys from extra_info in info dict if they are missing'''
857 for key
, value
in extra_info
.items():
858 info_dict
.setdefault(key
, value
)
860 def extract_info(self
, url
, download
=True, ie_key
=None, info_dict
=None, extra_info
={},
861 process
=True, force_generic_extractor
=False):
863 Returns a list with a dictionary for each video we find.
864 If 'download', also downloads the videos.
865 extra_info is a dict containing the extra values to add to each result
868 if not ie_key
and force_generic_extractor
:
872 ies
= [self
.get_info_extractor(ie_key
)]
877 if not ie
.suitable(url
):
881 ie
= self
.get_info_extractor(ie_key
)
883 self
.report_warning('The program functionality for this site has been marked as broken, '
884 'and will probably not work.')
887 temp_id
= ie
.extract_id(url
) if callable(getattr(ie
, 'extract_id', None)) else ie
._match
_id
(url
)
888 except (AssertionError, IndexError, AttributeError):
890 if temp_id
is not None and self
.in_download_archive({'id': temp_id, 'ie_key': ie_key}
):
891 self
.to_screen("[%s] %s: has already been recorded in archive" % (
895 return self
.__extract
_info
(url
, ie
, download
, extra_info
, process
, info_dict
)
898 self
.report_error('no suitable InfoExtractor for URL %s' % url
)
# Decorator for extraction entry points: runs `func` and converts the
# known failure types (geo-restriction, extractor errors) into
# self.report_error() calls instead of propagating them.
# NOTE(review): the `try:` line, the `raise` under the pass-through
# except, and the final `return wrapper` are among the original lines
# not visible in this residue.
900 def __handle_extraction_exceptions(func
):
901 def wrapper(self
, *args
, **kwargs
):
903 return func(self
, *args
, **kwargs
)
# Geo-restriction: enrich the message with the list of countries the
# video is available in (ISO 3166 codes expanded to full names).
904 except GeoRestrictedError
as e
:
907 msg
+= '\nThis video is available in %s.' % ', '.join(
908 map(ISO3166Utils
.short2full
, e
.countries
))
909 msg
+= '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
910 self
.report_error(msg
)
911 except ExtractorError
as e
: # An error we somewhat expected
912 self
.report_error(compat_str(e
), e
.format_traceback())
# These control-flow exceptions are presumably re-raised so the caller
# can stop the run -- the raise itself is not visible here.
913 except (MaxDownloadsReached
, ExistingVideoReached
, RejectedVideoReached
):
# Any other exception is only swallowed (reported with traceback) when
# the user asked for ignoreerrors.
915 except Exception as e
:
916 if self
.params
.get('ignoreerrors', False):
917 self
.report_error(error_to_compat_str(e
), tb
=encode_compat_str(traceback
.format_exc()))
# Run the extractor on `url`, normalise/augment the result, and feed it
# into process_ie_result(). Exceptions are handled by the decorator.
922 @__handle_extraction_exceptions
923 def __extract_info(self
, url
, ie
, download
, extra_info
, process
, info_dict
):
924 ie_result
= ie
.extract(url
)
925 if ie_result
is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
# A bare list from the extractor is wrapped as a 'compat_list' result.
927 if isinstance(ie_result
, list):
928 # Backwards compatibility: old IE result format
930 '_type': 'compat_list',
931 'entries': ie_result
,
# Carry over id/title from the embedding info_dict when present
# (only set when the outer dict actually has a truthy value).
934 if info_dict
.get('id'):
935 ie_result
['id'] = info_dict
['id']
936 if info_dict
.get('title'):
937 ie_result
['title'] = info_dict
['title']
# Fill in the standard extractor metadata keys where missing.
938 self
.add_default_extra_info(ie_result
, ie
, url
)
940 return self
.process_ie_result(ie_result
, download
, extra_info
)
# Populate the standard per-result metadata keys (extractor name/key,
# webpage_url basename, formatted duration) -- via add_extra_info, so
# keys the extractor already set are left untouched.
# NOTE(review): a couple of original lines (presumably the key for the
# duration entry and its else-value) are not visible in this residue.
944 def add_default_extra_info(self
, ie_result
, ie
, url
):
945 self
.add_extra_info(ie_result
, {
946 'extractor': ie
.IE_NAME
,
# Human-readable duration; only formatted when a duration is present.
949 formatSeconds(ie_result
['duration'], '-')
950 if ie_result
.get('duration', None) is not None
952 'webpage_url_basename': url_basename(url
),
953 'extractor_key': ie
.ie_key(),
956 def process_ie_result(self
, ie_result
, download
=True, extra_info
={}):
958 Take the result of the ie(may be modified) and resolve all unresolved
959 references (URLs, playlist items).
961 It will also download the videos if 'download'.
962 Returns the resolved ie_result.
964 result_type
= ie_result
.get('_type', 'video')
966 if result_type
in ('url', 'url_transparent'):
967 ie_result
['url'] = sanitize_url(ie_result
['url'])
968 extract_flat
= self
.params
.get('extract_flat', False)
969 if ((extract_flat
== 'in_playlist' and 'playlist' in extra_info
)
970 or extract_flat
is True):
971 self
.__forced
_printings
(
972 ie_result
, self
.prepare_filename(ie_result
),
976 if result_type
== 'video':
977 self
.add_extra_info(ie_result
, extra_info
)
978 return self
.process_video_result(ie_result
, download
=download
)
979 elif result_type
== 'url':
980 # We have to add extra_info to the results because it may be
981 # contained in a playlist
982 return self
.extract_info(ie_result
['url'],
983 download
, info_dict
=ie_result
,
984 ie_key
=ie_result
.get('ie_key'),
985 extra_info
=extra_info
)
986 elif result_type
== 'url_transparent':
987 # Use the information from the embedding page
988 info
= self
.extract_info(
989 ie_result
['url'], ie_key
=ie_result
.get('ie_key'),
990 extra_info
=extra_info
, download
=False, process
=False)
992 # extract_info may return None when ignoreerrors is enabled and
993 # extraction failed with an error, don't crash and return early
998 force_properties
= dict(
999 (k
, v
) for k
, v
in ie_result
.items() if v
is not None)
1000 for f
in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1001 if f
in force_properties
:
1002 del force_properties
[f
]
1003 new_result
= info
.copy()
1004 new_result
.update(force_properties
)
1006 # Extracted info may not be a video result (i.e.
1007 # info.get('_type', 'video') != video) but rather an url or
1008 # url_transparent. In such cases outer metadata (from ie_result)
1009 # should be propagated to inner one (info). For this to happen
1010 # _type of info should be overridden with url_transparent. This
1011 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1012 if new_result
.get('_type') == 'url':
1013 new_result
['_type'] = 'url_transparent'
1015 return self
.process_ie_result(
1016 new_result
, download
=download
, extra_info
=extra_info
)
1017 elif result_type
in ('playlist', 'multi_video'):
1018 # Protect from infinite recursion due to recursively nested playlists
1019 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1020 webpage_url
= ie_result
['webpage_url']
1021 if webpage_url
in self
._playlist
_urls
:
1023 '[download] Skipping already downloaded playlist: %s'
1024 % ie_result
.get('title') or ie_result
.get('id'))
1027 self
._playlist
_level
+= 1
1028 self
._playlist
_urls
.add(webpage_url
)
1030 return self
.__process
_playlist
(ie_result
, download
)
1032 self
._playlist
_level
-= 1
1033 if not self
._playlist
_level
:
1034 self
._playlist
_urls
.clear()
1035 elif result_type
== 'compat_list':
1036 self
.report_warning(
1037 'Extractor %s returned a compat_list result. '
1038 'It needs to be updated.' % ie_result
.get('extractor'))
1041 self
.add_extra_info(
1044 'extractor': ie_result
['extractor'],
1045 'webpage_url': ie_result
['webpage_url'],
1046 'webpage_url_basename': url_basename(ie_result
['webpage_url']),
1047 'extractor_key': ie_result
['extractor_key'],
1051 ie_result
['entries'] = [
1052 self
.process_ie_result(_fixup(r
), download
, extra_info
)
1053 for r
in ie_result
['entries']
1057 raise Exception('Invalid result type: %s' % result_type
)
1059 def __process_playlist(self
, ie_result
, download
):
1060 # We process each entry in the playlist
1061 playlist
= ie_result
.get('title') or ie_result
.get('id')
1062 self
.to_screen('[download] Downloading playlist: %s' % playlist
)
1064 playlist_results
= []
1066 playliststart
= self
.params
.get('playliststart', 1) - 1
1067 playlistend
= self
.params
.get('playlistend')
1068 # For backwards compatibility, interpret -1 as whole list
1069 if playlistend
== -1:
1072 playlistitems_str
= self
.params
.get('playlist_items')
1073 playlistitems
= None
1074 if playlistitems_str
is not None:
1075 def iter_playlistitems(format
):
1076 for string_segment
in format
.split(','):
1077 if '-' in string_segment
:
1078 start
, end
= string_segment
.split('-')
1079 for item
in range(int(start
), int(end
) + 1):
1082 yield int(string_segment
)
1083 playlistitems
= orderedSet(iter_playlistitems(playlistitems_str
))
1085 ie_entries
= ie_result
['entries']
1087 def make_playlistitems_entries(list_ie_entries
):
1088 num_entries
= len(list_ie_entries
)
1090 list_ie_entries
[i
- 1] for i
in playlistitems
1091 if -num_entries
<= i
- 1 < num_entries
]
1093 def report_download(num_entries
):
1095 '[%s] playlist %s: Downloading %d videos' %
1096 (ie_result
['extractor'], playlist
, num_entries
))
1098 if isinstance(ie_entries
, list):
1099 n_all_entries
= len(ie_entries
)
1101 entries
= make_playlistitems_entries(ie_entries
)
1103 entries
= ie_entries
[playliststart
:playlistend
]
1104 n_entries
= len(entries
)
1106 '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
1107 (ie_result
['extractor'], playlist
, n_all_entries
, n_entries
))
1108 elif isinstance(ie_entries
, PagedList
):
1111 for item
in playlistitems
:
1112 entries
.extend(ie_entries
.getslice(
1116 entries
= ie_entries
.getslice(
1117 playliststart
, playlistend
)
1118 n_entries
= len(entries
)
1119 report_download(n_entries
)
1122 entries
= make_playlistitems_entries(list(itertools
.islice(
1123 ie_entries
, 0, max(playlistitems
))))
1125 entries
= list(itertools
.islice(
1126 ie_entries
, playliststart
, playlistend
))
1127 n_entries
= len(entries
)
1128 report_download(n_entries
)
1130 if self
.params
.get('playlistreverse', False):
1131 entries
= entries
[::-1]
1133 if self
.params
.get('playlistrandom', False):
1134 random
.shuffle(entries
)
1136 x_forwarded_for
= ie_result
.get('__x_forwarded_for_ip')
1138 for i
, entry
in enumerate(entries
, 1):
1139 self
.to_screen('[download] Downloading video %s of %s' % (i
, n_entries
))
1140 # This __x_forwarded_for_ip thing is a bit ugly but requires
1143 entry
['__x_forwarded_for_ip'] = x_forwarded_for
1145 'n_entries': n_entries
,
1146 'playlist': playlist
,
1147 'playlist_id': ie_result
.get('id'),
1148 'playlist_title': ie_result
.get('title'),
1149 'playlist_uploader': ie_result
.get('uploader'),
1150 'playlist_uploader_id': ie_result
.get('uploader_id'),
1151 'playlist_index': playlistitems
[i
- 1] if playlistitems
else i
+ playliststart
,
1152 'extractor': ie_result
['extractor'],
1153 'webpage_url': ie_result
['webpage_url'],
1154 'webpage_url_basename': url_basename(ie_result
['webpage_url']),
1155 'extractor_key': ie_result
['extractor_key'],
1158 if self
._match
_entry
(entry
, incomplete
=True) is not None:
1161 entry_result
= self
.__process
_iterable
_entry
(entry
, download
, extra
)
1162 # TODO: skip failed (empty) entries?
1163 playlist_results
.append(entry_result
)
1164 ie_result
['entries'] = playlist_results
1165 self
.to_screen('[download] Finished downloading playlist: %s' % playlist
)
# Thin wrapper around process_ie_result for one playlist entry; exists so
# the exception-handling decorator can catch per-entry failures without
# aborting the whole playlist.
1168 @__handle_extraction_exceptions
1169 def __process_iterable_entry(self
, entry
, download
, extra_info
):
1170 return self
.process_ie_result(
1171 entry
, download
=download
, extra_info
=extra_info
)
# Compile a textual filter_spec (e.g. "height<=720", "ext^=mp4") into a
# predicate over format dicts. Two grammars are tried: a numeric
# comparison and a string comparison; the inner `_filter(f)` closure at
# the bottom applies whichever matched.
# NOTE(review): residue -- the OPERATORS/STR_OPERATORS tables, several
# if/try lines and the final `return _filter` are not visible here.
1173 def _build_format_filter(self
, filter_spec
):
1174 " Returns a function to filter the formats according to the filter_spec "
# Numeric grammar: key, comparison operator, optional '?' meaning
# "include formats where the key is missing", then a number that may
# carry a size suffix (k/M/G/... with optional i and B/b).
1184 operator_rex
= re
.compile(r
'''(?x)\s*
1185 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
1186 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1187 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
1189 ''' % '|'.join(map(re
.escape
, OPERATORS
.keys())))
1190 m
= operator_rex
.search(filter_spec
)
# Plain integer first; on failure the value is parsed as a file size,
# retrying with a 'B' suffix appended before giving up.
1193 comparison_value
= int(m
.group('value'))
1195 comparison_value
= parse_filesize(m
.group('value'))
1196 if comparison_value
is None:
1197 comparison_value
= parse_filesize(m
.group('value') + 'B')
1198 if comparison_value
is None:
1200 'Invalid value %r in format specification %r' % (
1201 m
.group('value'), filter_spec
))
1202 op
= OPERATORS
[m
.group('op')]
# String operators: prefix / suffix / substring match.
1207 '^=': lambda attr
, value
: attr
.startswith(value
),
1208 '$=': lambda attr
, value
: attr
.endswith(value
),
1209 '*=': lambda attr
, value
: value
in attr
,
# String grammar: any dotted key, optional '!' negation, operator,
# optional none-inclusive '?', then the comparison string.
1211 str_operator_rex
= re
.compile(r
'''(?x)
1212 \s*(?P<key>[a-zA-Z0-9._-]+)
1213 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
1214 \s*(?P<value>[a-zA-Z0-9._-]+)
1216 ''' % '|'.join(map(re
.escape
, STR_OPERATORS
.keys())))
1217 m
= str_operator_rex
.search(filter_spec
)
1219 comparison_value
= m
.group('value')
1220 str_op
= STR_OPERATORS
[m
.group('op')]
# '!' inverts the selected string operator.
1221 if m
.group('negation'):
1222 op
= lambda attr
, value
: not str_op(attr
, value
)
# Neither grammar matched anywhere in the spec.
1227 raise ValueError('Invalid filter specification %r' % filter_spec
)
# The actual predicate: a missing key passes only when '?' was given
# (m.group('none_inclusive') is truthy then).
1230 actual_value
= f
.get(m
.group('key'))
1231 if actual_value
is None:
1232 return m
.group('none_inclusive')
1233 return op(actual_value
, comparison_value
)
# Pick the default --format string: variants of 'best...' depending on
# whether ffmpeg can merge streams, whether output is streamed to
# stdout/live, and whether multiple audio streams are allowed.
# NOTE(review): residue -- the inner helper's def line and the
# surrounding conditional scaffolding are among the missing lines.
1236 def _default_format_spec(self
, info_dict
, download
=True):
# Merging is only possible when the ffmpeg merger is both installed
# and able to merge (container support).
1239 merger
= FFmpegMergerPP(self
)
1240 return merger
.available
and merger
.can_merge()
# Conditions under which merging is pointless/impossible: simulation,
# live streams, or output going to stdout ('-').
1243 not self
.params
.get('simulate', False)
1247 or info_dict
.get('is_live', False)
1248 or self
.params
.get('outtmpl', DEFAULT_OUTTMPL
) == '-'))
1251 'best/bestvideo+bestaudio'
1253 else 'bestvideo*+bestaudio/best'
1254 if not self
.params
.get('allow_multiple_audio_streams', False)
1255 else 'bestvideo+bestaudio/best')
1257 def build_format_selector(self
, format_spec
):
1258 def syntax_error(note
, start
):
1260 'Invalid format specification: '
1261 '{0}\n\t{1}\n\t{2}^'.format(note
, format_spec
, ' ' * start
[1]))
1262 return SyntaxError(message
)
1264 PICKFIRST
= 'PICKFIRST'
1268 FormatSelector
= collections
.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1270 allow_multiple_streams
= {'audio': self
.params
.get('allow_multiple_audio_streams', False),
1271 'video': self
.params
.get('allow_multiple_video_streams', False)}
1273 def _parse_filter(tokens
):
1275 for type, string
, start
, _
, _
in tokens
:
1276 if type == tokenize
.OP
and string
== ']':
1277 return ''.join(filter_parts
)
1279 filter_parts
.append(string
)
1281 def _remove_unused_ops(tokens
):
1282 # Remove operators that we don't use and join them with the surrounding strings
1283 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1284 ALLOWED_OPS
= ('/', '+', ',', '(', ')')
1285 last_string
, last_start
, last_end
, last_line
= None, None, None, None
1286 for type, string
, start
, end
, line
in tokens
:
1287 if type == tokenize
.OP
and string
== '[':
1289 yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
1291 yield type, string
, start
, end
, line
1292 # everything inside brackets will be handled by _parse_filter
1293 for type, string
, start
, end
, line
in tokens
:
1294 yield type, string
, start
, end
, line
1295 if type == tokenize
.OP
and string
== ']':
1297 elif type == tokenize
.OP
and string
in ALLOWED_OPS
:
1299 yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
1301 yield type, string
, start
, end
, line
1302 elif type in [tokenize
.NAME
, tokenize
.NUMBER
, tokenize
.OP
]:
1304 last_string
= string
1308 last_string
+= string
1310 yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
1312 def _parse_format_selection(tokens
, inside_merge
=False, inside_choice
=False, inside_group
=False):
1314 current_selector
= None
1315 for type, string
, start
, _
, _
in tokens
:
1316 # ENCODING is only defined in python 3.x
1317 if type == getattr(tokenize
, 'ENCODING', None):
1319 elif type in [tokenize
.NAME
, tokenize
.NUMBER
]:
1320 current_selector
= FormatSelector(SINGLE
, string
, [])
1321 elif type == tokenize
.OP
:
1323 if not inside_group
:
1324 # ')' will be handled by the parentheses group
1325 tokens
.restore_last_token()
1327 elif inside_merge
and string
in ['/', ',']:
1328 tokens
.restore_last_token()
1330 elif inside_choice
and string
== ',':
1331 tokens
.restore_last_token()
1334 if not current_selector
:
1335 raise syntax_error('"," must follow a format selector', start
)
1336 selectors
.append(current_selector
)
1337 current_selector
= None
1339 if not current_selector
:
1340 raise syntax_error('"/" must follow a format selector', start
)
1341 first_choice
= current_selector
1342 second_choice
= _parse_format_selection(tokens
, inside_choice
=True)
1343 current_selector
= FormatSelector(PICKFIRST
, (first_choice
, second_choice
), [])
1345 if not current_selector
:
1346 current_selector
= FormatSelector(SINGLE
, 'best', [])
1347 format_filter
= _parse_filter(tokens
)
1348 current_selector
.filters
.append(format_filter
)
1350 if current_selector
:
1351 raise syntax_error('Unexpected "("', start
)
1352 group
= _parse_format_selection(tokens
, inside_group
=True)
1353 current_selector
= FormatSelector(GROUP
, group
, [])
1355 if not current_selector
:
1356 raise syntax_error('Unexpected "+"', start
)
1357 selector_1
= current_selector
1358 selector_2
= _parse_format_selection(tokens
, inside_merge
=True)
1360 raise syntax_error('Expected a selector', start
)
1361 current_selector
= FormatSelector(MERGE
, (selector_1
, selector_2
), [])
1363 raise syntax_error('Operator not recognized: "{0}"'.format(string
), start
)
1364 elif type == tokenize
.ENDMARKER
:
1366 if current_selector
:
1367 selectors
.append(current_selector
)
1370 def _build_selector_function(selector
):
1371 if isinstance(selector
, list): # ,
1372 fs
= [_build_selector_function(s
) for s
in selector
]
1374 def selector_function(ctx
):
1376 for format
in f(ctx
):
1378 return selector_function
1380 elif selector
.type == GROUP
: # ()
1381 selector_function
= _build_selector_function(selector
.selector
)
1383 elif selector
.type == PICKFIRST
: # /
1384 fs
= [_build_selector_function(s
) for s
in selector
.selector
]
1386 def selector_function(ctx
):
1388 picked_formats
= list(f(ctx
))
1390 return picked_formats
1393 elif selector
.type == SINGLE
: # atom
1394 format_spec
= selector
.selector
if selector
.selector
is not None else 'best'
1396 if format_spec
== 'all':
1397 def selector_function(ctx
):
1398 formats
= list(ctx
['formats'])
1404 format_fallback
= False
1405 format_spec_obj
= re
.match(r
'(best|worst|b|w)(video|audio|v|a)?(\*)?$', format_spec
)
1406 if format_spec_obj
is not None:
1407 format_idx
= 0 if format_spec_obj
.group(1)[0] == 'w' else -1
1408 format_type
= format_spec_obj
.group(2)[0] if format_spec_obj
.group(2) else False
1409 not_format_type
= 'v' if format_type
== 'a' else 'a'
1410 format_modified
= format_spec_obj
.group(3) is not None
1412 format_fallback
= not format_type
and not format_modified
# for b, w
1413 filter_f
= ((lambda f
: f
.get(format_type
+ 'codec') != 'none')
1414 if format_type
and format_modified
# bv*, ba*, wv*, wa*
1415 else (lambda f
: f
.get(not_format_type
+ 'codec') == 'none')
1416 if format_type
# bv, ba, wv, wa
1417 else (lambda f
: f
.get('vcodec') != 'none' and f
.get('acodec') != 'none')
1418 if not format_modified
# b, w
1422 filter_f
= ((lambda f
: f
.get('ext') == format_spec
)
1423 if format_spec
in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav'] # extension
1424 else (lambda f
: f
.get('format_id') == format_spec
)) # id
1426 def selector_function(ctx
):
1427 formats
= list(ctx
['formats'])
1430 matches
= list(filter(filter_f
, formats
)) if filter_f
is not None else formats
1432 yield matches
[format_idx
]
1433 elif format_fallback
== 'force' or (format_fallback
and ctx
['incomplete_formats']):
1434 # for extractors with incomplete formats (audio only (soundcloud)
1435 # or video only (imgur)) best/worst will fallback to
1436 # best/worst {video,audio}-only format
1437 yield formats
[format_idx
]
1439 elif selector
.type == MERGE
: # +
1440 def _merge(formats_pair
):
1441 format_1
, format_2
= formats_pair
1444 formats_info
.extend(format_1
.get('requested_formats', (format_1
,)))
1445 formats_info
.extend(format_2
.get('requested_formats', (format_2
,)))
1447 if not allow_multiple_streams
['video'] or not allow_multiple_streams
['audio']:
1448 get_no_more
= {"video": False, "audio": False}
1449 for (i
, fmt_info
) in enumerate(formats_info
):
1450 for aud_vid
in ["audio", "video"]:
1451 if not allow_multiple_streams
[aud_vid
] and fmt_info
.get(aud_vid
[0] + 'codec') != 'none':
1452 if get_no_more
[aud_vid
]:
1454 get_no_more
[aud_vid
] = True
1456 if len(formats_info
) == 1:
1457 return formats_info
[0]
1459 video_fmts
= [fmt_info
for fmt_info
in formats_info
if fmt_info
.get('vcodec') != 'none']
1460 audio_fmts
= [fmt_info
for fmt_info
in formats_info
if fmt_info
.get('acodec') != 'none']
1462 the_only_video
= video_fmts
[0] if len(video_fmts
) == 1 else None
1463 the_only_audio
= audio_fmts
[0] if len(audio_fmts
) == 1 else None
1465 output_ext
= self
.params
.get('merge_output_format')
1468 output_ext
= the_only_video
['ext']
1469 elif the_only_audio
and not video_fmts
:
1470 output_ext
= the_only_audio
['ext']
1475 'requested_formats': formats_info
,
1476 'format': '+'.join(fmt_info
.get('format') for fmt_info
in formats_info
),
1477 'format_id': '+'.join(fmt_info
.get('format_id') for fmt_info
in formats_info
),
1483 'width': the_only_video
.get('width'),
1484 'height': the_only_video
.get('height'),
1485 'resolution': the_only_video
.get('resolution'),
1486 'fps': the_only_video
.get('fps'),
1487 'vcodec': the_only_video
.get('vcodec'),
1488 'vbr': the_only_video
.get('vbr'),
1489 'stretched_ratio': the_only_video
.get('stretched_ratio'),
1494 'acodec': the_only_audio
.get('acodec'),
1495 'abr': the_only_audio
.get('abr'),
1500 selector_1
, selector_2
= map(_build_selector_function
, selector
.selector
)
1502 def selector_function(ctx
):
1503 for pair
in itertools
.product(
1504 selector_1(copy
.deepcopy(ctx
)), selector_2(copy
.deepcopy(ctx
))):
1507 filters
= [self
._build
_format
_filter
(f
) for f
in selector
.filters
]
1509 def final_selector(ctx
):
1510 ctx_copy
= copy
.deepcopy(ctx
)
1511 for _filter
in filters
:
1512 ctx_copy
['formats'] = list(filter(_filter
, ctx_copy
['formats']))
1513 return selector_function(ctx_copy
)
1514 return final_selector
1516 stream
= io
.BytesIO(format_spec
.encode('utf-8'))
1518 tokens
= list(_remove_unused_ops(compat_tokenize_tokenize(stream
.readline
)))
1519 except tokenize
.TokenError
:
1520 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec
)))
1522 class TokenIterator(object):
1523 def __init__(self
, tokens
):
1524 self
.tokens
= tokens
1531 if self
.counter
>= len(self
.tokens
):
1532 raise StopIteration()
1533 value
= self
.tokens
[self
.counter
]
1539 def restore_last_token(self
):
1542 parsed_selector
= _parse_format_selection(iter(TokenIterator(tokens
)))
1543 return _build_selector_function(parsed_selector
)
# Build the effective HTTP headers for a format/info dict: start from the
# global std_headers, overlay the extractor-supplied http_headers, then
# add Cookie and X-Forwarded-For where applicable.
# NOTE(review): the `return res` line (and a few guard lines) are not
# visible in this residue.
1545 def _calc_headers(self
, info_dict
):
1546 res
= std_headers
.copy()
# Extractor-provided headers win over the defaults.
1548 add_headers
= info_dict
.get('http_headers')
1550 res
.update(add_headers
)
# Cookies matching this URL, computed from the cookiejar.
1552 cookies
= self
._calc
_cookies
(info_dict
)
1554 res
['Cookie'] = cookies
# Only inject the spoofed client IP when the extractor did not already
# set an X-Forwarded-For header.
1556 if 'X-Forwarded-For' not in res
:
1557 x_forwarded_for_ip
= info_dict
.get('__x_forwarded_for_ip')
1558 if x_forwarded_for_ip
:
1559 res
['X-Forwarded-For'] = x_forwarded_for_ip
def _calc_cookies(self, info_dict):
    """Return the Cookie header value the cookiejar would send for this URL."""
    # Build a throwaway request for the target URL, let the jar stamp its
    # matching cookies onto it, then read the resulting header back out.
    request = sanitized_Request(info_dict['url'])
    self.cookiejar.add_cookie_header(request)
    return request.get_header('Cookie')
1568 def process_video_result(self
, info_dict
, download
=True):
1569 assert info_dict
.get('_type', 'video') == 'video'
1571 if 'id' not in info_dict
:
1572 raise ExtractorError('Missing "id" field in extractor result')
1573 if 'title' not in info_dict
:
1574 raise ExtractorError('Missing "title" field in extractor result')
1576 def report_force_conversion(field
, field_not
, conversion
):
1577 self
.report_warning(
1578 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
1579 % (field
, field_not
, conversion
))
1581 def sanitize_string_field(info
, string_field
):
1582 field
= info
.get(string_field
)
1583 if field
is None or isinstance(field
, compat_str
):
1585 report_force_conversion(string_field
, 'a string', 'string')
1586 info
[string_field
] = compat_str(field
)
1588 def sanitize_numeric_fields(info
):
1589 for numeric_field
in self
._NUMERIC
_FIELDS
:
1590 field
= info
.get(numeric_field
)
1591 if field
is None or isinstance(field
, compat_numeric_types
):
1593 report_force_conversion(numeric_field
, 'numeric', 'int')
1594 info
[numeric_field
] = int_or_none(field
)
1596 sanitize_string_field(info_dict
, 'id')
1597 sanitize_numeric_fields(info_dict
)
1599 if 'playlist' not in info_dict
:
1600 # It isn't part of a playlist
1601 info_dict
['playlist'] = None
1602 info_dict
['playlist_index'] = None
1604 thumbnails
= info_dict
.get('thumbnails')
1605 if thumbnails
is None:
1606 thumbnail
= info_dict
.get('thumbnail')
1608 info_dict
['thumbnails'] = thumbnails
= [{'url': thumbnail}
]
1610 thumbnails
.sort(key
=lambda t
: (
1611 t
.get('preference') if t
.get('preference') is not None else -1,
1612 t
.get('width') if t
.get('width') is not None else -1,
1613 t
.get('height') if t
.get('height') is not None else -1,
1614 t
.get('id') if t
.get('id') is not None else '', t
.get('url')))
1615 for i
, t
in enumerate(thumbnails
):
1616 t
['url'] = sanitize_url(t
['url'])
1617 if t
.get('width') and t
.get('height'):
1618 t
['resolution'] = '%dx%d' % (t
['width'], t
['height'])
1619 if t
.get('id') is None:
1622 if self
.params
.get('list_thumbnails'):
1623 self
.list_thumbnails(info_dict
)
1626 thumbnail
= info_dict
.get('thumbnail')
1628 info_dict
['thumbnail'] = sanitize_url(thumbnail
)
1630 info_dict
['thumbnail'] = thumbnails
[-1]['url']
1632 if 'display_id' not in info_dict
and 'id' in info_dict
:
1633 info_dict
['display_id'] = info_dict
['id']
1635 if info_dict
.get('upload_date') is None and info_dict
.get('timestamp') is not None:
1636 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1637 # see http://bugs.python.org/issue1646728)
1639 upload_date
= datetime
.datetime
.utcfromtimestamp(info_dict
['timestamp'])
1640 info_dict
['upload_date'] = upload_date
.strftime('%Y%m%d')
1641 except (ValueError, OverflowError, OSError):
1644 # Auto generate title fields corresponding to the *_number fields when missing
1645 # in order to always have clean titles. This is very common for TV series.
1646 for field
in ('chapter', 'season', 'episode'):
1647 if info_dict
.get('%s_number' % field
) is not None and not info_dict
.get(field
):
1648 info_dict
[field
] = '%s %d' % (field
.capitalize(), info_dict
['%s_number' % field
])
1650 for cc_kind
in ('subtitles', 'automatic_captions'):
1651 cc
= info_dict
.get(cc_kind
)
1653 for _
, subtitle
in cc
.items():
1654 for subtitle_format
in subtitle
:
1655 if subtitle_format
.get('url'):
1656 subtitle_format
['url'] = sanitize_url(subtitle_format
['url'])
1657 if subtitle_format
.get('ext') is None:
1658 subtitle_format
['ext'] = determine_ext(subtitle_format
['url']).lower()
1660 automatic_captions
= info_dict
.get('automatic_captions')
1661 subtitles
= info_dict
.get('subtitles')
1663 if self
.params
.get('listsubtitles', False):
1664 if 'automatic_captions' in info_dict
:
1665 self
.list_subtitles(
1666 info_dict
['id'], automatic_captions
, 'automatic captions')
1667 self
.list_subtitles(info_dict
['id'], subtitles
, 'subtitles')
1670 info_dict
['requested_subtitles'] = self
.process_subtitles(
1671 info_dict
['id'], subtitles
, automatic_captions
)
1673 # We now pick which formats have to be downloaded
1674 if info_dict
.get('formats') is None:
1675 # There's only one format available
1676 formats
= [info_dict
]
1678 formats
= info_dict
['formats']
1681 raise ExtractorError('No video formats found!')
1683 def is_wellformed(f
):
1686 self
.report_warning(
1687 '"url" field is missing or empty - skipping format, '
1688 'there is an error in extractor')
1690 if isinstance(url
, bytes):
1691 sanitize_string_field(f
, 'url')
1694 # Filter out malformed formats for better extraction robustness
1695 formats
= list(filter(is_wellformed
, formats
))
1699 # We check that all the formats have the format and format_id fields
1700 for i
, format
in enumerate(formats
):
1701 sanitize_string_field(format
, 'format_id')
1702 sanitize_numeric_fields(format
)
1703 format
['url'] = sanitize_url(format
['url'])
1704 if not format
.get('format_id'):
1705 format
['format_id'] = compat_str(i
)
1707 # Sanitize format_id from characters used in format selector expression
1708 format
['format_id'] = re
.sub(r
'[\s,/+\[\]()]', '_', format
['format_id'])
1709 format_id
= format
['format_id']
1710 if format_id
not in formats_dict
:
1711 formats_dict
[format_id
] = []
1712 formats_dict
[format_id
].append(format
)
1714 # Make sure all formats have unique format_id
1715 for format_id
, ambiguous_formats
in formats_dict
.items():
1716 if len(ambiguous_formats
) > 1:
1717 for i
, format
in enumerate(ambiguous_formats
):
1718 format
['format_id'] = '%s-%d' % (format_id
, i
)
1720 for i
, format
in enumerate(formats
):
1721 if format
.get('format') is None:
1722 format
['format'] = '{id} - {res}{note}'.format(
1723 id=format
['format_id'],
1724 res
=self
.format_resolution(format
),
1725 note
=' ({0})'.format(format
['format_note']) if format
.get('format_note') is not None else '',
1727 # Automatically determine file extension if missing
1728 if format
.get('ext') is None:
1729 format
['ext'] = determine_ext(format
['url']).lower()
1730 # Automatically determine protocol if missing (useful for format
1731 # selection purposes)
1732 if format
.get('protocol') is None:
1733 format
['protocol'] = determine_protocol(format
)
1734 # Add HTTP headers, so that external programs can use them from the
1736 full_format_info
= info_dict
.copy()
1737 full_format_info
.update(format
)
1738 format
['http_headers'] = self
._calc
_headers
(full_format_info
)
1739 # Remove private housekeeping stuff
1740 if '__x_forwarded_for_ip' in info_dict
:
1741 del info_dict
['__x_forwarded_for_ip']
1743 # TODO Central sorting goes here
1745 if formats
[0] is not info_dict
:
1746 # only set the 'formats' fields if the original info_dict list them
1747 # otherwise we end up with a circular reference, the first (and unique)
1748 # element in the 'formats' field in info_dict is info_dict itself,
1749 # which can't be exported to json
1750 info_dict
['formats'] = formats
1751 if self
.params
.get('listformats'):
1752 self
.list_formats(info_dict
)
1755 req_format
= self
.params
.get('format')
1756 if req_format
is None:
1757 req_format
= self
._default
_format
_spec
(info_dict
, download
=download
)
1758 if self
.params
.get('verbose'):
1759 self
._write
_string
('[debug] Default format spec: %s\n' % req_format
)
1761 format_selector
= self
.build_format_selector(req_format
)
1763 # While in format selection we may need to have an access to the original
1764 # format set in order to calculate some metrics or do some processing.
1765 # For now we need to be able to guess whether original formats provided
1766 # by extractor are incomplete or not (i.e. whether extractor provides only
1767 # video-only or audio-only formats) for proper formats selection for
1768 # extractors with such incomplete formats (see
1769 # https://github.com/ytdl-org/youtube-dl/pull/5556).
1770 # Since formats may be filtered during format selection and may not match
1771 # the original formats the results may be incorrect. Thus original formats
1772 # or pre-calculated metrics should be passed to format selection routines
1774 # We will pass a context object containing all necessary additional data
1775 # instead of just formats.
1776 # This fixes incorrect format selection issue (see
1777 # https://github.com/ytdl-org/youtube-dl/issues/10083).
1778 incomplete_formats
= (
1779 # All formats are video-only or
1780 all(f
.get('vcodec') != 'none' and f
.get('acodec') == 'none' for f
in formats
)
1781 # all formats are audio-only
1782 or all(f
.get('vcodec') == 'none' and f
.get('acodec') != 'none' for f
in formats
))
1786 'incomplete_formats': incomplete_formats
,
1789 formats_to_download
= list(format_selector(ctx
))
1790 if not formats_to_download
:
1791 raise ExtractorError('requested format not available',
1795 self
.to_screen('[info] Downloading format(s) %s' % ", ".join([f
['format_id'] for f
in formats_to_download
]))
1796 if len(formats_to_download
) > 1:
1797 self
.to_screen('[info] %s: downloading video in %s formats' % (info_dict
['id'], len(formats_to_download
)))
1798 for format
in formats_to_download
:
1799 new_info
= dict(info_dict
)
1800 new_info
.update(format
)
1801 self
.process_info(new_info
)
1802 # We update the info dict with the best quality format (backwards compatibility)
1803 info_dict
.update(formats_to_download
[-1])
# Choose which subtitle languages and which format per language to
# download, honouring writesubtitles / writeautomaticsub / allsubtitles /
# subtitleslangs / subtitlesformat params.
# NOTE(review): residue -- the available_subs initialisation, the early
# return, the else-branches and the final result assembly are among the
# original lines not visible here.
1806 def process_subtitles(self
, video_id
, normal_subtitles
, automatic_captions
):
1807 """Select the requested subtitles and their format"""
# Normal subtitles are collected first; automatic captions only fill
# languages that have no normal subtitles.
1809 if normal_subtitles
and self
.params
.get('writesubtitles'):
1810 available_subs
.update(normal_subtitles
)
1811 if automatic_captions
and self
.params
.get('writeautomaticsub'):
1812 for lang
, cap_info
in automatic_captions
.items():
1813 if lang
not in available_subs
:
1814 available_subs
[lang
] = cap_info
# Nothing requested (or nothing available): bail out early --
# the rest of this condition is not visible here.
1816 if (not self
.params
.get('writesubtitles') and not
1817 self
.params
.get('writeautomaticsub') or not
# Language selection: all of them, an explicit list, English if
# present, else the first available language.
1821 if self
.params
.get('allsubtitles', False):
1822 requested_langs
= available_subs
.keys()
1824 if self
.params
.get('subtitleslangs', False):
1825 requested_langs
= self
.params
.get('subtitleslangs')
1826 elif 'en' in available_subs
:
1827 requested_langs
= ['en']
1829 requested_langs
= [list(available_subs
.keys())[0]]
# subtitlesformat is a '/'-separated preference list, e.g. 'srt/best'.
1831 formats_query
= self
.params
.get('subtitlesformat', 'best')
1832 formats_preference
= formats_query
.split('/') if formats_query
else []
1834 for lang
in requested_langs
:
1835 formats
= available_subs
.get(lang
)
1837 self
.report_warning('%s subtitles not available for %s' % (lang
, video_id
))
# Walk the preference list and pick the first ext that matches.
1839 for ext
in formats_preference
:
1843 matches
= list(filter(lambda f
: f
['ext'] == ext
, formats
))
# No preferred ext matched: warn and fall back to some format `f`
# (the fallback assignment itself is not visible here).
1849 self
.report_warning(
1850 'No subtitle format found matching "%s" for language %s, '
1851 'using %s' % (formats_query
, lang
, f
['ext']))
# Emit the fields requested via --force* / --print-style params
# (forcetitle, forceid, forceurl, forcejson, ...) to stdout.
# `incomplete` marks info dicts that may lack fields (pre-download):
# mandatory fields are then only printed when actually present, and
# URL/filename printing is suppressed entirely.
1855 def __forced_printings(self
, info_dict
, filename
, incomplete
):
# Print a field that normally must exist; tolerate absence only for
# incomplete dicts.
1856 def print_mandatory(field
):
1857 if (self
.params
.get('force%s' % field
, False)
1858 and (not incomplete
or info_dict
.get(field
) is not None)):
1859 self
.to_stdout(info_dict
[field
])
# Print a field only when it is present, regardless of completeness.
1861 def print_optional(field
):
1862 if (self
.params
.get('force%s' % field
, False)
1863 and info_dict
.get(field
) is not None):
1864 self
.to_stdout(info_dict
[field
])
1866 print_mandatory('title')
1867 print_mandatory('id')
# forceurl: one line per requested format when formats were merged,
# otherwise the single URL (plus RTMP playpath when present).
1868 if self
.params
.get('forceurl', False) and not incomplete
:
1869 if info_dict
.get('requested_formats') is not None:
1870 for f
in info_dict
['requested_formats']:
1871 self
.to_stdout(f
['url'] + f
.get('play_path', ''))
1873 # For RTMP URLs, also include the playpath
1874 self
.to_stdout(info_dict
['url'] + info_dict
.get('play_path', ''))
1875 print_optional('thumbnail')
1876 print_optional('description')
1877 if self
.params
.get('forcefilename', False) and filename
is not None:
1878 self
.to_stdout(filename
)
1879 if self
.params
.get('forceduration', False) and info_dict
.get('duration') is not None:
1880 self
.to_stdout(formatSeconds(info_dict
['duration']))
1881 print_mandatory('format')
# forcejson dumps the whole info dict as a single JSON line.
1882 if self
.params
.get('forcejson', False):
1883 self
.to_stdout(json
.dumps(info_dict
))
1885 def process_info(self
, info_dict
):
1886 """Process a single resolved IE result."""
1888 assert info_dict
.get('_type', 'video') == 'video'
1890 max_downloads
= self
.params
.get('max_downloads')
1891 if max_downloads
is not None:
1892 if self
._num
_downloads
>= int(max_downloads
):
1893 raise MaxDownloadsReached()
1895 # TODO: backward compatibility, to be removed
1896 info_dict
['fulltitle'] = info_dict
['title']
1898 if 'format' not in info_dict
:
1899 info_dict
['format'] = info_dict
['ext']
1901 if self
._match
_entry
(info_dict
, incomplete
=False) is not None:
1904 self
._num
_downloads
+= 1
1906 info_dict
['_filename'] = filename
= self
.prepare_filename(info_dict
)
1909 self
.__forced
_printings
(info_dict
, filename
, incomplete
=False)
1911 if self
.params
.get('simulate', False):
1912 if self
.params
.get('force_write_download_archive', False):
1913 self
.record_download_archive(info_dict
)
1915 # Do nothing else if in simulate mode
1918 if filename
is None:
1921 def ensure_dir_exists(path
):
1923 dn
= os
.path
.dirname(path
)
1924 if dn
and not os
.path
.exists(dn
):
1927 except (OSError, IOError) as err
:
1928 self
.report_error('unable to create directory ' + error_to_compat_str(err
))
1931 if not ensure_dir_exists(sanitize_path(encodeFilename(filename
))):
1934 if self
.params
.get('writedescription', False):
1935 descfn
= replace_extension(filename
, 'description', info_dict
.get('ext'))
1936 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(descfn
)):
1937 self
.to_screen('[info] Video description is already present')
1938 elif info_dict
.get('description') is None:
1939 self
.report_warning('There\'s no description to write.')
1942 self
.to_screen('[info] Writing video description to: ' + descfn
)
1943 with io
.open(encodeFilename(descfn
), 'w', encoding
='utf-8') as descfile
:
1944 descfile
.write(info_dict
['description'])
1945 except (OSError, IOError):
1946 self
.report_error('Cannot write description file ' + descfn
)
1949 if self
.params
.get('writeannotations', False):
1950 annofn
= replace_extension(filename
, 'annotations.xml', info_dict
.get('ext'))
1951 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(annofn
)):
1952 self
.to_screen('[info] Video annotations are already present')
1953 elif not info_dict
.get('annotations'):
1954 self
.report_warning('There are no annotations to write.')
1957 self
.to_screen('[info] Writing video annotations to: ' + annofn
)
1958 with io
.open(encodeFilename(annofn
), 'w', encoding
='utf-8') as annofile
:
1959 annofile
.write(info_dict
['annotations'])
1960 except (KeyError, TypeError):
1961 self
.report_warning('There are no annotations to write.')
1962 except (OSError, IOError):
1963 self
.report_error('Cannot write annotations file: ' + annofn
)
1966 def dl(name
, info
, subtitle
=False):
1967 fd
= get_suitable_downloader(info
, self
.params
)(self
, self
.params
)
1968 for ph
in self
._progress
_hooks
:
1969 fd
.add_progress_hook(ph
)
1970 if self
.params
.get('verbose'):
1971 self
.to_screen('[debug] Invoking downloader on %r' % info
.get('url'))
1972 return fd
.download(name
, info
, subtitle
)
1974 subtitles_are_requested
= any([self
.params
.get('writesubtitles', False),
1975 self
.params
.get('writeautomaticsub')])
1977 if subtitles_are_requested
and info_dict
.get('requested_subtitles'):
1978 # subtitles download errors are already managed as troubles in relevant IE
1979 # that way it will silently go on when used with unsupporting IE
1980 subtitles
= info_dict
['requested_subtitles']
1981 # ie = self.get_info_extractor(info_dict['extractor_key'])
1982 for sub_lang
, sub_info
in subtitles
.items():
1983 sub_format
= sub_info
['ext']
1984 sub_filename
= subtitles_filename(filename
, sub_lang
, sub_format
, info_dict
.get('ext'))
1985 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(sub_filename
)):
1986 self
.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang
, sub_format
))
1988 self
.to_screen('[info] Writing video subtitles to: ' + sub_filename
)
1989 if sub_info
.get('data') is not None:
1991 # Use newline='' to prevent conversion of newline characters
1992 # See https://github.com/ytdl-org/youtube-dl/issues/10268
1993 with io
.open(encodeFilename(sub_filename
), 'w', encoding
='utf-8', newline
='') as subfile
:
1994 subfile
.write(sub_info
['data'])
1995 except (OSError, IOError):
1996 self
.report_error('Cannot write subtitles file ' + sub_filename
)
2000 dl(sub_filename
, sub_info
, subtitle
=True)
2002 if self.params.get('sleep_interval_subtitles', False):
2003 dl(sub_filename, sub_info)
2005 sub_data = ie._request_webpage(
2006 sub_info['url'], info_dict['id'], note=False).read()
2007 with io.open(encodeFilename(sub_filename), 'wb') as subfile:
2008 subfile.write(sub_data)
2010 except (ExtractorError
, IOError, OSError, ValueError, compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
:
2011 self
.report_warning('Unable to download subtitle for "%s": %s' %
2012 (sub_lang
, error_to_compat_str(err
)))
2015 if self
.params
.get('skip_download', False):
2016 if self
.params
.get('convertsubtitles', False):
2017 subconv
= FFmpegSubtitlesConvertorPP(self
, format
=self
.params
.get('convertsubtitles'))
2018 filename_real_ext
= os
.path
.splitext(filename
)[1][1:]
2020 os
.path
.splitext(filename
)[0]
2021 if filename_real_ext
== info_dict
['ext']
2023 afilename
= '%s.%s' % (filename_wo_ext
, self
.params
.get('convertsubtitles'))
2024 if subconv
.available
:
2025 info_dict
.setdefault('__postprocessors', [])
2026 # info_dict['__postprocessors'].append(subconv)
2027 if os
.path
.exists(encodeFilename(afilename
)):
2029 '[download] %s has already been downloaded and '
2030 'converted' % afilename
)
2033 self
.post_process(filename
, info_dict
)
2034 except (PostProcessingError
) as err
:
2035 self
.report_error('postprocessing: %s' % str(err
))
2038 if self
.params
.get('writeinfojson', False):
2039 infofn
= replace_extension(filename
, 'info.json', info_dict
.get('ext'))
2040 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(infofn
)):
2041 self
.to_screen('[info] Video description metadata is already present')
2043 self
.to_screen('[info] Writing video description metadata as JSON to: ' + infofn
)
2045 write_json_file(self
.filter_requested_info(info_dict
), infofn
)
2046 except (OSError, IOError):
2047 self
.report_error('Cannot write metadata to JSON file ' + infofn
)
2050 self
._write
_thumbnails
(info_dict
, filename
)
2052 # Write internet shortcut files
2053 url_link
= webloc_link
= desktop_link
= False
2054 if self
.params
.get('writelink', False):
2055 if sys
.platform
== "darwin": # macOS.
2057 elif sys
.platform
.startswith("linux"):
2059 else: # if sys.platform in ['win32', 'cygwin']:
2061 if self
.params
.get('writeurllink', False):
2063 if self
.params
.get('writewebloclink', False):
2065 if self
.params
.get('writedesktoplink', False):
2068 if url_link
or webloc_link
or desktop_link
:
2069 if 'webpage_url' not in info_dict
:
2070 self
.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2072 ascii_url
= iri_to_uri(info_dict
['webpage_url'])
2074 def _write_link_file(extension
, template
, newline
, embed_filename
):
2075 linkfn
= replace_extension(filename
, extension
, info_dict
.get('ext'))
2076 if self
.params
.get('nooverwrites', False) and os
.path
.exists(encodeFilename(linkfn
)):
2077 self
.to_screen('[info] Internet shortcut is already present')
2080 self
.to_screen('[info] Writing internet shortcut to: ' + linkfn
)
2081 with io
.open(encodeFilename(to_high_limit_path(linkfn
)), 'w', encoding
='utf-8', newline
=newline
) as linkfile
:
2082 template_vars
= {'url': ascii_url}
2084 template_vars
['filename'] = linkfn
[:-(len(extension
) + 1)]
2085 linkfile
.write(template
% template_vars
)
2086 except (OSError, IOError):
2087 self
.report_error('Cannot write internet shortcut ' + linkfn
)
2092 if not _write_link_file('url', DOT_URL_LINK_TEMPLATE
, '\r\n', embed_filename
=False):
2095 if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE
, '\n', embed_filename
=False):
2098 if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE
, '\n', embed_filename
=True):
2102 must_record_download_archive
= False
2103 if not self
.params
.get('skip_download', False):
2105 if info_dict
.get('requested_formats') is not None:
2108 merger
= FFmpegMergerPP(self
)
2109 if not merger
.available
:
2111 self
.report_warning('You have requested multiple '
2112 'formats but ffmpeg or avconv are not installed.'
2113 ' The formats won\'t be merged.')
2115 postprocessors
= [merger
]
2117 def compatible_formats(formats
):
2118 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2119 video_formats
= [format
for format
in formats
if format
.get('vcodec') != 'none']
2120 audio_formats
= [format
for format
in formats
if format
.get('acodec') != 'none']
2121 if len(video_formats
) > 2 or len(audio_formats
) > 2:
2125 exts
= set(format
.get('ext') for format
in formats
)
2127 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2130 for ext_sets
in COMPATIBLE_EXTS
:
2131 if ext_sets
.issuperset(exts
):
2133 # TODO: Check acodec/vcodec
2136 filename_real_ext
= os
.path
.splitext(filename
)[1][1:]
2138 os
.path
.splitext(filename
)[0]
2139 if filename_real_ext
== info_dict
['ext']
2141 requested_formats
= info_dict
['requested_formats']
2142 if self
.params
.get('merge_output_format') is None and not compatible_formats(requested_formats
):
2143 info_dict
['ext'] = 'mkv'
2144 self
.report_warning(
2145 'Requested formats are incompatible for merge and will be merged into mkv.')
2146 # Ensure filename always has a correct extension for successful merge
2147 filename
= '%s.%s' % (filename_wo_ext
, info_dict
['ext'])
2148 file_exists
= os
.path
.exists(encodeFilename(filename
))
2149 if not self
.params
.get('overwrites', False) and file_exists
:
2151 '[download] %s has already been downloaded and '
2152 'merged' % filename
)
2155 self
.report_file_delete(filename
)
2156 os
.remove(encodeFilename(filename
))
2157 for f
in requested_formats
:
2158 new_info
= dict(info_dict
)
2160 fname
= prepend_extension(
2161 self
.prepare_filename(new_info
),
2162 'f%s' % f
['format_id'], new_info
['ext'])
2163 if not ensure_dir_exists(fname
):
2165 downloaded
.append(fname
)
2166 partial_success
, real_download
= dl(fname
, new_info
)
2167 success
= success
and partial_success
2168 info_dict
['__postprocessors'] = postprocessors
2169 info_dict
['__files_to_merge'] = downloaded
2170 # Even if there were no downloads, it is being merged only now
2171 info_dict
['__real_download'] = True
2173 # Delete existing file with --yes-overwrites
2174 if self
.params
.get('overwrites', False):
2175 if os
.path
.exists(encodeFilename(filename
)):
2176 self
.report_file_delete(filename
)
2177 os
.remove(encodeFilename(filename
))
2178 # Just a single file
2179 success
, real_download
= dl(filename
, info_dict
)
2180 info_dict
['__real_download'] = real_download
2181 except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
:
2182 self
.report_error('unable to download video data: %s' % error_to_compat_str(err
))
2184 except (OSError, IOError) as err
:
2185 raise UnavailableVideoError(err
)
2186 except (ContentTooShortError
, ) as err
:
2187 self
.report_error('content too short (expected %s bytes and served %s)' % (err
.expected
, err
.downloaded
))
2190 if success
and filename
!= '-':
2192 fixup_policy
= self
.params
.get('fixup')
2193 if fixup_policy
is None:
2194 fixup_policy
= 'detect_or_warn'
2196 INSTALL_FFMPEG_MESSAGE
= 'Install ffmpeg or avconv to fix this automatically.'
2198 stretched_ratio
= info_dict
.get('stretched_ratio')
2199 if stretched_ratio
is not None and stretched_ratio
!= 1:
2200 if fixup_policy
== 'warn':
2201 self
.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2202 info_dict
['id'], stretched_ratio
))
2203 elif fixup_policy
== 'detect_or_warn':
2204 stretched_pp
= FFmpegFixupStretchedPP(self
)
2205 if stretched_pp
.available
:
2206 info_dict
.setdefault('__postprocessors', [])
2207 info_dict
['__postprocessors'].append(stretched_pp
)
2209 self
.report_warning(
2210 '%s: Non-uniform pixel ratio (%s). %s'
2211 % (info_dict
['id'], stretched_ratio
, INSTALL_FFMPEG_MESSAGE
))
2213 assert fixup_policy
in ('ignore', 'never')
2215 if (info_dict
.get('requested_formats') is None
2216 and info_dict
.get('container') == 'm4a_dash'):
2217 if fixup_policy
== 'warn':
2218 self
.report_warning(
2219 '%s: writing DASH m4a. '
2220 'Only some players support this container.'
2222 elif fixup_policy
== 'detect_or_warn':
2223 fixup_pp
= FFmpegFixupM4aPP(self
)
2224 if fixup_pp
.available
:
2225 info_dict
.setdefault('__postprocessors', [])
2226 info_dict
['__postprocessors'].append(fixup_pp
)
2228 self
.report_warning(
2229 '%s: writing DASH m4a. '
2230 'Only some players support this container. %s'
2231 % (info_dict
['id'], INSTALL_FFMPEG_MESSAGE
))
2233 assert fixup_policy
in ('ignore', 'never')
2235 if (info_dict
.get('protocol') == 'm3u8_native'
2236 or info_dict
.get('protocol') == 'm3u8'
2237 and self
.params
.get('hls_prefer_native')):
2238 if fixup_policy
== 'warn':
2239 self
.report_warning('%s: malformed AAC bitstream detected.' % (
2241 elif fixup_policy
== 'detect_or_warn':
2242 fixup_pp
= FFmpegFixupM3u8PP(self
)
2243 if fixup_pp
.available
:
2244 info_dict
.setdefault('__postprocessors', [])
2245 info_dict
['__postprocessors'].append(fixup_pp
)
2247 self
.report_warning(
2248 '%s: malformed AAC bitstream detected. %s'
2249 % (info_dict
['id'], INSTALL_FFMPEG_MESSAGE
))
2251 assert fixup_policy
in ('ignore', 'never')
2254 self
.post_process(filename
, info_dict
)
2255 except (PostProcessingError
) as err
:
2256 self
.report_error('postprocessing: %s' % str(err
))
2259 for ph
in self
._post
_hooks
:
2261 except Exception as err
:
2262 self
.report_error('post hooks: %s' % str(err
))
2264 must_record_download_archive
= True
2266 if must_record_download_archive
or self
.params
.get('force_write_download_archive', False):
2267 self
.record_download_archive(info_dict
)
2268 max_downloads
= self
.params
.get('max_downloads')
2269 if max_downloads
is not None and self
._num
_downloads
>= int(max_downloads
):
2270 raise MaxDownloadsReached()
2272 def download(self
, url_list
):
2273 """Download a given list of URLs."""
2274 outtmpl
= self
.params
.get('outtmpl', DEFAULT_OUTTMPL
)
2275 if (len(url_list
) > 1
2277 and '%' not in outtmpl
2278 and self
.params
.get('max_downloads') != 1):
2279 raise SameFileError(outtmpl
)
2281 for url
in url_list
:
2283 # It also downloads the videos
2284 res
= self
.extract_info(
2285 url
, force_generic_extractor
=self
.params
.get('force_generic_extractor', False))
2286 except UnavailableVideoError
:
2287 self
.report_error('unable to download video')
2288 except MaxDownloadsReached
:
2289 self
.to_screen('[info] Maximum number of downloaded files reached')
2291 except ExistingVideoReached
:
2292 self
.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
2294 except RejectedVideoReached
:
2295 self
.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
2298 if self
.params
.get('dump_single_json', False):
2299 self
.to_stdout(json
.dumps(res
))
2301 return self
._download
_retcode
2303 def download_with_info_file(self
, info_filename
):
2304 with contextlib
.closing(fileinput
.FileInput(
2305 [info_filename
], mode
='r',
2306 openhook
=fileinput
.hook_encoded('utf-8'))) as f
:
2307 # FileInput doesn't have a read method, we can't call json.load
2308 info
= self
.filter_requested_info(json
.loads('\n'.join(f
)))
2310 self
.process_ie_result(info
, download
=True)
2311 except DownloadError
:
2312 webpage_url
= info
.get('webpage_url')
2313 if webpage_url
is not None:
2314 self
.report_warning('The info failed to download, trying with "%s"' % webpage_url
)
2315 return self
.download([webpage_url
])
2318 return self
._download
_retcode
2321 def filter_requested_info(info_dict
):
2323 (k
, v
) for k
, v
in info_dict
.items()
2324 if k
not in ['requested_formats', 'requested_subtitles'])
2326 def post_process(self
, filename
, ie_info
):
2327 """Run all the postprocessors on the given file."""
2328 info
= dict(ie_info
)
2329 info
['filepath'] = filename
2331 if ie_info
.get('__postprocessors') is not None:
2332 pps_chain
.extend(ie_info
['__postprocessors'])
2333 pps_chain
.extend(self
._pps
)
2334 for pp
in pps_chain
:
2335 files_to_delete
= []
2337 files_to_delete
, info
= pp
.run(info
)
2338 except PostProcessingError
as e
:
2339 self
.report_error(e
.msg
)
2340 if files_to_delete
and not self
.params
.get('keepvideo', False):
2341 for old_filename
in set(files_to_delete
):
2342 self
.to_screen('Deleting original file %s (pass -k to keep)' % old_filename
)
2344 os
.remove(encodeFilename(old_filename
))
2345 except (IOError, OSError):
2346 self
.report_warning('Unable to remove downloaded original file')
2348 def _make_archive_id(self
, info_dict
):
2349 video_id
= info_dict
.get('id')
2352 # Future-proof against any change in case
2353 # and backwards compatibility with prior versions
2354 extractor
= info_dict
.get('extractor_key') or info_dict
.get('ie_key') # key in a playlist
2355 if extractor
is None:
2356 url
= str_or_none(info_dict
.get('url'))
2359 # Try to find matching extractor for the URL and take its ie_key
2360 for ie
in self
._ies
:
2361 if ie
.suitable(url
):
2362 extractor
= ie
.ie_key()
2366 return extractor
.lower() + ' ' + video_id
2368 def in_download_archive(self
, info_dict
):
2369 fn
= self
.params
.get('download_archive')
2373 vid_id
= self
._make
_archive
_id
(info_dict
)
2375 return False # Incomplete video information
2377 return vid_id
in self
.archive
2379 def record_download_archive(self
, info_dict
):
2380 fn
= self
.params
.get('download_archive')
2383 vid_id
= self
._make
_archive
_id
(info_dict
)
2385 with locked_file(fn
, 'a', encoding
='utf-8') as archive_file
:
2386 archive_file
.write(vid_id
+ '\n')
2387 self
.archive
.add(vid_id
)
2390 def format_resolution(format
, default
='unknown'):
2391 if format
.get('vcodec') == 'none':
2393 if format
.get('resolution') is not None:
2394 return format
['resolution']
2395 if format
.get('height') is not None:
2396 if format
.get('width') is not None:
2397 res
= '%sx%s' % (format
['width'], format
['height'])
2399 res
= '%sp' % format
['height']
2400 elif format
.get('width') is not None:
2401 res
= '%dx?' % format
['width']
2406 def _format_note(self
, fdict
):
2408 if fdict
.get('ext') in ['f4f', 'f4m']:
2409 res
+= '(unsupported) '
2410 if fdict
.get('language'):
2413 res
+= '[%s] ' % fdict
['language']
2414 if fdict
.get('format_note') is not None:
2415 res
+= fdict
['format_note'] + ' '
2416 if fdict
.get('tbr') is not None:
2417 res
+= '%4dk ' % fdict
['tbr']
2418 if fdict
.get('container') is not None:
2421 res
+= '%s container' % fdict
['container']
2422 if (fdict
.get('vcodec') is not None
2423 and fdict
.get('vcodec') != 'none'):
2426 res
+= fdict
['vcodec']
2427 if fdict
.get('vbr') is not None:
2429 elif fdict
.get('vbr') is not None and fdict
.get('abr') is not None:
2431 if fdict
.get('vbr') is not None:
2432 res
+= '%4dk' % fdict
['vbr']
2433 if fdict
.get('fps') is not None:
2436 res
+= '%sfps' % fdict
['fps']
2437 if fdict
.get('acodec') is not None:
2440 if fdict
['acodec'] == 'none':
2443 res
+= '%-5s' % fdict
['acodec']
2444 elif fdict
.get('abr') is not None:
2448 if fdict
.get('abr') is not None:
2449 res
+= '@%3dk' % fdict
['abr']
2450 if fdict
.get('asr') is not None:
2451 res
+= ' (%5dHz)' % fdict
['asr']
2452 if fdict
.get('filesize') is not None:
2455 res
+= format_bytes(fdict
['filesize'])
2456 elif fdict
.get('filesize_approx') is not None:
2459 res
+= '~' + format_bytes(fdict
['filesize_approx'])
2462 def _format_note_table(self
, f
):
2463 def join_fields(*vargs
):
2464 return ', '.join((val
for val
in vargs
if val
!= ''))
2467 'UNSUPPORTED' if f
.get('ext') in ('f4f', 'f4m') else '',
2468 format_field(f
, 'language', '[%s]'),
2469 format_field(f
, 'format_note'),
2470 format_field(f
, 'container', ignore
=(None, f
.get('ext'))),
2471 format_field(f
, 'asr', '%5dHz'))
2473 def list_formats(self
, info_dict
):
2474 formats
= info_dict
.get('formats', [info_dict
])
2475 new_format
= self
.params
.get('listformats_table', False)
2479 format_field(f
, 'format_id'),
2480 format_field(f
, 'ext'),
2481 self
.format_resolution(f
),
2482 format_field(f
, 'fps', '%d'),
2484 format_field(f
, 'filesize', ' %s', func
=format_bytes
) + format_field(f
, 'filesize_approx', '~%s', func
=format_bytes
),
2485 format_field(f
, 'tbr', '%4dk'),
2486 f
.get('protocol').replace('http_dash_segments', 'dash').replace("native", "n"),
2488 format_field(f
, 'vcodec', default
='unknown').replace('none', ''),
2489 format_field(f
, 'vbr', '%4dk'),
2490 format_field(f
, 'acodec', default
='unknown').replace('none', ''),
2491 format_field(f
, 'abr', '%3dk'),
2492 format_field(f
, 'asr', '%5dHz'),
2493 self
._format
_note
_table
(f
)]
2495 if f
.get('preference') is None or f
['preference'] >= -1000]
2496 header_line
= ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
2497 '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
2501 format_field(f
, 'format_id'),
2502 format_field(f
, 'ext'),
2503 self
.format_resolution(f
),
2504 self
._format
_note
(f
)]
2506 if f
.get('preference') is None or f
['preference'] >= -1000]
2507 header_line
= ['format code', 'extension', 'resolution', 'note']
2509 # if len(formats) > 1:
2510 # table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
2512 '[info] Available formats for %s:\n%s' % (info_dict
['id'], render_table(
2516 extraGap
=(0 if new_format
else 1),
2517 hideEmpty
=new_format
)))
2519 def list_thumbnails(self
, info_dict
):
2520 thumbnails
= info_dict
.get('thumbnails')
2522 self
.to_screen('[info] No thumbnails present for %s' % info_dict
['id'])
2526 '[info] Thumbnails for %s:' % info_dict
['id'])
2527 self
.to_screen(render_table(
2528 ['ID', 'width', 'height', 'URL'],
2529 [[t
['id'], t
.get('width', 'unknown'), t
.get('height', 'unknown'), t
['url']] for t
in thumbnails
]))
2531 def list_subtitles(self
, video_id
, subtitles
, name
='subtitles'):
2533 self
.to_screen('%s has no %s' % (video_id
, name
))
2536 'Available %s for %s:' % (name
, video_id
))
2537 self
.to_screen(render_table(
2538 ['Language', 'formats'],
2539 [[lang
, ', '.join(f
['ext'] for f
in reversed(formats
))]
2540 for lang
, formats
in subtitles
.items()]))
2542 def urlopen(self
, req
):
2543 """ Start an HTTP download """
2544 if isinstance(req
, compat_basestring
):
2545 req
= sanitized_Request(req
)
2546 return self
._opener
.open(req
, timeout
=self
._socket
_timeout
)
2548 def print_debug_header(self
):
2549 if not self
.params
.get('verbose'):
2552 if type('') is not compat_str
:
2553 # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
2554 self
.report_warning(
2555 'Your Python is broken! Update to a newer and supported version')
2557 stdout_encoding
= getattr(
2558 sys
.stdout
, 'encoding', 'missing (%s)' % type(sys
.stdout
).__name
__)
2560 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
2561 locale
.getpreferredencoding(),
2562 sys
.getfilesystemencoding(),
2564 self
.get_encoding()))
2565 write_string(encoding_str
, encoding
=None)
2567 self
._write
_string
('[debug] yt-dlp version ' + __version__
+ '\n')
2569 self
._write
_string
('[debug] Lazy loading extractors enabled' + '\n')
2571 sp
= subprocess
.Popen(
2572 ['git', 'rev-parse', '--short', 'HEAD'],
2573 stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
,
2574 cwd
=os
.path
.dirname(os
.path
.abspath(__file__
)))
2575 out
, err
= process_communicate_or_kill(sp
)
2576 out
= out
.decode().strip()
2577 if re
.match('[0-9a-f]+', out
):
2578 self
._write
_string
('[debug] Git HEAD: ' + out
+ '\n')
2585 def python_implementation():
2586 impl_name
= platform
.python_implementation()
2587 if impl_name
== 'PyPy' and hasattr(sys
, 'pypy_version_info'):
2588 return impl_name
+ ' version %d.%d.%d' % sys
.pypy_version_info
[:3]
2591 self
._write
_string
('[debug] Python version %s (%s) - %s\n' % (
2592 platform
.python_version(), python_implementation(),
2595 exe_versions
= FFmpegPostProcessor
.get_versions(self
)
2596 exe_versions
['rtmpdump'] = rtmpdump_version()
2597 exe_versions
['phantomjs'] = PhantomJSwrapper
._version
()
2598 exe_str
= ', '.join(
2600 for exe
, v
in sorted(exe_versions
.items())
2605 self
._write
_string
('[debug] exe versions: %s\n' % exe_str
)
2608 for handler
in self
._opener
.handlers
:
2609 if hasattr(handler
, 'proxies'):
2610 proxy_map
.update(handler
.proxies
)
2611 self
._write
_string
('[debug] Proxy map: ' + compat_str(proxy_map
) + '\n')
2613 if self
.params
.get('call_home', False):
2614 ipaddr
= self
.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
2615 self
._write
_string
('[debug] Public IP address: %s\n' % ipaddr
)
2617 latest_version
= self
.urlopen(
2618 'https://yt-dl.org/latest/version').read().decode('utf-8')
2619 if version_tuple(latest_version
) > version_tuple(__version__
):
2620 self
.report_warning(
2621 'You are using an outdated version (newest version: %s)! '
2622 'See https://yt-dl.org/update if you need help updating.' %
2625 def _setup_opener(self
):
2626 timeout_val
= self
.params
.get('socket_timeout')
2627 self
._socket
_timeout
= 600 if timeout_val
is None else float(timeout_val
)
2629 opts_cookiefile
= self
.params
.get('cookiefile')
2630 opts_proxy
= self
.params
.get('proxy')
2632 if opts_cookiefile
is None:
2633 self
.cookiejar
= compat_cookiejar
.CookieJar()
2635 opts_cookiefile
= expand_path(opts_cookiefile
)
2636 self
.cookiejar
= YoutubeDLCookieJar(opts_cookiefile
)
2637 if os
.access(opts_cookiefile
, os
.R_OK
):
2638 self
.cookiejar
.load(ignore_discard
=True, ignore_expires
=True)
2640 cookie_processor
= YoutubeDLCookieProcessor(self
.cookiejar
)
2641 if opts_proxy
is not None:
2642 if opts_proxy
== '':
2645 proxies
= {'http': opts_proxy, 'https': opts_proxy}
2647 proxies
= compat_urllib_request
.getproxies()
2648 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
2649 if 'http' in proxies
and 'https' not in proxies
:
2650 proxies
['https'] = proxies
['http']
2651 proxy_handler
= PerRequestProxyHandler(proxies
)
2653 debuglevel
= 1 if self
.params
.get('debug_printtraffic') else 0
2654 https_handler
= make_HTTPS_handler(self
.params
, debuglevel
=debuglevel
)
2655 ydlh
= YoutubeDLHandler(self
.params
, debuglevel
=debuglevel
)
2656 redirect_handler
= YoutubeDLRedirectHandler()
2657 data_handler
= compat_urllib_request_DataHandler()
2659 # When passing our own FileHandler instance, build_opener won't add the
2660 # default FileHandler and allows us to disable the file protocol, which
2661 # can be used for malicious purposes (see
2662 # https://github.com/ytdl-org/youtube-dl/issues/8227)
2663 file_handler
= compat_urllib_request
.FileHandler()
2665 def file_open(*args
, **kwargs
):
2666 raise compat_urllib_error
.URLError('file:// scheme is explicitly disabled in youtube-dlc for security reasons')
2667 file_handler
.file_open
= file_open
2669 opener
= compat_urllib_request
.build_opener(
2670 proxy_handler
, https_handler
, cookie_processor
, ydlh
, redirect_handler
, data_handler
, file_handler
)
2672 # Delete the default user-agent header, which would otherwise apply in
2673 # cases where our custom HTTP handler doesn't come into play
2674 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
2675 opener
.addheaders
= []
2676 self
._opener
= opener
2678 def encode(self
, s
):
2679 if isinstance(s
, bytes):
2680 return s
# Already encoded
2683 return s
.encode(self
.get_encoding())
2684 except UnicodeEncodeError as err
:
2685 err
.reason
= err
.reason
+ '. Check your system encoding configuration or use the --encoding option.'
2688 def get_encoding(self
):
2689 encoding
= self
.params
.get('encoding')
2690 if encoding
is None:
2691 encoding
= preferredencoding()
2694 def _write_thumbnails(self
, info_dict
, filename
):
2695 if self
.params
.get('writethumbnail', False):
2696 thumbnails
= info_dict
.get('thumbnails')
2698 thumbnails
= [thumbnails
[-1]]
2699 elif self
.params
.get('write_all_thumbnails', False):
2700 thumbnails
= info_dict
.get('thumbnails')
2705 # No thumbnails present, so return immediately
2708 for t
in thumbnails
:
2709 thumb_ext
= determine_ext(t
['url'], 'jpg')
2710 suffix
= '_%s' % t
['id'] if len(thumbnails
) > 1 else ''
2711 thumb_display_id
= '%s ' % t
['id'] if len(thumbnails
) > 1 else ''
2712 t
['filename'] = thumb_filename
= replace_extension(filename
+ suffix
, thumb_ext
, info_dict
.get('ext'))
2714 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(thumb_filename
)):
2715 self
.to_screen('[%s] %s: Thumbnail %sis already present' %
2716 (info_dict
['extractor'], info_dict
['id'], thumb_display_id
))
2718 self
.to_screen('[%s] %s: Downloading thumbnail %s...' %
2719 (info_dict
['extractor'], info_dict
['id'], thumb_display_id
))
2721 uf
= self
.urlopen(t
['url'])
2722 with open(encodeFilename(thumb_filename
), 'wb') as thumbf
:
2723 shutil
.copyfileobj(uf
, thumbf
)
2724 self
.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2725 (info_dict
['extractor'], info_dict
['id'], thumb_display_id
, thumb_filename
))
2726 except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
:
2727 self
.report_warning('Unable to download thumbnail "%s": %s' %
2728 (t
['url'], error_to_compat_str(err
)))