youtube_dlc/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import copy
   9 import datetime
  10 import errno
  11 import fileinput
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import socket
  23 import sys
  24 import time
  25 import tokenize
  26 import traceback
  27 import random
  28
  29 from string import ascii_letters
  30
  31 from .compat import (
  32     compat_basestring,
  33     compat_cookiejar,
  34     compat_get_terminal_size,
  35     compat_http_client,
  36     compat_kwargs,
  37     compat_numeric_types,
  38     compat_os_name,
  39     compat_str,
  40     compat_tokenize_tokenize,
  41     compat_urllib_error,
  42     compat_urllib_request,
  43     compat_urllib_request_DataHandler,
  44 )
  45 from .utils import (
  46     age_restricted,
  47     args_to_str,
  48     ContentTooShortError,
  49     date_from_str,
  50     DateRange,
  51     DEFAULT_OUTTMPL,
  52     determine_ext,
  53     determine_protocol,
  54     DOT_DESKTOP_LINK_TEMPLATE,
  55     DOT_URL_LINK_TEMPLATE,
  56     DOT_WEBLOC_LINK_TEMPLATE,
  57     DownloadError,
  58     encode_compat_str,
  59     encodeFilename,
  60     error_to_compat_str,
  61     ExistingVideoReached,
  62     expand_path,
  63     ExtractorError,
  64     format_bytes,
  65     format_field,
  66     formatSeconds,
  67     GeoRestrictedError,
  68     int_or_none,
  69     iri_to_uri,
  70     ISO3166Utils,
  71     locked_file,
  72     make_dir,
  73     make_HTTPS_handler,
  74     MaxDownloadsReached,
  75     orderedSet,
  76     PagedList,
  77     parse_filesize,
  78     PerRequestProxyHandler,
  79     platform_name,
  80     PostProcessingError,
  81     preferredencoding,
  82     prepend_extension,
  83     register_socks_protocols,
  84     render_table,
  85     replace_extension,
  86     RejectedVideoReached,
  87     SameFileError,
  88     sanitize_filename,
  89     sanitize_path,
  90     sanitize_url,
  91     sanitized_Request,
  92     std_headers,
  93     str_or_none,
  94     subtitles_filename,
  95     to_high_limit_path,
  96     UnavailableVideoError,
  97     url_basename,
  98     version_tuple,
  99     write_json_file,
 100     write_string,
 101     YoutubeDLCookieJar,
 102     YoutubeDLCookieProcessor,
 103     YoutubeDLHandler,
 104     YoutubeDLRedirectHandler,
 105     process_communicate_or_kill,
 106 )
 107 from .cache import Cache
 108 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
 109 from .extractor.openload import PhantomJSwrapper
 110 from .downloader import get_suitable_downloader
 111 from .downloader.rtmp import rtmpdump_version
 112 from .postprocessor import (
 113     FFmpegFixupM3u8PP,
 114     FFmpegFixupM4aPP,
 115     FFmpegFixupStretchedPP,
 116     FFmpegMergerPP,
 117     FFmpegPostProcessor,
 118     # FFmpegSubtitlesConvertorPP,
 119     get_postprocessor,
 120     MoveFilesAfterDownloadPP,
 121 )
 122 from .version import __version__
 123
 124 if compat_os_name == 'nt':
 125     import ctypes
 126
 127
 128 class YoutubeDL(object):
 129     """YoutubeDL class.
 130
 131     YoutubeDL objects are the ones responsible of downloading the
 132     actual video file and writing it to disk if the user has requested
 133     it, among some other tasks. In most cases there should be one per
 134     program. As, given a video URL, the downloader doesn't know how to
 135     extract all the needed information, task that InfoExtractors do, it
 136     has to pass the URL to one of them.
 137
 138     For this, YoutubeDL objects have a method that allows
 139     InfoExtractors to be registered in a given order. When it is passed
 140     a URL, the YoutubeDL object handles it to the first InfoExtractor it
 141     finds that reports being able to handle it. The InfoExtractor extracts
 142     all the information about the video or videos the URL refers to, and
 143     YoutubeDL process the extracted information, possibly using a File
 144     Downloader to download the video.
 145
 146     YoutubeDL objects accept a lot of parameters. In order not to saturate
 147     the object constructor with arguments, it receives a dictionary of
 148     options instead. These options are available through the params
 149     attribute for the InfoExtractors to use. The YoutubeDL also
 150     registers itself as the downloader in charge for the InfoExtractors
 151     that are added to it, so this is a "mutual registration".
 152
 153     Available options:
 154
 155     username:          Username for authentication purposes.
 156     password:          Password for authentication purposes.
 157     videopassword:     Password for accessing a video.
 158     ap_mso:            Adobe Pass multiple-system operator identifier.
 159     ap_username:       Multiple-system operator account username.
 160     ap_password:       Multiple-system operator account password.
 161     usenetrc:          Use netrc for authentication instead.
 162     verbose:           Print additional info to stdout.
 163     quiet:             Do not print messages to stdout.
 164     no_warnings:       Do not print out anything for warnings.
 165     forceurl:          Force printing final URL.
 166     forcetitle:        Force printing title.
 167     forceid:           Force printing ID.
 168     forcethumbnail:    Force printing thumbnail URL.
 169     forcedescription:  Force printing description.
 170     forcefilename:     Force printing final filename.
 171     forceduration:     Force printing duration.
 172     forcejson:         Force printing info_dict as JSON.
 173     dump_single_json:  Force printing the info_dict of the whole playlist
 174                        (or video) as a single JSON line.
 175     force_write_download_archive: Force writing download archive regardless of
 176                        'skip_download' or 'simulate'.
 177     simulate:          Do not download the video files.
 178     format:            Video format code. see "FORMAT SELECTION" for more details.
 179     format_sort:       How to sort the video formats. see "Sorting Formats" for more details.
 180     format_sort_force: Force the given format_sort. see "Sorting Formats" for more details.
 181     allow_multiple_video_streams:   Allow multiple video streams to be merged into a single file
 182     allow_multiple_audio_streams:   Allow multiple audio streams to be merged into a single file
 183     outtmpl:           Template for output names.
 184     restrictfilenames: Do not allow "&" and spaces in file names.
 185     trim_file_name:    Limit length of filename (extension excluded).
 186     ignoreerrors:      Do not stop on download errors. (Default True when running youtube-dlc, but False when directly accessing YoutubeDL class)
 187     force_generic_extractor: Force downloader to use the generic extractor
 188     overwrites:        Overwrite all video and metadata files if True,
 189                        overwrite only non-video files if None
 190                        and don't overwrite any file if False
 191     playliststart:     Playlist item to start at.
 192     playlistend:       Playlist item to end at.
 193     playlist_items:    Specific indices of playlist to download.
 194     playlistreverse:   Download playlist items in reverse order.
 195     playlistrandom:    Download playlist items in random order.
 196     matchtitle:        Download only matching titles.
 197     rejecttitle:       Reject downloads for matching titles.
 198     logger:            Log messages to a logging.Logger instance.
 199     logtostderr:       Log messages to stderr instead of stdout.
 200     writedescription:  Write the video description to a .description file
 201     writeinfojson:     Write the video description to a .info.json file
 202     writeannotations:  Write the video annotations to a .annotations.xml file
 203     writethumbnail:    Write the thumbnail image to a file
 204     write_all_thumbnails:  Write all thumbnail formats to files
 205     writelink:         Write an internet shortcut file, depending on the
 206                        current platform (.url/.webloc/.desktop)
 207     writeurllink:      Write a Windows internet shortcut file (.url)
 208     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 209     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 210     writesubtitles:    Write the video subtitles to a file
 211     writeautomaticsub: Write the automatically generated subtitles to a file
 212     allsubtitles:      Downloads all the subtitles of the video
 213                        (requires writesubtitles or writeautomaticsub)
 214     listsubtitles:     Lists all available subtitles for the video
 215     subtitlesformat:   The format code for subtitles
 216     subtitleslangs:    List of languages of the subtitles to download
 217     keepvideo:         Keep the video file after post-processing
 218     daterange:         A DateRange object, download only if the upload_date is in the range.
 219     skip_download:     Skip the actual download of the video file
 220     cachedir:          Location of the cache files in the filesystem.
 221                        False to disable filesystem cache.
 222     noplaylist:        Download single video instead of a playlist if in doubt.
 223     age_limit:         An integer representing the user's age in years.
 224                        Unsuitable videos for the given age are skipped.
 225     min_views:         An integer representing the minimum view count the video
 226                        must have in order to not be skipped.
 227                        Videos without view count information are always
 228                        downloaded. None for no limit.
 229     max_views:         An integer representing the maximum view count.
 230                        Videos that are more popular than that are not
 231                        downloaded.
 232                        Videos without view count information are always
 233                        downloaded. None for no limit.
 234     download_archive:  File name of a file where all downloads are recorded.
 235                        Videos already present in the file are not downloaded
 236                        again.
 237     break_on_existing: Stop the download process after attempting to download a
 238                        file that is in the archive.
 239     break_on_reject:   Stop the download process when encountering a video that
 240                        has been filtered out.
 241     cookiefile:        File name where cookies should be read from and dumped to
 242     nocheckcertificate:Do not verify SSL certificates
 243     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 244                        At the moment, this is only supported by YouTube.
 245     proxy:             URL of the proxy server to use
 246     geo_verification_proxy:  URL of the proxy to use for IP address verification
 247                        on geo-restricted sites.
 248     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 249     bidi_workaround:   Work around buggy terminals without bidirectional text
 250                        support, using fridibi
 251     debug_printtraffic:Print out sent and received HTTP traffic
 252     include_ads:       Download ads as well
 253     default_search:    Prepend this string if an input url is not valid.
 254                        'auto' for elaborate guessing
 255     encoding:          Use this encoding instead of the system-specified.
 256     extract_flat:      Do not resolve URLs, return the immediate result.
 257                        Pass in 'in_playlist' to only show this behavior for
 258                        playlist items.
 259     postprocessors:    A list of dictionaries, each with an entry
 260                        * key:  The name of the postprocessor. See
 261                                youtube_dlc/postprocessor/__init__.py for a list.
 262                        * _after_move: Optional. If True, run this post_processor
 263                                after 'MoveFilesAfterDownload'
 264                        as well as any further keyword arguments for the
 265                        postprocessor.
 266     post_hooks:        A list of functions that get called as the final step
 267                        for each video file, after all postprocessors have been
 268                        called. The filename will be passed as the only argument.
 269     progress_hooks:    A list of functions that get called on download
 270                        progress, with a dictionary with the entries
 271                        * status: One of "downloading", "error", or "finished".
 272                                  Check this first and ignore unknown values.
 273
 274                        If status is one of "downloading", or "finished", the
 275                        following properties may also be present:
 276                        * filename: The final filename (always present)
 277                        * tmpfilename: The filename we're currently writing to
 278                        * downloaded_bytes: Bytes on disk
 279                        * total_bytes: Size of the whole file, None if unknown
 280                        * total_bytes_estimate: Guess of the eventual file size,
 281                                                None if unavailable.
 282                        * elapsed: The number of seconds since download started.
 283                        * eta: The estimated time in seconds, None if unknown
 284                        * speed: The download speed in bytes/second, None if
 285                                 unknown
 286                        * fragment_index: The counter of the currently
 287                                          downloaded video fragment.
 288                        * fragment_count: The number of fragments (= individual
 289                                          files that will be merged)
 290
 291                        Progress hooks are guaranteed to be called at least once
 292                        (with status "finished") if the download is successful.
 293     merge_output_format: Extension to use when merging formats.
 294     fixup:             Automatically correct known faults of the file.
 295                        One of:
 296                        - "never": do nothing
 297                        - "warn": only emit a warning
 298                        - "detect_or_warn": check whether we can do anything
 299                                            about it, warn otherwise (default)
 300     source_address:    Client-side IP address to bind to.
 301     call_home:         Boolean, true iff we are allowed to contact the
 302                        youtube-dlc servers for debugging.
 303     sleep_interval:    Number of seconds to sleep before each download when
 304                        used alone or a lower bound of a range for randomized
 305                        sleep before each download (minimum possible number
 306                        of seconds to sleep) when used along with
 307                        max_sleep_interval.
 308     max_sleep_interval:Upper bound of a range for randomized sleep before each
 309                        download (maximum possible number of seconds to sleep).
 310                        Must only be used along with sleep_interval.
 311                        Actual sleep time will be a random float from range
 312                        [sleep_interval; max_sleep_interval].
 313     listformats:       Print an overview of available video formats and exit.
 314     list_thumbnails:   Print a table of all thumbnails and exit.
 315     match_filter:      A function that gets called with the info_dict of
 316                        every video.
 317                        If it returns a message, the video is ignored.
 318                        If it returns None, the video is downloaded.
 319                        match_filter_func in utils.py is one example for this.
 320     no_color:          Do not emit color codes in output.
 321     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 322                        HTTP header
 323     geo_bypass_country:
 324                        Two-letter ISO 3166-2 country code that will be used for
 325                        explicit geographic restriction bypassing via faking
 326                        X-Forwarded-For HTTP header
 327     geo_bypass_ip_block:
 328                        IP range in CIDR notation that will be used similarly to
 329                        geo_bypass_country
 330
 331     The following options determine which downloader is picked:
 332     external_downloader: Executable of the external downloader to call.
 333                        None or unset for standard (built-in) downloader.
 334     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
 335                        if True, otherwise use ffmpeg/avconv if False, otherwise
 336                        use downloader suggested by extractor if None.
 337
 338     The following parameters are not used by YoutubeDL itself, they are used by
 339     the downloader (see youtube_dlc/downloader/common.py):
 340     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 341     noresizebuffer, retries, continuedl, noprogress, consoletitle,
 342     xattr_set_filesize, external_downloader_args, hls_use_mpegts,
 343     http_chunk_size.
 344
 345     The following options are used by the post processors:
 346     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 347                        otherwise prefer ffmpeg.
 348     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 349                        to the binary or its containing directory.
 350     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 351                         and a list of additional command-line arguments for the
 352                         postprocessor/executable. The dict can also have "PP+EXE" keys
 353                         which are used when the given exe is used by the given PP.
 354                         Use 'default' as the name for arguments to passed to all PP
 355     The following options are used by the Youtube extractor:
 356     youtube_include_dash_manifest: If True (default), DASH manifests and related
 357                         data will be downloaded and processed by extractor.
 358                         You can reduce network I/O by disabling it if you don't
 359                         care about DASH.
 360     """
 361
 362     _NUMERIC_FIELDS = set((
 363         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 364         'timestamp', 'upload_year', 'upload_month', 'upload_day',
 365         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 366         'average_rating', 'comment_count', 'age_limit',
 367         'start_time', 'end_time',
 368         'chapter_number', 'season_number', 'episode_number',
 369         'track_number', 'disc_number', 'release_year',
 370         'playlist_index',
 371     ))
 372
 373     params = None
 374     _ies = []
 375     _pps = []
 376     _pps_end = []
 377     __prepare_filename_warned = False
 378     _download_retcode = None
 379     _num_downloads = None
 380     _playlist_level = 0
 381     _playlist_urls = set()
 382     _screen_file = None
 383
 384     def __init__(self, params=None, auto_init=True):
 385         """Create a FileDownloader object with the given options."""
 386         if params is None:
 387             params = {}
 388         self._ies = []
 389         self._ies_instances = {}
 390         self._pps = []
 391         self._pps_end = []
 392         self.__prepare_filename_warned = False
 393         self._post_hooks = []
 394         self._progress_hooks = []
 395         self._download_retcode = 0
 396         self._num_downloads = 0
 397         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 398         self._err_file = sys.stderr
 399         self.params = {
 400             # Default parameters
 401             'nocheckcertificate': False,
 402         }
 403         self.params.update(params)
 404         self.cache = Cache(self)
 405         self.archive = set()
 406
 407         """Preload the archive, if any is specified"""
 408         def preload_download_archive(self):
 409             fn = self.params.get('download_archive')
 410             if fn is None:
 411                 return False
 412             try:
 413                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 414                     for line in archive_file:
 415                         self.archive.add(line.strip())
 416             except IOError as ioe:
 417                 if ioe.errno != errno.ENOENT:
 418                     raise
 419                 return False
 420             return True
 421
 422         def check_deprecated(param, option, suggestion):
 423             if self.params.get(param) is not None:
 424                 self.report_warning(
 425                     '%s is deprecated. Use %s instead.' % (option, suggestion))
 426                 return True
 427             return False
 428
 429         if self.params.get('verbose'):
 430             self.to_stdout('[debug] Loading archive file %r' % self.params.get('download_archive'))
 431
 432         preload_download_archive(self)
 433
 434         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 435             if self.params.get('geo_verification_proxy') is None:
 436                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 437
 438         check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
 439         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 440         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 441
 442         if params.get('bidi_workaround', False):
 443             try:
 444                 import pty
 445                 master, slave = pty.openpty()
 446                 width = compat_get_terminal_size().columns
 447                 if width is None:
 448                     width_args = []
 449                 else:
 450                     width_args = ['-w', str(width)]
 451                 sp_kwargs = dict(
 452                     stdin=subprocess.PIPE,
 453                     stdout=slave,
 454                     stderr=self._err_file)
 455                 try:
 456                     self._output_process = subprocess.Popen(
 457                         ['bidiv'] + width_args, **sp_kwargs
 458                     )
 459                 except OSError:
 460                     self._output_process = subprocess.Popen(
 461                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 462                 self._output_channel = os.fdopen(master, 'rb')
 463             except OSError as ose:
 464                 if ose.errno == errno.ENOENT:
 465                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 466                 else:
 467                     raise
 468
 469         if (sys.platform != 'win32'
 470                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 471                 and not params.get('restrictfilenames', False)):
 472             # Unicode filesystem API will throw errors (#1474, #13027)
 473             self.report_warning(
 474                 'Assuming --restrict-filenames since file system encoding '
 475                 'cannot encode all characters. '
 476                 'Set the LC_ALL environment variable to fix this.')
 477             self.params['restrictfilenames'] = True
 478
 479         if isinstance(params.get('outtmpl'), bytes):
 480             self.report_warning(
 481                 'Parameter outtmpl is bytes, but should be a unicode string. '
 482                 'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 483
 484         self._setup_opener()
 485
 486         if auto_init:
 487             self.print_debug_header()
 488             self.add_default_info_extractors()
 489
 490         for pp_def_raw in self.params.get('postprocessors', []):
 491             pp_class = get_postprocessor(pp_def_raw['key'])
 492             pp_def = dict(pp_def_raw)
 493             del pp_def['key']
 494             after_move = pp_def.get('_after_move', False)
 495             if '_after_move' in pp_def:
 496                 del pp_def['_after_move']
 497             pp = pp_class(self, **compat_kwargs(pp_def))
 498             self.add_post_processor(pp, after_move=after_move)
 499
 500         for ph in self.params.get('post_hooks', []):
 501             self.add_post_hook(ph)
 502
 503         for ph in self.params.get('progress_hooks', []):
 504             self.add_progress_hook(ph)
 505
 506         register_socks_protocols()
 507
 508     def warn_if_short_id(self, argv):
 509         # short YouTube ID starting with dash?
 510         idxs = [
 511             i for i, a in enumerate(argv)
 512             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 513         if idxs:
 514             correct_argv = (
 515                 ['youtube-dlc']
 516                 + [a for i, a in enumerate(argv) if i not in idxs]
 517                 + ['--'] + [argv[i] for i in idxs]
 518             )
 519             self.report_warning(
 520                 'Long argument string detected. '
 521                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 522                 args_to_str(correct_argv))
 523
 524     def add_info_extractor(self, ie):
 525         """Add an InfoExtractor object to the end of the list."""
 526         self._ies.append(ie)
 527         if not isinstance(ie, type):
 528             self._ies_instances[ie.ie_key()] = ie
 529             ie.set_downloader(self)
 530
 531     def get_info_extractor(self, ie_key):
 532         """
 533         Get an instance of an IE with name ie_key, it will try to get one from
 534         the _ies list, if there's no instance it will create a new one and add
 535         it to the extractor list.
 536         """
 537         ie = self._ies_instances.get(ie_key)
 538         if ie is None:
 539             ie = get_info_extractor(ie_key)()
 540             self.add_info_extractor(ie)
 541         return ie
 542
 543     def add_default_info_extractors(self):
 544         """
 545         Add the InfoExtractors returned by gen_extractors to the end of the list
 546         """
 547         for ie in gen_extractor_classes():
 548             self.add_info_extractor(ie)
 549
 550     def add_post_processor(self, pp, after_move=False):
 551         """Add a PostProcessor object to the end of the chain."""
 552         if after_move:
 553             self._pps_end.append(pp)
 554         else:
 555             self._pps.append(pp)
 556         pp.set_downloader(self)
 557
 558     def add_post_hook(self, ph):
 559         """Add the post hook"""
 560         self._post_hooks.append(ph)
 561
 562     def add_progress_hook(self, ph):
 563         """Add the progress hook (currently only for the file downloader)"""
 564         self._progress_hooks.append(ph)
 565
 566     def _bidi_workaround(self, message):
 567         if not hasattr(self, '_output_channel'):
 568             return message
 569
 570         assert hasattr(self, '_output_process')
 571         assert isinstance(message, compat_str)
 572         line_count = message.count('\n') + 1
 573         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 574         self._output_process.stdin.flush()
 575         res = ''.join(self._output_channel.readline().decode('utf-8')
 576                       for _ in range(line_count))
 577         return res[:-len('\n')]
 578
 579     def to_screen(self, message, skip_eol=False):
 580         """Print message to stdout if not in quiet mode."""
 581         return self.to_stdout(message, skip_eol, check_quiet=True)
 582
 583     def _write_string(self, s, out=None):
 584         write_string(s, out=out, encoding=self.params.get('encoding'))
 585
 586     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 587         """Print message to stdout if not in quiet mode."""
 588         if self.params.get('logger'):
 589             self.params['logger'].debug(message)
 590         elif not check_quiet or not self.params.get('quiet', False):
 591             message = self._bidi_workaround(message)
 592             terminator = ['\n', ''][skip_eol]
 593             output = message + terminator
 594
 595             self._write_string(output, self._screen_file)
 596
 597     def to_stderr(self, message):
 598         """Print message to stderr."""
 599         assert isinstance(message, compat_str)
 600         if self.params.get('logger'):
 601             self.params['logger'].error(message)
 602         else:
 603             message = self._bidi_workaround(message)
 604             output = message + '\n'
 605             self._write_string(output, self._err_file)
 606
 607     def to_console_title(self, message):
 608         if not self.params.get('consoletitle', False):
 609             return
 610         if compat_os_name == 'nt':
 611             if ctypes.windll.kernel32.GetConsoleWindow():
 612                 # c_wchar_p() might not be necessary if `message` is
 613                 # already of type unicode()
 614                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 615         elif 'TERM' in os.environ:
 616             self._write_string('\033]0;%s\007' % message, self._screen_file)
 617
 618     def save_console_title(self):
 619         if not self.params.get('consoletitle', False):
 620             return
 621         if self.params.get('simulate', False):
 622             return
 623         if compat_os_name != 'nt' and 'TERM' in os.environ:
 624             # Save the title on stack
 625             self._write_string('\033[22;0t', self._screen_file)
 626
 627     def restore_console_title(self):
 628         if not self.params.get('consoletitle', False):
 629             return
 630         if self.params.get('simulate', False):
 631             return
 632         if compat_os_name != 'nt' and 'TERM' in os.environ:
 633             # Restore the title from stack
 634             self._write_string('\033[23;0t', self._screen_file)
 635
 636     def __enter__(self):
 637         self.save_console_title()
 638         return self
 639
 640     def __exit__(self, *args):
 641         self.restore_console_title()
 642
 643         if self.params.get('cookiefile') is not None:
 644             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 645
 646     def trouble(self, message=None, tb=None):
 647         """Determine action to take when a download problem appears.
 648
 649         Depending on if the downloader has been configured to ignore
 650         download errors or not, this method may throw an exception or
 651         not when errors are found, after printing the message.
 652
 653         tb, if given, is additional traceback information.
 654         """
 655         if message is not None:
 656             self.to_stderr(message)
 657         if self.params.get('verbose'):
 658             if tb is None:
 659                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 660                     tb = ''
 661                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 662                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 663                     tb += encode_compat_str(traceback.format_exc())
 664                 else:
 665                     tb_data = traceback.format_list(traceback.extract_stack())
 666                     tb = ''.join(tb_data)
 667             self.to_stderr(tb)
 668         if not self.params.get('ignoreerrors', False):
 669             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 670                 exc_info = sys.exc_info()[1].exc_info
 671             else:
 672                 exc_info = sys.exc_info()
 673             raise DownloadError(message, exc_info)
 674         self._download_retcode = 1
 675
 676     def report_warning(self, message):
 677         '''
 678         Print the message to stderr, it will be prefixed with 'WARNING:'
 679         If stderr is a tty file the 'WARNING:' will be colored
 680         '''
 681         if self.params.get('logger') is not None:
 682             self.params['logger'].warning(message)
 683         else:
 684             if self.params.get('no_warnings'):
 685                 return
 686             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 687                 _msg_header = '\033[0;33mWARNING:\033[0m'
 688             else:
 689                 _msg_header = 'WARNING:'
 690             warning_message = '%s %s' % (_msg_header, message)
 691             self.to_stderr(warning_message)
 692
 693     def report_error(self, message, tb=None):
 694         '''
 695         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 696         in red if stderr is a tty file.
 697         '''
 698         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 699             _msg_header = '\033[0;31mERROR:\033[0m'
 700         else:
 701             _msg_header = 'ERROR:'
 702         error_message = '%s %s' % (_msg_header, message)
 703         self.trouble(error_message, tb)
 704
 705     def report_file_already_downloaded(self, file_name):
 706         """Report file has already been fully downloaded."""
 707         try:
 708             self.to_screen('[download] %s has already been downloaded' % file_name)
 709         except UnicodeEncodeError:
 710             self.to_screen('[download] The file has already been downloaded')
 711
 712     def report_file_delete(self, file_name):
 713         """Report that existing file will be deleted."""
 714         try:
 715             self.to_screen('Deleting already existent file %s' % file_name)
 716         except UnicodeEncodeError:
 717             self.to_screen('Deleting already existent file')
 718
 719     def prepare_filename(self, info_dict, warn=False):
 720         """Generate the output filename."""
 721         try:
 722             template_dict = dict(info_dict)
 723
 724             template_dict['epoch'] = int(time.time())
 725             autonumber_size = self.params.get('autonumber_size')
 726             if autonumber_size is None:
 727                 autonumber_size = 5
 728             template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
 729             if template_dict.get('resolution') is None:
 730                 if template_dict.get('width') and template_dict.get('height'):
 731                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 732                 elif template_dict.get('height'):
 733                     template_dict['resolution'] = '%sp' % template_dict['height']
 734                 elif template_dict.get('width'):
 735                     template_dict['resolution'] = '%dx?' % template_dict['width']
 736
 737             sanitize = lambda k, v: sanitize_filename(
 738                 compat_str(v),
 739                 restricted=self.params.get('restrictfilenames'),
 740                 is_id=(k == 'id' or k.endswith('_id')))
 741             template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
 742                                  for k, v in template_dict.items()
 743                                  if v is not None and not isinstance(v, (list, tuple, dict)))
 744             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 745
 746             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
 747
 748             # For fields playlist_index and autonumber convert all occurrences
 749             # of %(field)s to %(field)0Nd for backward compatibility
 750             field_size_compat_map = {
 751                 'playlist_index': len(str(template_dict['n_entries'])),
 752                 'autonumber': autonumber_size,
 753             }
 754             FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
 755             mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
 756             if mobj:
 757                 outtmpl = re.sub(
 758                     FIELD_SIZE_COMPAT_RE,
 759                     r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
 760                     outtmpl)
 761
 762             # Missing numeric fields used together with integer presentation types
 763             # in format specification will break the argument substitution since
 764             # string 'NA' is returned for missing fields. We will patch output
 765             # template for missing fields to meet string presentation type.
 766             for numeric_field in self._NUMERIC_FIELDS:
 767                 if numeric_field not in template_dict:
 768                     # As of [1] format syntax is:
 769                     #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
 770                     # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
 771                     FORMAT_RE = r'''(?x)
 772                         (?<!%)
 773                         %
 774                         \({0}\)  # mapping key
 775                         (?:[#0\-+ ]+)?  # conversion flags (optional)
 776                         (?:\d+)?  # minimum field width (optional)
 777                         (?:\.\d+)?  # precision (optional)
 778                         [hlL]?  # length modifier (optional)
 779                         [diouxXeEfFgGcrs%]  # conversion type
 780                     '''
 781                     outtmpl = re.sub(
 782                         FORMAT_RE.format(numeric_field),
 783                         r'%({0})s'.format(numeric_field), outtmpl)
 784
 785             # expand_path translates '%%' into '%' and '$$' into '$'
 786             # correspondingly that is not what we want since we need to keep
 787             # '%%' intact for template dict substitution step. Working around
 788             # with boundary-alike separator hack.
 789             sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 790             outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 791
 792             # outtmpl should be expand_path'ed before template dict substitution
 793             # because meta fields may contain env variables we don't want to
 794             # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
 795             # title "Hello $PATH", we don't want `$PATH` to be expanded.
 796             filename = expand_path(outtmpl).replace(sep, '') % template_dict
 797
 798             # https://github.com/blackjack4494/youtube-dlc/issues/85
 799             trim_file_name = self.params.get('trim_file_name', False)
 800             if trim_file_name:
 801                 fn_groups = filename.rsplit('.')
 802                 ext = fn_groups[-1]
 803                 sub_ext = ''
 804                 if len(fn_groups) > 2:
 805                     sub_ext = fn_groups[-2]
 806                 filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
 807
 808             # Temporary fix for #4787
 809             # 'Treat' all problem characters by passing filename through preferredencoding
 810             # to workaround encoding issues with subprocess on python2 @ Windows
 811             if sys.version_info < (3, 0) and sys.platform == 'win32':
 812                 filename = encodeFilename(filename, True).decode(preferredencoding())
 813             filename = sanitize_path(filename)
 814
 815             if warn and not self.__prepare_filename_warned:
 816                 if not self.params.get('paths'):
 817                     pass
 818                 elif filename == '-':
 819                     self.report_warning('--paths is ignored when an outputting to stdout')
 820                 elif os.path.isabs(filename):
 821                     self.report_warning('--paths is ignored since an absolute path is given in output template')
 822                 self.__prepare_filename_warned = True
 823
 824             return filename
 825         except ValueError as err:
 826             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 827             return None
 828
 829     def prepare_filepath(self, filename, dir_type=''):
 830         if filename == '-':
 831             return filename
 832         paths = self.params.get('paths', {})
 833         assert isinstance(paths, dict)
 834         homepath = expand_path(paths.get('home', '').strip())
 835         assert isinstance(homepath, compat_str)
 836         subdir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
 837         assert isinstance(subdir, compat_str)
 838         return sanitize_path(os.path.join(homepath, subdir, filename))
 839
 840     def _match_entry(self, info_dict, incomplete):
 841         """ Returns None if the file should be downloaded """
 842
 843         def check_filter():
 844             video_title = info_dict.get('title', info_dict.get('id', 'video'))
 845             if 'title' in info_dict:
 846                 # This can happen when we're just evaluating the playlist
 847                 title = info_dict['title']
 848                 matchtitle = self.params.get('matchtitle', False)
 849                 if matchtitle:
 850                     if not re.search(matchtitle, title, re.IGNORECASE):
 851                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 852                 rejecttitle = self.params.get('rejecttitle', False)
 853                 if rejecttitle:
 854                     if re.search(rejecttitle, title, re.IGNORECASE):
 855                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 856             date = info_dict.get('upload_date')
 857             if date is not None:
 858                 dateRange = self.params.get('daterange', DateRange())
 859                 if date not in dateRange:
 860                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 861             view_count = info_dict.get('view_count')
 862             if view_count is not None:
 863                 min_views = self.params.get('min_views')
 864                 if min_views is not None and view_count < min_views:
 865                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 866                 max_views = self.params.get('max_views')
 867                 if max_views is not None and view_count > max_views:
 868                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 869             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
 870                 return 'Skipping "%s" because it is age restricted' % video_title
 871             if self.in_download_archive(info_dict):
 872                 return '%s has already been recorded in archive' % video_title
 873
 874             if not incomplete:
 875                 match_filter = self.params.get('match_filter')
 876                 if match_filter is not None:
 877                     ret = match_filter(info_dict)
 878                     if ret is not None:
 879                         return ret
 880             return None
 881
 882         reason = check_filter()
 883         if reason is not None:
 884             self.to_screen('[download] ' + reason)
 885             if reason.endswith('has already been recorded in the archive') and self.params.get('break_on_existing', False):
 886                 raise ExistingVideoReached()
 887             elif self.params.get('break_on_reject', False):
 888                 raise RejectedVideoReached()
 889         return reason
 890
 891     @staticmethod
 892     def add_extra_info(info_dict, extra_info):
 893         '''Set the keys from extra_info in info dict if they are missing'''
 894         for key, value in extra_info.items():
 895             info_dict.setdefault(key, value)
 896
 897     def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info={},
 898                      process=True, force_generic_extractor=False):
 899         '''
 900         Returns a list with a dictionary for each video we find.
 901         If 'download', also downloads the videos.
 902         extra_info is a dict containing the extra values to add to each result
 903         '''
 904
 905         if not ie_key and force_generic_extractor:
 906             ie_key = 'Generic'
 907
 908         if ie_key:
 909             ies = [self.get_info_extractor(ie_key)]
 910         else:
 911             ies = self._ies
 912
 913         for ie in ies:
 914             if not ie.suitable(url):
 915                 continue
 916
 917             ie_key = ie.ie_key()
 918             ie = self.get_info_extractor(ie_key)
 919             if not ie.working():
 920                 self.report_warning('The program functionality for this site has been marked as broken, '
 921                                     'and will probably not work.')
 922
 923             try:
 924                 temp_id = str_or_none(
 925                     ie.extract_id(url) if callable(getattr(ie, 'extract_id', None))
 926                     else ie._match_id(url))
 927             except (AssertionError, IndexError, AttributeError):
 928                 temp_id = None
 929             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
 930                 self.to_screen("[%s] %s: has already been recorded in archive" % (
 931                                ie_key, temp_id))
 932                 break
 933
 934             return self.__extract_info(url, ie, download, extra_info, process, info_dict)
 935
 936         else:
 937             self.report_error('no suitable InfoExtractor for URL %s' % url)
 938
 939     def __handle_extraction_exceptions(func):
 940         def wrapper(self, *args, **kwargs):
 941             try:
 942                 return func(self, *args, **kwargs)
 943             except GeoRestrictedError as e:
 944                 msg = e.msg
 945                 if e.countries:
 946                     msg += '\nThis video is available in %s.' % ', '.join(
 947                         map(ISO3166Utils.short2full, e.countries))
 948                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
 949                 self.report_error(msg)
 950             except ExtractorError as e:  # An error we somewhat expected
 951                 self.report_error(compat_str(e), e.format_traceback())
 952             except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
 953                 raise
 954             except Exception as e:
 955                 if self.params.get('ignoreerrors', False):
 956                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
 957                 else:
 958                     raise
 959         return wrapper
 960
 961     @__handle_extraction_exceptions
 962     def __extract_info(self, url, ie, download, extra_info, process, info_dict):
 963         ie_result = ie.extract(url)
 964         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
 965             return
 966         if isinstance(ie_result, list):
 967             # Backwards compatibility: old IE result format
 968             ie_result = {
 969                 '_type': 'compat_list',
 970                 'entries': ie_result,
 971             }
 972         if info_dict:
 973             if info_dict.get('id'):
 974                 ie_result['id'] = info_dict['id']
 975             if info_dict.get('title'):
 976                 ie_result['title'] = info_dict['title']
 977         self.add_default_extra_info(ie_result, ie, url)
 978         if process:
 979             return self.process_ie_result(ie_result, download, extra_info)
 980         else:
 981             return ie_result
 982
 983     def add_default_extra_info(self, ie_result, ie, url):
 984         self.add_extra_info(ie_result, {
 985             'extractor': ie.IE_NAME,
 986             'webpage_url': url,
 987             'duration_string': (
 988                 formatSeconds(ie_result['duration'], '-')
 989                 if ie_result.get('duration', None) is not None
 990                 else None),
 991             'webpage_url_basename': url_basename(url),
 992             'extractor_key': ie.ie_key(),
 993         })
 994
 995     def process_ie_result(self, ie_result, download=True, extra_info={}):
 996         """
 997         Take the result of the ie(may be modified) and resolve all unresolved
 998         references (URLs, playlist items).
 999
1000         It will also download the videos if 'download'.
1001         Returns the resolved ie_result.
1002         """
1003         result_type = ie_result.get('_type', 'video')
1004
1005         if result_type in ('url', 'url_transparent'):
1006             ie_result['url'] = sanitize_url(ie_result['url'])
1007             extract_flat = self.params.get('extract_flat', False)
1008             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1009                     or extract_flat is True):
1010                 self.__forced_printings(
1011                     ie_result,
1012                     self.prepare_filepath(self.prepare_filename(ie_result)),
1013                     incomplete=True)
1014                 return ie_result
1015
1016         if result_type == 'video':
1017             self.add_extra_info(ie_result, extra_info)
1018             return self.process_video_result(ie_result, download=download)
1019         elif result_type == 'url':
1020             # We have to add extra_info to the results because it may be
1021             # contained in a playlist
1022             return self.extract_info(ie_result['url'],
1023                                      download, info_dict=ie_result,
1024                                      ie_key=ie_result.get('ie_key'),
1025                                      extra_info=extra_info)
1026         elif result_type == 'url_transparent':
1027             # Use the information from the embedding page
1028             info = self.extract_info(
1029                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1030                 extra_info=extra_info, download=False, process=False)
1031
1032             # extract_info may return None when ignoreerrors is enabled and
1033             # extraction failed with an error, don't crash and return early
1034             # in this case
1035             if not info:
1036                 return info
1037
1038             force_properties = dict(
1039                 (k, v) for k, v in ie_result.items() if v is not None)
1040             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1041                 if f in force_properties:
1042                     del force_properties[f]
1043             new_result = info.copy()
1044             new_result.update(force_properties)
1045
1046             # Extracted info may not be a video result (i.e.
1047             # info.get('_type', 'video') != video) but rather an url or
1048             # url_transparent. In such cases outer metadata (from ie_result)
1049             # should be propagated to inner one (info). For this to happen
1050             # _type of info should be overridden with url_transparent. This
1051             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1052             if new_result.get('_type') == 'url':
1053                 new_result['_type'] = 'url_transparent'
1054
1055             return self.process_ie_result(
1056                 new_result, download=download, extra_info=extra_info)
1057         elif result_type in ('playlist', 'multi_video'):
1058             # Protect from infinite recursion due to recursively nested playlists
1059             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1060             webpage_url = ie_result['webpage_url']
1061             if webpage_url in self._playlist_urls:
1062                 self.to_screen(
1063                     '[download] Skipping already downloaded playlist: %s'
1064                     % ie_result.get('title') or ie_result.get('id'))
1065                 return
1066
1067             self._playlist_level += 1
1068             self._playlist_urls.add(webpage_url)
1069             try:
1070                 return self.__process_playlist(ie_result, download)
1071             finally:
1072                 self._playlist_level -= 1
1073                 if not self._playlist_level:
1074                     self._playlist_urls.clear()
1075         elif result_type == 'compat_list':
1076             self.report_warning(
1077                 'Extractor %s returned a compat_list result. '
1078                 'It needs to be updated.' % ie_result.get('extractor'))
1079
1080             def _fixup(r):
1081                 self.add_extra_info(
1082                     r,
1083                     {
1084                         'extractor': ie_result['extractor'],
1085                         'webpage_url': ie_result['webpage_url'],
1086                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
1087                         'extractor_key': ie_result['extractor_key'],
1088                     }
1089                 )
1090                 return r
1091             ie_result['entries'] = [
1092                 self.process_ie_result(_fixup(r), download, extra_info)
1093                 for r in ie_result['entries']
1094             ]
1095             return ie_result
1096         else:
1097             raise Exception('Invalid result type: %s' % result_type)
1098
1099     def __process_playlist(self, ie_result, download):
1100         # We process each entry in the playlist
1101         playlist = ie_result.get('title') or ie_result.get('id')
1102         self.to_screen('[download] Downloading playlist: %s' % playlist)
1103
1104         playlist_results = []
1105
1106         playliststart = self.params.get('playliststart', 1) - 1
1107         playlistend = self.params.get('playlistend')
1108         # For backwards compatibility, interpret -1 as whole list
1109         if playlistend == -1:
1110             playlistend = None
1111
1112         playlistitems_str = self.params.get('playlist_items')
1113         playlistitems = None
1114         if playlistitems_str is not None:
1115             def iter_playlistitems(format):
1116                 for string_segment in format.split(','):
1117                     if '-' in string_segment:
1118                         start, end = string_segment.split('-')
1119                         for item in range(int(start), int(end) + 1):
1120                             yield int(item)
1121                     else:
1122                         yield int(string_segment)
1123             playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1124
1125         ie_entries = ie_result['entries']
1126
1127         def make_playlistitems_entries(list_ie_entries):
1128             num_entries = len(list_ie_entries)
1129             return [
1130                 list_ie_entries[i - 1] for i in playlistitems
1131                 if -num_entries <= i - 1 < num_entries]
1132
1133         def report_download(num_entries):
1134             self.to_screen(
1135                 '[%s] playlist %s: Downloading %d videos' %
1136                 (ie_result['extractor'], playlist, num_entries))
1137
1138         if isinstance(ie_entries, list):
1139             n_all_entries = len(ie_entries)
1140             if playlistitems:
1141                 entries = make_playlistitems_entries(ie_entries)
1142             else:
1143                 entries = ie_entries[playliststart:playlistend]
1144             n_entries = len(entries)
1145             self.to_screen(
1146                 '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
1147                 (ie_result['extractor'], playlist, n_all_entries, n_entries))
1148         elif isinstance(ie_entries, PagedList):
1149             if playlistitems:
1150                 entries = []
1151                 for item in playlistitems:
1152                     entries.extend(ie_entries.getslice(
1153                         item - 1, item
1154                     ))
1155             else:
1156                 entries = ie_entries.getslice(
1157                     playliststart, playlistend)
1158             n_entries = len(entries)
1159             report_download(n_entries)
1160         else:  # iterable
1161             if playlistitems:
1162                 entries = make_playlistitems_entries(list(itertools.islice(
1163                     ie_entries, 0, max(playlistitems))))
1164             else:
1165                 entries = list(itertools.islice(
1166                     ie_entries, playliststart, playlistend))
1167             n_entries = len(entries)
1168             report_download(n_entries)
1169
1170         if self.params.get('playlistreverse', False):
1171             entries = entries[::-1]
1172
1173         if self.params.get('playlistrandom', False):
1174             random.shuffle(entries)
1175
1176         x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1177
1178         for i, entry in enumerate(entries, 1):
1179             self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1180             # This __x_forwarded_for_ip thing is a bit ugly but requires
1181             # minimal changes
1182             if x_forwarded_for:
1183                 entry['__x_forwarded_for_ip'] = x_forwarded_for
1184             extra = {
1185                 'n_entries': n_entries,
1186                 'playlist': playlist,
1187                 'playlist_id': ie_result.get('id'),
1188                 'playlist_title': ie_result.get('title'),
1189                 'playlist_uploader': ie_result.get('uploader'),
1190                 'playlist_uploader_id': ie_result.get('uploader_id'),
1191                 'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
1192                 'extractor': ie_result['extractor'],
1193                 'webpage_url': ie_result['webpage_url'],
1194                 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1195                 'extractor_key': ie_result['extractor_key'],
1196             }
1197
1198             if self._match_entry(entry, incomplete=True) is not None:
1199                 continue
1200
1201             entry_result = self.__process_iterable_entry(entry, download, extra)
1202             # TODO: skip failed (empty) entries?
1203             playlist_results.append(entry_result)
1204         ie_result['entries'] = playlist_results
1205         self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1206         return ie_result
1207
1208     @__handle_extraction_exceptions
1209     def __process_iterable_entry(self, entry, download, extra_info):
1210         return self.process_ie_result(
1211             entry, download=download, extra_info=extra_info)
1212
1213     def _build_format_filter(self, filter_spec):
1214         " Returns a function to filter the formats according to the filter_spec "
1215
1216         OPERATORS = {
1217             '<': operator.lt,
1218             '<=': operator.le,
1219             '>': operator.gt,
1220             '>=': operator.ge,
1221             '=': operator.eq,
1222             '!=': operator.ne,
1223         }
1224         operator_rex = re.compile(r'''(?x)\s*
1225             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
1226             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1227             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
1228             $
1229             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1230         m = operator_rex.search(filter_spec)
1231         if m:
1232             try:
1233                 comparison_value = int(m.group('value'))
1234             except ValueError:
1235                 comparison_value = parse_filesize(m.group('value'))
1236                 if comparison_value is None:
1237                     comparison_value = parse_filesize(m.group('value') + 'B')
1238                 if comparison_value is None:
1239                     raise ValueError(
1240                         'Invalid value %r in format specification %r' % (
1241                             m.group('value'), filter_spec))
1242             op = OPERATORS[m.group('op')]
1243
1244         if not m:
1245             STR_OPERATORS = {
1246                 '=': operator.eq,
1247                 '^=': lambda attr, value: attr.startswith(value),
1248                 '$=': lambda attr, value: attr.endswith(value),
1249                 '*=': lambda attr, value: value in attr,
1250             }
1251             str_operator_rex = re.compile(r'''(?x)
1252                 \s*(?P<key>[a-zA-Z0-9._-]+)
1253                 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
1254                 \s*(?P<value>[a-zA-Z0-9._-]+)
1255                 \s*$
1256                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1257             m = str_operator_rex.search(filter_spec)
1258             if m:
1259                 comparison_value = m.group('value')
1260                 str_op = STR_OPERATORS[m.group('op')]
1261                 if m.group('negation'):
1262                     op = lambda attr, value: not str_op(attr, value)
1263                 else:
1264                     op = str_op
1265
1266         if not m:
1267             raise ValueError('Invalid filter specification %r' % filter_spec)
1268
1269         def _filter(f):
1270             actual_value = f.get(m.group('key'))
1271             if actual_value is None:
1272                 return m.group('none_inclusive')
1273             return op(actual_value, comparison_value)
1274         return _filter
1275
1276     def _default_format_spec(self, info_dict, download=True):
1277
1278         def can_merge():
1279             merger = FFmpegMergerPP(self)
1280             return merger.available and merger.can_merge()
1281
1282         prefer_best = (
1283             not self.params.get('simulate', False)
1284             and download
1285             and (
1286                 not can_merge()
1287                 or info_dict.get('is_live', False)
1288                 or self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-'))
1289
1290         return (
1291             'best/bestvideo+bestaudio'
1292             if prefer_best
1293             else 'bestvideo*+bestaudio/best'
1294             if not self.params.get('allow_multiple_audio_streams', False)
1295             else 'bestvideo+bestaudio/best')
1296
1297     def build_format_selector(self, format_spec):
1298         def syntax_error(note, start):
1299             message = (
1300                 'Invalid format specification: '
1301                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1302             return SyntaxError(message)
1303
1304         PICKFIRST = 'PICKFIRST'
1305         MERGE = 'MERGE'
1306         SINGLE = 'SINGLE'
1307         GROUP = 'GROUP'
1308         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1309
1310         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1311                                   'video': self.params.get('allow_multiple_video_streams', False)}
1312
1313         def _parse_filter(tokens):
1314             filter_parts = []
1315             for type, string, start, _, _ in tokens:
1316                 if type == tokenize.OP and string == ']':
1317                     return ''.join(filter_parts)
1318                 else:
1319                     filter_parts.append(string)
1320
1321         def _remove_unused_ops(tokens):
1322             # Remove operators that we don't use and join them with the surrounding strings
1323             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1324             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1325             last_string, last_start, last_end, last_line = None, None, None, None
1326             for type, string, start, end, line in tokens:
1327                 if type == tokenize.OP and string == '[':
1328                     if last_string:
1329                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1330                         last_string = None
1331                     yield type, string, start, end, line
1332                     # everything inside brackets will be handled by _parse_filter
1333                     for type, string, start, end, line in tokens:
1334                         yield type, string, start, end, line
1335                         if type == tokenize.OP and string == ']':
1336                             break
1337                 elif type == tokenize.OP and string in ALLOWED_OPS:
1338                     if last_string:
1339                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1340                         last_string = None
1341                     yield type, string, start, end, line
1342                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1343                     if not last_string:
1344                         last_string = string
1345                         last_start = start
1346                         last_end = end
1347                     else:
1348                         last_string += string
1349             if last_string:
1350                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1351
1352         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1353             selectors = []
1354             current_selector = None
1355             for type, string, start, _, _ in tokens:
1356                 # ENCODING is only defined in python 3.x
1357                 if type == getattr(tokenize, 'ENCODING', None):
1358                     continue
1359                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1360                     current_selector = FormatSelector(SINGLE, string, [])
1361                 elif type == tokenize.OP:
1362                     if string == ')':
1363                         if not inside_group:
1364                             # ')' will be handled by the parentheses group
1365                             tokens.restore_last_token()
1366                         break
1367                     elif inside_merge and string in ['/', ',']:
1368                         tokens.restore_last_token()
1369                         break
1370                     elif inside_choice and string == ',':
1371                         tokens.restore_last_token()
1372                         break
1373                     elif string == ',':
1374                         if not current_selector:
1375                             raise syntax_error('"," must follow a format selector', start)
1376                         selectors.append(current_selector)
1377                         current_selector = None
1378                     elif string == '/':
1379                         if not current_selector:
1380                             raise syntax_error('"/" must follow a format selector', start)
1381                         first_choice = current_selector
1382                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1383                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1384                     elif string == '[':
1385                         if not current_selector:
1386                             current_selector = FormatSelector(SINGLE, 'best', [])
1387                         format_filter = _parse_filter(tokens)
1388                         current_selector.filters.append(format_filter)
1389                     elif string == '(':
1390                         if current_selector:
1391                             raise syntax_error('Unexpected "("', start)
1392                         group = _parse_format_selection(tokens, inside_group=True)
1393                         current_selector = FormatSelector(GROUP, group, [])
1394                     elif string == '+':
1395                         if not current_selector:
1396                             raise syntax_error('Unexpected "+"', start)
1397                         selector_1 = current_selector
1398                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
1399                         if not selector_2:
1400                             raise syntax_error('Expected a selector', start)
1401                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1402                     else:
1403                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1404                 elif type == tokenize.ENDMARKER:
1405                     break
1406             if current_selector:
1407                 selectors.append(current_selector)
1408             return selectors
1409
1410         def _build_selector_function(selector):
1411             if isinstance(selector, list):  # ,
1412                 fs = [_build_selector_function(s) for s in selector]
1413
1414                 def selector_function(ctx):
1415                     for f in fs:
1416                         for format in f(ctx):
1417                             yield format
1418                 return selector_function
1419
1420             elif selector.type == GROUP:  # ()
1421                 selector_function = _build_selector_function(selector.selector)
1422
1423             elif selector.type == PICKFIRST:  # /
1424                 fs = [_build_selector_function(s) for s in selector.selector]
1425
1426                 def selector_function(ctx):
1427                     for f in fs:
1428                         picked_formats = list(f(ctx))
1429                         if picked_formats:
1430                             return picked_formats
1431                     return []
1432
1433             elif selector.type == SINGLE:  # atom
1434                 format_spec = selector.selector if selector.selector is not None else 'best'
1435
1436                 if format_spec == 'all':
1437                     def selector_function(ctx):
1438                         formats = list(ctx['formats'])
1439                         if formats:
1440                             for f in formats:
1441                                 yield f
1442
1443                 else:
1444                     format_fallback = False
1445                     format_spec_obj = re.match(r'(best|worst|b|w)(video|audio|v|a)?(\*)?$', format_spec)
1446                     if format_spec_obj is not None:
1447                         format_idx = 0 if format_spec_obj.group(1)[0] == 'w' else -1
1448                         format_type = format_spec_obj.group(2)[0] if format_spec_obj.group(2) else False
1449                         not_format_type = 'v' if format_type == 'a' else 'a'
1450                         format_modified = format_spec_obj.group(3) is not None
1451
1452                         format_fallback = not format_type and not format_modified  # for b, w
1453                         filter_f = ((lambda f: f.get(format_type + 'codec') != 'none')
1454                                     if format_type and format_modified  # bv*, ba*, wv*, wa*
1455                                     else (lambda f: f.get(not_format_type + 'codec') == 'none')
1456                                     if format_type  # bv, ba, wv, wa
1457                                     else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
1458                                     if not format_modified  # b, w
1459                                     else None)  # b*, w*
1460                     else:
1461                         format_idx = -1
1462                         filter_f = ((lambda f: f.get('ext') == format_spec)
1463                                     if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
1464                                     else (lambda f: f.get('format_id') == format_spec))  # id
1465
1466                     def selector_function(ctx):
1467                         formats = list(ctx['formats'])
1468                         if not formats:
1469                             return
1470                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
1471                         if matches:
1472                             yield matches[format_idx]
1473                         elif format_fallback == 'force' or (format_fallback and ctx['incomplete_formats']):
1474                             # for extractors with incomplete formats (audio only (soundcloud)
1475                             # or video only (imgur)) best/worst will fallback to
1476                             # best/worst {video,audio}-only format
1477                             yield formats[format_idx]
1478
1479             elif selector.type == MERGE:        # +
1480                 def _merge(formats_pair):
1481                     format_1, format_2 = formats_pair
1482
1483                     formats_info = []
1484                     formats_info.extend(format_1.get('requested_formats', (format_1,)))
1485                     formats_info.extend(format_2.get('requested_formats', (format_2,)))
1486
1487                     if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1488                         get_no_more = {"video": False, "audio": False}
1489                         for (i, fmt_info) in enumerate(formats_info):
1490                             for aud_vid in ["audio", "video"]:
1491                                 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1492                                     if get_no_more[aud_vid]:
1493                                         formats_info.pop(i)
1494                                     get_no_more[aud_vid] = True
1495
1496                     if len(formats_info) == 1:
1497                         return formats_info[0]
1498
1499                     video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1500                     audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1501
1502                     the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1503                     the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1504
1505                     output_ext = self.params.get('merge_output_format')
1506                     if not output_ext:
1507                         if the_only_video:
1508                             output_ext = the_only_video['ext']
1509                         elif the_only_audio and not video_fmts:
1510                             output_ext = the_only_audio['ext']
1511                         else:
1512                             output_ext = 'mkv'
1513
1514                     new_dict = {
1515                         'requested_formats': formats_info,
1516                         'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
1517                         'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
1518                         'ext': output_ext,
1519                     }
1520
1521                     if the_only_video:
1522                         new_dict.update({
1523                             'width': the_only_video.get('width'),
1524                             'height': the_only_video.get('height'),
1525                             'resolution': the_only_video.get('resolution'),
1526                             'fps': the_only_video.get('fps'),
1527                             'vcodec': the_only_video.get('vcodec'),
1528                             'vbr': the_only_video.get('vbr'),
1529                             'stretched_ratio': the_only_video.get('stretched_ratio'),
1530                         })
1531
1532                     if the_only_audio:
1533                         new_dict.update({
1534                             'acodec': the_only_audio.get('acodec'),
1535                             'abr': the_only_audio.get('abr'),
1536                         })
1537
1538                     return new_dict
1539
1540                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
1541
1542                 def selector_function(ctx):
1543                     for pair in itertools.product(
1544                             selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
1545                         yield _merge(pair)
1546
1547             filters = [self._build_format_filter(f) for f in selector.filters]
1548
1549             def final_selector(ctx):
1550                 ctx_copy = copy.deepcopy(ctx)
1551                 for _filter in filters:
1552                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1553                 return selector_function(ctx_copy)
1554             return final_selector
1555
1556         stream = io.BytesIO(format_spec.encode('utf-8'))
1557         try:
1558             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1559         except tokenize.TokenError:
1560             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1561
1562         class TokenIterator(object):
1563             def __init__(self, tokens):
1564                 self.tokens = tokens
1565                 self.counter = 0
1566
1567             def __iter__(self):
1568                 return self
1569
1570             def __next__(self):
1571                 if self.counter >= len(self.tokens):
1572                     raise StopIteration()
1573                 value = self.tokens[self.counter]
1574                 self.counter += 1
1575                 return value
1576
1577             next = __next__
1578
1579             def restore_last_token(self):
1580                 self.counter -= 1
1581
1582         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1583         return _build_selector_function(parsed_selector)
1584
1585     def _calc_headers(self, info_dict):
1586         res = std_headers.copy()
1587
1588         add_headers = info_dict.get('http_headers')
1589         if add_headers:
1590             res.update(add_headers)
1591
1592         cookies = self._calc_cookies(info_dict)
1593         if cookies:
1594             res['Cookie'] = cookies
1595
1596         if 'X-Forwarded-For' not in res:
1597             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1598             if x_forwarded_for_ip:
1599                 res['X-Forwarded-For'] = x_forwarded_for_ip
1600
1601         return res
1602
1603     def _calc_cookies(self, info_dict):
1604         pr = sanitized_Request(info_dict['url'])
1605         self.cookiejar.add_cookie_header(pr)
1606         return pr.get_header('Cookie')
1607
1608     def process_video_result(self, info_dict, download=True):
1609         assert info_dict.get('_type', 'video') == 'video'
1610
1611         if 'id' not in info_dict:
1612             raise ExtractorError('Missing "id" field in extractor result')
1613         if 'title' not in info_dict:
1614             raise ExtractorError('Missing "title" field in extractor result')
1615
1616         def report_force_conversion(field, field_not, conversion):
1617             self.report_warning(
1618                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
1619                 % (field, field_not, conversion))
1620
1621         def sanitize_string_field(info, string_field):
1622             field = info.get(string_field)
1623             if field is None or isinstance(field, compat_str):
1624                 return
1625             report_force_conversion(string_field, 'a string', 'string')
1626             info[string_field] = compat_str(field)
1627
1628         def sanitize_numeric_fields(info):
1629             for numeric_field in self._NUMERIC_FIELDS:
1630                 field = info.get(numeric_field)
1631                 if field is None or isinstance(field, compat_numeric_types):
1632                     continue
1633                 report_force_conversion(numeric_field, 'numeric', 'int')
1634                 info[numeric_field] = int_or_none(field)
1635
1636         sanitize_string_field(info_dict, 'id')
1637         sanitize_numeric_fields(info_dict)
1638
1639         if 'playlist' not in info_dict:
1640             # It isn't part of a playlist
1641             info_dict['playlist'] = None
1642             info_dict['playlist_index'] = None
1643
1644         thumbnails = info_dict.get('thumbnails')
1645         if thumbnails is None:
1646             thumbnail = info_dict.get('thumbnail')
1647             if thumbnail:
1648                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1649         if thumbnails:
1650             thumbnails.sort(key=lambda t: (
1651                 t.get('preference') if t.get('preference') is not None else -1,
1652                 t.get('width') if t.get('width') is not None else -1,
1653                 t.get('height') if t.get('height') is not None else -1,
1654                 t.get('id') if t.get('id') is not None else '', t.get('url')))
1655             for i, t in enumerate(thumbnails):
1656                 t['url'] = sanitize_url(t['url'])
1657                 if t.get('width') and t.get('height'):
1658                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1659                 if t.get('id') is None:
1660                     t['id'] = '%d' % i
1661
1662         if self.params.get('list_thumbnails'):
1663             self.list_thumbnails(info_dict)
1664             return
1665
1666         thumbnail = info_dict.get('thumbnail')
1667         if thumbnail:
1668             info_dict['thumbnail'] = sanitize_url(thumbnail)
1669         elif thumbnails:
1670             info_dict['thumbnail'] = thumbnails[-1]['url']
1671
1672         if 'display_id' not in info_dict and 'id' in info_dict:
1673             info_dict['display_id'] = info_dict['id']
1674
1675         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1676             # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1677             # see http://bugs.python.org/issue1646728)
1678             try:
1679                 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1680                 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1681             except (ValueError, OverflowError, OSError):
1682                 pass
1683
1684         # Auto generate title fields corresponding to the *_number fields when missing
1685         # in order to always have clean titles. This is very common for TV series.
1686         for field in ('chapter', 'season', 'episode'):
1687             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1688                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
1689
1690         for cc_kind in ('subtitles', 'automatic_captions'):
1691             cc = info_dict.get(cc_kind)
1692             if cc:
1693                 for _, subtitle in cc.items():
1694                     for subtitle_format in subtitle:
1695                         if subtitle_format.get('url'):
1696                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
1697                         if subtitle_format.get('ext') is None:
1698                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1699
1700         automatic_captions = info_dict.get('automatic_captions')
1701         subtitles = info_dict.get('subtitles')
1702
1703         if self.params.get('listsubtitles', False):
1704             if 'automatic_captions' in info_dict:
1705                 self.list_subtitles(
1706                     info_dict['id'], automatic_captions, 'automatic captions')
1707             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1708             return
1709
1710         info_dict['requested_subtitles'] = self.process_subtitles(
1711             info_dict['id'], subtitles, automatic_captions)
1712
1713         # We now pick which formats have to be downloaded
1714         if info_dict.get('formats') is None:
1715             # There's only one format available
1716             formats = [info_dict]
1717         else:
1718             formats = info_dict['formats']
1719
1720         if not formats:
1721             raise ExtractorError('No video formats found!')
1722
1723         def is_wellformed(f):
1724             url = f.get('url')
1725             if not url:
1726                 self.report_warning(
1727                     '"url" field is missing or empty - skipping format, '
1728                     'there is an error in extractor')
1729                 return False
1730             if isinstance(url, bytes):
1731                 sanitize_string_field(f, 'url')
1732             return True
1733
1734         # Filter out malformed formats for better extraction robustness
1735         formats = list(filter(is_wellformed, formats))
1736
1737         formats_dict = {}
1738
1739         # We check that all the formats have the format and format_id fields
1740         for i, format in enumerate(formats):
1741             sanitize_string_field(format, 'format_id')
1742             sanitize_numeric_fields(format)
1743             format['url'] = sanitize_url(format['url'])
1744             if not format.get('format_id'):
1745                 format['format_id'] = compat_str(i)
1746             else:
1747                 # Sanitize format_id from characters used in format selector expression
1748                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
1749             format_id = format['format_id']
1750             if format_id not in formats_dict:
1751                 formats_dict[format_id] = []
1752             formats_dict[format_id].append(format)
1753
1754         # Make sure all formats have unique format_id
1755         for format_id, ambiguous_formats in formats_dict.items():
1756             if len(ambiguous_formats) > 1:
1757                 for i, format in enumerate(ambiguous_formats):
1758                     format['format_id'] = '%s-%d' % (format_id, i)
1759
1760         for i, format in enumerate(formats):
1761             if format.get('format') is None:
1762                 format['format'] = '{id} - {res}{note}'.format(
1763                     id=format['format_id'],
1764                     res=self.format_resolution(format),
1765                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1766                 )
1767             # Automatically determine file extension if missing
1768             if format.get('ext') is None:
1769                 format['ext'] = determine_ext(format['url']).lower()
1770             # Automatically determine protocol if missing (useful for format
1771             # selection purposes)
1772             if format.get('protocol') is None:
1773                 format['protocol'] = determine_protocol(format)
1774             # Add HTTP headers, so that external programs can use them from the
1775             # json output
1776             full_format_info = info_dict.copy()
1777             full_format_info.update(format)
1778             format['http_headers'] = self._calc_headers(full_format_info)
1779         # Remove private housekeeping stuff
1780         if '__x_forwarded_for_ip' in info_dict:
1781             del info_dict['__x_forwarded_for_ip']
1782
1783         # TODO Central sorting goes here
1784
1785         if formats[0] is not info_dict:
1786             # only set the 'formats' fields if the original info_dict list them
1787             # otherwise we end up with a circular reference, the first (and unique)
1788             # element in the 'formats' field in info_dict is info_dict itself,
1789             # which can't be exported to json
1790             info_dict['formats'] = formats
1791         if self.params.get('listformats'):
1792             self.list_formats(info_dict)
1793             return
1794
1795         req_format = self.params.get('format')
1796         if req_format is None:
1797             req_format = self._default_format_spec(info_dict, download=download)
1798             if self.params.get('verbose'):
1799                 self._write_string('[debug] Default format spec: %s\n' % req_format)
1800
1801         format_selector = self.build_format_selector(req_format)
1802
1803         # While in format selection we may need to have an access to the original
1804         # format set in order to calculate some metrics or do some processing.
1805         # For now we need to be able to guess whether original formats provided
1806         # by extractor are incomplete or not (i.e. whether extractor provides only
1807         # video-only or audio-only formats) for proper formats selection for
1808         # extractors with such incomplete formats (see
1809         # https://github.com/ytdl-org/youtube-dl/pull/5556).
1810         # Since formats may be filtered during format selection and may not match
1811         # the original formats the results may be incorrect. Thus original formats
1812         # or pre-calculated metrics should be passed to format selection routines
1813         # as well.
1814         # We will pass a context object containing all necessary additional data
1815         # instead of just formats.
1816         # This fixes incorrect format selection issue (see
1817         # https://github.com/ytdl-org/youtube-dl/issues/10083).
1818         incomplete_formats = (
1819             # All formats are video-only or
1820             all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
1821             # all formats are audio-only
1822             or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
1823
1824         ctx = {
1825             'formats': formats,
1826             'incomplete_formats': incomplete_formats,
1827         }
1828
1829         formats_to_download = list(format_selector(ctx))
1830         if not formats_to_download:
1831             raise ExtractorError('requested format not available',
1832                                  expected=True)
1833
1834         if download:
1835             self.to_screen('[info] Downloading format(s) %s' % ", ".join([f['format_id'] for f in formats_to_download]))
1836             if len(formats_to_download) > 1:
1837                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1838             for format in formats_to_download:
1839                 new_info = dict(info_dict)
1840                 new_info.update(format)
1841                 self.process_info(new_info)
1842         # We update the info dict with the best quality format (backwards compatibility)
1843         info_dict.update(formats_to_download[-1])
1844         return info_dict
1845
1846     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1847         """Select the requested subtitles and their format"""
1848         available_subs = {}
1849         if normal_subtitles and self.params.get('writesubtitles'):
1850             available_subs.update(normal_subtitles)
1851         if automatic_captions and self.params.get('writeautomaticsub'):
1852             for lang, cap_info in automatic_captions.items():
1853                 if lang not in available_subs:
1854                     available_subs[lang] = cap_info
1855
1856         if (not self.params.get('writesubtitles') and not
1857                 self.params.get('writeautomaticsub') or not
1858                 available_subs):
1859             return None
1860
1861         if self.params.get('allsubtitles', False):
1862             requested_langs = available_subs.keys()
1863         else:
1864             if self.params.get('subtitleslangs', False):
1865                 requested_langs = self.params.get('subtitleslangs')
1866             elif 'en' in available_subs:
1867                 requested_langs = ['en']
1868             else:
1869                 requested_langs = [list(available_subs.keys())[0]]
1870
1871         formats_query = self.params.get('subtitlesformat', 'best')
1872         formats_preference = formats_query.split('/') if formats_query else []
1873         subs = {}
1874         for lang in requested_langs:
1875             formats = available_subs.get(lang)
1876             if formats is None:
1877                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1878                 continue
1879             for ext in formats_preference:
1880                 if ext == 'best':
1881                     f = formats[-1]
1882                     break
1883                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1884                 if matches:
1885                     f = matches[-1]
1886                     break
1887             else:
1888                 f = formats[-1]
1889                 self.report_warning(
1890                     'No subtitle format found matching "%s" for language %s, '
1891                     'using %s' % (formats_query, lang, f['ext']))
1892             subs[lang] = f
1893         return subs
1894
1895     def __forced_printings(self, info_dict, filename, incomplete):
1896         def print_mandatory(field):
1897             if (self.params.get('force%s' % field, False)
1898                     and (not incomplete or info_dict.get(field) is not None)):
1899                 self.to_stdout(info_dict[field])
1900
1901         def print_optional(field):
1902             if (self.params.get('force%s' % field, False)
1903                     and info_dict.get(field) is not None):
1904                 self.to_stdout(info_dict[field])
1905
1906         print_mandatory('title')
1907         print_mandatory('id')
1908         if self.params.get('forceurl', False) and not incomplete:
1909             if info_dict.get('requested_formats') is not None:
1910                 for f in info_dict['requested_formats']:
1911                     self.to_stdout(f['url'] + f.get('play_path', ''))
1912             else:
1913                 # For RTMP URLs, also include the playpath
1914                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1915         print_optional('thumbnail')
1916         print_optional('description')
1917         if self.params.get('forcefilename', False) and filename is not None:
1918             self.to_stdout(filename)
1919         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1920             self.to_stdout(formatSeconds(info_dict['duration']))
1921         print_mandatory('format')
1922         if self.params.get('forcejson', False):
1923             self.to_stdout(json.dumps(info_dict))
1924
1925     def process_info(self, info_dict):
1926         """Process a single resolved IE result."""
1927
1928         assert info_dict.get('_type', 'video') == 'video'
1929
1930         info_dict.setdefault('__postprocessors', [])
1931
1932         max_downloads = self.params.get('max_downloads')
1933         if max_downloads is not None:
1934             if self._num_downloads >= int(max_downloads):
1935                 raise MaxDownloadsReached()
1936
1937         # TODO: backward compatibility, to be removed
1938         info_dict['fulltitle'] = info_dict['title']
1939
1940         if 'format' not in info_dict:
1941             info_dict['format'] = info_dict['ext']
1942
1943         if self._match_entry(info_dict, incomplete=False) is not None:
1944             return
1945
1946         self._num_downloads += 1
1947
1948         filename = self.prepare_filename(info_dict, warn=True)
1949         info_dict['_filename'] = full_filename = self.prepare_filepath(filename)
1950         temp_filename = self.prepare_filepath(filename, 'temp')
1951         files_to_move = {}
1952
1953         # Forced printings
1954         self.__forced_printings(info_dict, full_filename, incomplete=False)
1955
1956         if self.params.get('simulate', False):
1957             if self.params.get('force_write_download_archive', False):
1958                 self.record_download_archive(info_dict)
1959
1960             # Do nothing else if in simulate mode
1961             return
1962
1963         if filename is None:
1964             return
1965
1966         def ensure_dir_exists(path):
1967             return make_dir(path, self.report_error)
1968
1969         if not ensure_dir_exists(encodeFilename(full_filename)):
1970             return
1971         if not ensure_dir_exists(encodeFilename(temp_filename)):
1972             return
1973
1974         if self.params.get('writedescription', False):
1975             descfn = replace_extension(
1976                 self.prepare_filepath(filename, 'description'),
1977                 'description', info_dict.get('ext'))
1978             if not ensure_dir_exists(encodeFilename(descfn)):
1979                 return
1980             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
1981                 self.to_screen('[info] Video description is already present')
1982             elif info_dict.get('description') is None:
1983                 self.report_warning('There\'s no description to write.')
1984             else:
1985                 try:
1986                     self.to_screen('[info] Writing video description to: ' + descfn)
1987                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1988                         descfile.write(info_dict['description'])
1989                 except (OSError, IOError):
1990                     self.report_error('Cannot write description file ' + descfn)
1991                     return
1992
1993         if self.params.get('writeannotations', False):
1994             annofn = replace_extension(
1995                 self.prepare_filepath(filename, 'annotation'),
1996                 'annotations.xml', info_dict.get('ext'))
1997             if not ensure_dir_exists(encodeFilename(annofn)):
1998                 return
1999             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2000                 self.to_screen('[info] Video annotations are already present')
2001             elif not info_dict.get('annotations'):
2002                 self.report_warning('There are no annotations to write.')
2003             else:
2004                 try:
2005                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2006                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2007                         annofile.write(info_dict['annotations'])
2008                 except (KeyError, TypeError):
2009                     self.report_warning('There are no annotations to write.')
2010                 except (OSError, IOError):
2011                     self.report_error('Cannot write annotations file: ' + annofn)
2012                     return
2013
2014         def dl(name, info, subtitle=False):
2015             fd = get_suitable_downloader(info, self.params)(self, self.params)
2016             for ph in self._progress_hooks:
2017                 fd.add_progress_hook(ph)
2018             if self.params.get('verbose'):
2019                 self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
2020             return fd.download(name, info, subtitle)
2021
2022         subtitles_are_requested = any([self.params.get('writesubtitles', False),
2023                                        self.params.get('writeautomaticsub')])
2024
2025         if subtitles_are_requested and info_dict.get('requested_subtitles'):
2026             # subtitles download errors are already managed as troubles in relevant IE
2027             # that way it will silently go on when used with unsupporting IE
2028             subtitles = info_dict['requested_subtitles']
2029             # ie = self.get_info_extractor(info_dict['extractor_key'])
2030             for sub_lang, sub_info in subtitles.items():
2031                 sub_format = sub_info['ext']
2032                 sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
2033                 sub_filename_final = subtitles_filename(
2034                     self.prepare_filepath(filename, 'subtitle'),
2035                     sub_lang, sub_format, info_dict.get('ext'))
2036                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
2037                     self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
2038                     files_to_move[sub_filename] = sub_filename_final
2039                 else:
2040                     self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
2041                     if sub_info.get('data') is not None:
2042                         try:
2043                             # Use newline='' to prevent conversion of newline characters
2044                             # See https://github.com/ytdl-org/youtube-dl/issues/10268
2045                             with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
2046                                 subfile.write(sub_info['data'])
2047                             files_to_move[sub_filename] = sub_filename_final
2048                         except (OSError, IOError):
2049                             self.report_error('Cannot write subtitles file ' + sub_filename)
2050                             return
2051                     else:
2052                         try:
2053                             dl(sub_filename, sub_info, subtitle=True)
2054                             '''
2055                             if self.params.get('sleep_interval_subtitles', False):
2056                                 dl(sub_filename, sub_info)
2057                             else:
2058                                 sub_data = ie._request_webpage(
2059                                     sub_info['url'], info_dict['id'], note=False).read()
2060                                 with io.open(encodeFilename(sub_filename), 'wb') as subfile:
2061                                     subfile.write(sub_data)
2062                             '''
2063                             files_to_move[sub_filename] = sub_filename_final
2064                         except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2065                             self.report_warning('Unable to download subtitle for "%s": %s' %
2066                                                 (sub_lang, error_to_compat_str(err)))
2067                             continue
2068
2069         if self.params.get('skip_download', False):
2070             if self.params.get('convertsubtitles', False):
2071                 # subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
2072                 filename_real_ext = os.path.splitext(filename)[1][1:]
2073                 filename_wo_ext = (
2074                     os.path.splitext(full_filename)[0]
2075                     if filename_real_ext == info_dict['ext']
2076                     else full_filename)
2077                 afilename = '%s.%s' % (filename_wo_ext, self.params.get('convertsubtitles'))
2078                 # if subconv.available:
2079                 #     info_dict['__postprocessors'].append(subconv)
2080                 if os.path.exists(encodeFilename(afilename)):
2081                     self.to_screen(
2082                         '[download] %s has already been downloaded and '
2083                         'converted' % afilename)
2084                 else:
2085                     try:
2086                         self.post_process(full_filename, info_dict, files_to_move)
2087                     except (PostProcessingError) as err:
2088                         self.report_error('postprocessing: %s' % str(err))
2089                         return
2090
2091         if self.params.get('writeinfojson', False):
2092             infofn = replace_extension(
2093                 self.prepare_filepath(filename, 'infojson'),
2094                 'info.json', info_dict.get('ext'))
2095             if not ensure_dir_exists(encodeFilename(infofn)):
2096                 return
2097             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
2098                 self.to_screen('[info] Video description metadata is already present')
2099             else:
2100                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
2101                 try:
2102                     write_json_file(self.filter_requested_info(info_dict), infofn)
2103                 except (OSError, IOError):
2104                     self.report_error('Cannot write metadata to JSON file ' + infofn)
2105                     return
2106
2107         thumbdir = os.path.dirname(self.prepare_filepath(filename, 'thumbnail'))
2108         for thumbfn in self._write_thumbnails(info_dict, temp_filename):
2109             files_to_move[thumbfn] = os.path.join(thumbdir, os.path.basename(thumbfn))
2110
2111         # Write internet shortcut files
2112         url_link = webloc_link = desktop_link = False
2113         if self.params.get('writelink', False):
2114             if sys.platform == "darwin":  # macOS.
2115                 webloc_link = True
2116             elif sys.platform.startswith("linux"):
2117                 desktop_link = True
2118             else:  # if sys.platform in ['win32', 'cygwin']:
2119                 url_link = True
2120         if self.params.get('writeurllink', False):
2121             url_link = True
2122         if self.params.get('writewebloclink', False):
2123             webloc_link = True
2124         if self.params.get('writedesktoplink', False):
2125             desktop_link = True
2126
2127         if url_link or webloc_link or desktop_link:
2128             if 'webpage_url' not in info_dict:
2129                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2130                 return
2131             ascii_url = iri_to_uri(info_dict['webpage_url'])
2132
2133         def _write_link_file(extension, template, newline, embed_filename):
2134             linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
2135             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(linkfn)):
2136                 self.to_screen('[info] Internet shortcut is already present')
2137             else:
2138                 try:
2139                     self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2140                     with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2141                         template_vars = {'url': ascii_url}
2142                         if embed_filename:
2143                             template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2144                         linkfile.write(template % template_vars)
2145                 except (OSError, IOError):
2146                     self.report_error('Cannot write internet shortcut ' + linkfn)
2147                     return False
2148             return True
2149
2150         if url_link:
2151             if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2152                 return
2153         if webloc_link:
2154             if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2155                 return
2156         if desktop_link:
2157             if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2158                 return
2159
2160         # Download
2161         must_record_download_archive = False
2162         if not self.params.get('skip_download', False):
2163             try:
2164
2165                 def existing_file(filename, temp_filename):
2166                     file_exists = os.path.exists(encodeFilename(filename))
2167                     tempfile_exists = (
2168                         False if temp_filename == filename
2169                         else os.path.exists(encodeFilename(temp_filename)))
2170                     if not self.params.get('overwrites', False) and (file_exists or tempfile_exists):
2171                         existing_filename = temp_filename if tempfile_exists else filename
2172                         self.to_screen('[download] %s has already been downloaded and merged' % existing_filename)
2173                         return existing_filename
2174                     if tempfile_exists:
2175                         self.report_file_delete(temp_filename)
2176                         os.remove(encodeFilename(temp_filename))
2177                     if file_exists:
2178                         self.report_file_delete(filename)
2179                         os.remove(encodeFilename(filename))
2180                     return None
2181
2182                 success = True
2183                 if info_dict.get('requested_formats') is not None:
2184                     downloaded = []
2185                     merger = FFmpegMergerPP(self)
2186                     if not merger.available:
2187                         postprocessors = []
2188                         self.report_warning('You have requested multiple '
2189                                             'formats but ffmpeg or avconv are not installed.'
2190                                             ' The formats won\'t be merged.')
2191                     else:
2192                         postprocessors = [merger]
2193
2194                     def compatible_formats(formats):
2195                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2196                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2197                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2198                         if len(video_formats) > 2 or len(audio_formats) > 2:
2199                             return False
2200
2201                         # Check extension
2202                         exts = set(format.get('ext') for format in formats)
2203                         COMPATIBLE_EXTS = (
2204                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2205                             set(('webm',)),
2206                         )
2207                         for ext_sets in COMPATIBLE_EXTS:
2208                             if ext_sets.issuperset(exts):
2209                                 return True
2210                         # TODO: Check acodec/vcodec
2211                         return False
2212
2213                     requested_formats = info_dict['requested_formats']
2214                     old_ext = info_dict['ext']
2215                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
2216                         info_dict['ext'] = 'mkv'
2217                         self.report_warning(
2218                             'Requested formats are incompatible for merge and will be merged into mkv.')
2219
2220                     def correct_ext(filename):
2221                         filename_real_ext = os.path.splitext(filename)[1][1:]
2222                         filename_wo_ext = (
2223                             os.path.splitext(filename)[0]
2224                             if filename_real_ext == old_ext
2225                             else filename)
2226                         return '%s.%s' % (filename_wo_ext, info_dict['ext'])
2227
2228                     # Ensure filename always has a correct extension for successful merge
2229                     full_filename = correct_ext(full_filename)
2230                     temp_filename = correct_ext(temp_filename)
2231                     dl_filename = existing_file(full_filename, temp_filename)
2232                     if dl_filename is None:
2233                         for f in requested_formats:
2234                             new_info = dict(info_dict)
2235                             new_info.update(f)
2236                             fname = prepend_extension(
2237                                 self.prepare_filepath(self.prepare_filename(new_info), 'temp'),
2238                                 'f%s' % f['format_id'], new_info['ext'])
2239                             if not ensure_dir_exists(fname):
2240                                 return
2241                             downloaded.append(fname)
2242                             partial_success, real_download = dl(fname, new_info)
2243                             success = success and partial_success
2244                         info_dict['__postprocessors'] = postprocessors
2245                         info_dict['__files_to_merge'] = downloaded
2246                         # Even if there were no downloads, it is being merged only now
2247                         info_dict['__real_download'] = True
2248                 else:
2249                     # Just a single file
2250                     dl_filename = existing_file(full_filename, temp_filename)
2251                     if dl_filename is None:
2252                         success, real_download = dl(temp_filename, info_dict)
2253                         info_dict['__real_download'] = real_download
2254
2255                 # info_dict['__temp_filename'] = temp_filename
2256                 dl_filename = dl_filename or temp_filename
2257                 info_dict['__dl_filename'] = dl_filename
2258                 info_dict['__final_filename'] = full_filename
2259
2260             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2261                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2262                 return
2263             except (OSError, IOError) as err:
2264                 raise UnavailableVideoError(err)
2265             except (ContentTooShortError, ) as err:
2266                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2267                 return
2268
2269             if success and filename != '-':
2270                 # Fixup content
2271                 fixup_policy = self.params.get('fixup')
2272                 if fixup_policy is None:
2273                     fixup_policy = 'detect_or_warn'
2274
2275                 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
2276
2277                 stretched_ratio = info_dict.get('stretched_ratio')
2278                 if stretched_ratio is not None and stretched_ratio != 1:
2279                     if fixup_policy == 'warn':
2280                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2281                             info_dict['id'], stretched_ratio))
2282                     elif fixup_policy == 'detect_or_warn':
2283                         stretched_pp = FFmpegFixupStretchedPP(self)
2284                         if stretched_pp.available:
2285                             info_dict['__postprocessors'].append(stretched_pp)
2286                         else:
2287                             self.report_warning(
2288                                 '%s: Non-uniform pixel ratio (%s). %s'
2289                                 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
2290                     else:
2291                         assert fixup_policy in ('ignore', 'never')
2292
2293                 if (info_dict.get('requested_formats') is None
2294                         and info_dict.get('container') == 'm4a_dash'):
2295                     if fixup_policy == 'warn':
2296                         self.report_warning(
2297                             '%s: writing DASH m4a. '
2298                             'Only some players support this container.'
2299                             % info_dict['id'])
2300                     elif fixup_policy == 'detect_or_warn':
2301                         fixup_pp = FFmpegFixupM4aPP(self)
2302                         if fixup_pp.available:
2303                             info_dict['__postprocessors'].append(fixup_pp)
2304                         else:
2305                             self.report_warning(
2306                                 '%s: writing DASH m4a. '
2307                                 'Only some players support this container. %s'
2308                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2309                     else:
2310                         assert fixup_policy in ('ignore', 'never')
2311
2312                 if (info_dict.get('protocol') == 'm3u8_native'
2313                         or info_dict.get('protocol') == 'm3u8'
2314                         and self.params.get('hls_prefer_native')):
2315                     if fixup_policy == 'warn':
2316                         self.report_warning('%s: malformed AAC bitstream detected.' % (
2317                             info_dict['id']))
2318                     elif fixup_policy == 'detect_or_warn':
2319                         fixup_pp = FFmpegFixupM3u8PP(self)
2320                         if fixup_pp.available:
2321                             info_dict['__postprocessors'].append(fixup_pp)
2322                         else:
2323                             self.report_warning(
2324                                 '%s: malformed AAC bitstream detected. %s'
2325                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2326                     else:
2327                         assert fixup_policy in ('ignore', 'never')
2328
2329                 try:
2330                     self.post_process(dl_filename, info_dict, files_to_move)
2331                 except (PostProcessingError) as err:
2332                     self.report_error('postprocessing: %s' % str(err))
2333                     return
2334                 try:
2335                     for ph in self._post_hooks:
2336                         ph(full_filename)
2337                 except Exception as err:
2338                     self.report_error('post hooks: %s' % str(err))
2339                     return
2340                 must_record_download_archive = True
2341
2342         if must_record_download_archive or self.params.get('force_write_download_archive', False):
2343             self.record_download_archive(info_dict)
2344         max_downloads = self.params.get('max_downloads')
2345         if max_downloads is not None and self._num_downloads >= int(max_downloads):
2346             raise MaxDownloadsReached()
2347
2348     def download(self, url_list):
2349         """Download a given list of URLs."""
2350         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
2351         if (len(url_list) > 1
2352                 and outtmpl != '-'
2353                 and '%' not in outtmpl
2354                 and self.params.get('max_downloads') != 1):
2355             raise SameFileError(outtmpl)
2356
2357         for url in url_list:
2358             try:
2359                 # It also downloads the videos
2360                 res = self.extract_info(
2361                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2362             except UnavailableVideoError:
2363                 self.report_error('unable to download video')
2364             except MaxDownloadsReached:
2365                 self.to_screen('[info] Maximum number of downloaded files reached')
2366                 raise
2367             except ExistingVideoReached:
2368                 self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
2369                 raise
2370             except RejectedVideoReached:
2371                 self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
2372                 raise
2373             else:
2374                 if self.params.get('dump_single_json', False):
2375                     self.to_stdout(json.dumps(res))
2376
2377         return self._download_retcode
2378
2379     def download_with_info_file(self, info_filename):
2380         with contextlib.closing(fileinput.FileInput(
2381                 [info_filename], mode='r',
2382                 openhook=fileinput.hook_encoded('utf-8'))) as f:
2383             # FileInput doesn't have a read method, we can't call json.load
2384             info = self.filter_requested_info(json.loads('\n'.join(f)))
2385         try:
2386             self.process_ie_result(info, download=True)
2387         except DownloadError:
2388             webpage_url = info.get('webpage_url')
2389             if webpage_url is not None:
2390                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2391                 return self.download([webpage_url])
2392             else:
2393                 raise
2394         return self._download_retcode
2395
2396     @staticmethod
2397     def filter_requested_info(info_dict):
2398         return dict(
2399             (k, v) for k, v in info_dict.items()
2400             if k not in ['requested_formats', 'requested_subtitles'])
2401
2402     def post_process(self, filename, ie_info, files_to_move={}):
2403         """Run all the postprocessors on the given file."""
2404         info = dict(ie_info)
2405         info['filepath'] = filename
2406
2407         def run_pp(pp):
2408             files_to_delete = []
2409             infodict = info
2410             try:
2411                 files_to_delete, infodict = pp.run(infodict)
2412             except PostProcessingError as e:
2413                 self.report_error(e.msg)
2414             if not files_to_delete:
2415                 return infodict
2416
2417             if self.params.get('keepvideo', False):
2418                 for f in files_to_delete:
2419                     files_to_move.setdefault(f, '')
2420             else:
2421                 for old_filename in set(files_to_delete):
2422                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2423                     try:
2424                         os.remove(encodeFilename(old_filename))
2425                     except (IOError, OSError):
2426                         self.report_warning('Unable to remove downloaded original file')
2427                     if old_filename in files_to_move:
2428                         del files_to_move[old_filename]
2429             return infodict
2430
2431         for pp in ie_info.get('__postprocessors', []) + self._pps:
2432             info = run_pp(pp)
2433         info = run_pp(MoveFilesAfterDownloadPP(self, files_to_move))
2434         files_to_move = {}
2435         for pp in self._pps_end:
2436             info = run_pp(pp)
2437
2438     def _make_archive_id(self, info_dict):
2439         video_id = info_dict.get('id')
2440         if not video_id:
2441             return
2442         # Future-proof against any change in case
2443         # and backwards compatibility with prior versions
2444         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
2445         if extractor is None:
2446             url = str_or_none(info_dict.get('url'))
2447             if not url:
2448                 return
2449             # Try to find matching extractor for the URL and take its ie_key
2450             for ie in self._ies:
2451                 if ie.suitable(url):
2452                     extractor = ie.ie_key()
2453                     break
2454             else:
2455                 return
2456         return '%s %s' % (extractor.lower(), video_id)
2457
2458     def in_download_archive(self, info_dict):
2459         fn = self.params.get('download_archive')
2460         if fn is None:
2461             return False
2462
2463         vid_id = self._make_archive_id(info_dict)
2464         if not vid_id:
2465             return False  # Incomplete video information
2466
2467         return vid_id in self.archive
2468
2469     def record_download_archive(self, info_dict):
2470         fn = self.params.get('download_archive')
2471         if fn is None:
2472             return
2473         vid_id = self._make_archive_id(info_dict)
2474         assert vid_id
2475         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2476             archive_file.write(vid_id + '\n')
2477         self.archive.add(vid_id)
2478
2479     @staticmethod
2480     def format_resolution(format, default='unknown'):
2481         if format.get('vcodec') == 'none':
2482             return 'audio only'
2483         if format.get('resolution') is not None:
2484             return format['resolution']
2485         if format.get('height') is not None:
2486             if format.get('width') is not None:
2487                 res = '%sx%s' % (format['width'], format['height'])
2488             else:
2489                 res = '%sp' % format['height']
2490         elif format.get('width') is not None:
2491             res = '%dx?' % format['width']
2492         else:
2493             res = default
2494         return res
2495
2496     def _format_note(self, fdict):
2497         res = ''
2498         if fdict.get('ext') in ['f4f', 'f4m']:
2499             res += '(unsupported) '
2500         if fdict.get('language'):
2501             if res:
2502                 res += ' '
2503             res += '[%s] ' % fdict['language']
2504         if fdict.get('format_note') is not None:
2505             res += fdict['format_note'] + ' '
2506         if fdict.get('tbr') is not None:
2507             res += '%4dk ' % fdict['tbr']
2508         if fdict.get('container') is not None:
2509             if res:
2510                 res += ', '
2511             res += '%s container' % fdict['container']
2512         if (fdict.get('vcodec') is not None
2513                 and fdict.get('vcodec') != 'none'):
2514             if res:
2515                 res += ', '
2516             res += fdict['vcodec']
2517             if fdict.get('vbr') is not None:
2518                 res += '@'
2519         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2520             res += 'video@'
2521         if fdict.get('vbr') is not None:
2522             res += '%4dk' % fdict['vbr']
2523         if fdict.get('fps') is not None:
2524             if res:
2525                 res += ', '
2526             res += '%sfps' % fdict['fps']
2527         if fdict.get('acodec') is not None:
2528             if res:
2529                 res += ', '
2530             if fdict['acodec'] == 'none':
2531                 res += 'video only'
2532             else:
2533                 res += '%-5s' % fdict['acodec']
2534         elif fdict.get('abr') is not None:
2535             if res:
2536                 res += ', '
2537             res += 'audio'
2538         if fdict.get('abr') is not None:
2539             res += '@%3dk' % fdict['abr']
2540         if fdict.get('asr') is not None:
2541             res += ' (%5dHz)' % fdict['asr']
2542         if fdict.get('filesize') is not None:
2543             if res:
2544                 res += ', '
2545             res += format_bytes(fdict['filesize'])
2546         elif fdict.get('filesize_approx') is not None:
2547             if res:
2548                 res += ', '
2549             res += '~' + format_bytes(fdict['filesize_approx'])
2550         return res
2551
2552     def _format_note_table(self, f):
2553         def join_fields(*vargs):
2554             return ', '.join((val for val in vargs if val != ''))
2555
2556         return join_fields(
2557             'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
2558             format_field(f, 'language', '[%s]'),
2559             format_field(f, 'format_note'),
2560             format_field(f, 'container', ignore=(None, f.get('ext'))),
2561             format_field(f, 'asr', '%5dHz'))
2562
2563     def list_formats(self, info_dict):
2564         formats = info_dict.get('formats', [info_dict])
2565         new_format = self.params.get('listformats_table', False)
2566         if new_format:
2567             table = [
2568                 [
2569                     format_field(f, 'format_id'),
2570                     format_field(f, 'ext'),
2571                     self.format_resolution(f),
2572                     format_field(f, 'fps', '%d'),
2573                     '|',
2574                     format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
2575                     format_field(f, 'tbr', '%4dk'),
2576                     f.get('protocol').replace('http_dash_segments', 'dash').replace("native", "n"),
2577                     '|',
2578                     format_field(f, 'vcodec', default='unknown').replace('none', ''),
2579                     format_field(f, 'vbr', '%4dk'),
2580                     format_field(f, 'acodec', default='unknown').replace('none', ''),
2581                     format_field(f, 'abr', '%3dk'),
2582                     format_field(f, 'asr', '%5dHz'),
2583                     self._format_note_table(f)]
2584                 for f in formats
2585                 if f.get('preference') is None or f['preference'] >= -1000]
2586             header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', '  TBR', 'PROTO',
2587                            '|', 'VCODEC', '  VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
2588         else:
2589             table = [
2590                 [
2591                     format_field(f, 'format_id'),
2592                     format_field(f, 'ext'),
2593                     self.format_resolution(f),
2594                     self._format_note(f)]
2595                 for f in formats
2596                 if f.get('preference') is None or f['preference'] >= -1000]
2597             header_line = ['format code', 'extension', 'resolution', 'note']
2598
2599         # if len(formats) > 1:
2600         #     table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
2601         self.to_screen(
2602             '[info] Available formats for %s:\n%s' % (info_dict['id'], render_table(
2603                 header_line,
2604                 table,
2605                 delim=new_format,
2606                 extraGap=(0 if new_format else 1),
2607                 hideEmpty=new_format)))
2608
2609     def list_thumbnails(self, info_dict):
2610         thumbnails = info_dict.get('thumbnails')
2611         if not thumbnails:
2612             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
2613             return
2614
2615         self.to_screen(
2616             '[info] Thumbnails for %s:' % info_dict['id'])
2617         self.to_screen(render_table(
2618             ['ID', 'width', 'height', 'URL'],
2619             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
2620
2621     def list_subtitles(self, video_id, subtitles, name='subtitles'):
2622         if not subtitles:
2623             self.to_screen('%s has no %s' % (video_id, name))
2624             return
2625         self.to_screen(
2626             'Available %s for %s:' % (name, video_id))
2627         self.to_screen(render_table(
2628             ['Language', 'formats'],
2629             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
2630                 for lang, formats in subtitles.items()]))
2631
2632     def urlopen(self, req):
2633         """ Start an HTTP download """
2634         if isinstance(req, compat_basestring):
2635             req = sanitized_Request(req)
2636         return self._opener.open(req, timeout=self._socket_timeout)
2637
2638     def print_debug_header(self):
2639         if not self.params.get('verbose'):
2640             return
2641
2642         if type('') is not compat_str:
2643             # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
2644             self.report_warning(
2645                 'Your Python is broken! Update to a newer and supported version')
2646
2647         stdout_encoding = getattr(
2648             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
2649         encoding_str = (
2650             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
2651                 locale.getpreferredencoding(),
2652                 sys.getfilesystemencoding(),
2653                 stdout_encoding,
2654                 self.get_encoding()))
2655         write_string(encoding_str, encoding=None)
2656
2657         self._write_string('[debug] yt-dlp version ' + __version__ + '\n')
2658         if _LAZY_LOADER:
2659             self._write_string('[debug] Lazy loading extractors enabled' + '\n')
2660         try:
2661             sp = subprocess.Popen(
2662                 ['git', 'rev-parse', '--short', 'HEAD'],
2663                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
2664                 cwd=os.path.dirname(os.path.abspath(__file__)))
2665             out, err = process_communicate_or_kill(sp)
2666             out = out.decode().strip()
2667             if re.match('[0-9a-f]+', out):
2668                 self._write_string('[debug] Git HEAD: ' + out + '\n')
2669         except Exception:
2670             try:
2671                 sys.exc_clear()
2672             except Exception:
2673                 pass
2674
2675         def python_implementation():
2676             impl_name = platform.python_implementation()
2677             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
2678                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
2679             return impl_name
2680
2681         self._write_string('[debug] Python version %s (%s) - %s\n' % (
2682             platform.python_version(), python_implementation(),
2683             platform_name()))
2684
2685         exe_versions = FFmpegPostProcessor.get_versions(self)
2686         exe_versions['rtmpdump'] = rtmpdump_version()
2687         exe_versions['phantomjs'] = PhantomJSwrapper._version()
2688         exe_str = ', '.join(
2689             '%s %s' % (exe, v)
2690             for exe, v in sorted(exe_versions.items())
2691             if v
2692         )
2693         if not exe_str:
2694             exe_str = 'none'
2695         self._write_string('[debug] exe versions: %s\n' % exe_str)
2696
2697         proxy_map = {}
2698         for handler in self._opener.handlers:
2699             if hasattr(handler, 'proxies'):
2700                 proxy_map.update(handler.proxies)
2701         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
2702
2703         if self.params.get('call_home', False):
2704             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
2705             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
2706             return
2707             latest_version = self.urlopen(
2708                 'https://yt-dl.org/latest/version').read().decode('utf-8')
2709             if version_tuple(latest_version) > version_tuple(__version__):
2710                 self.report_warning(
2711                     'You are using an outdated version (newest version: %s)! '
2712                     'See https://yt-dl.org/update if you need help updating.' %
2713                     latest_version)
2714
2715     def _setup_opener(self):
2716         timeout_val = self.params.get('socket_timeout')
2717         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
2718
2719         opts_cookiefile = self.params.get('cookiefile')
2720         opts_proxy = self.params.get('proxy')
2721
2722         if opts_cookiefile is None:
2723             self.cookiejar = compat_cookiejar.CookieJar()
2724         else:
2725             opts_cookiefile = expand_path(opts_cookiefile)
2726             self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
2727             if os.access(opts_cookiefile, os.R_OK):
2728                 self.cookiejar.load(ignore_discard=True, ignore_expires=True)
2729
2730         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
2731         if opts_proxy is not None:
2732             if opts_proxy == '':
2733                 proxies = {}
2734             else:
2735                 proxies = {'http': opts_proxy, 'https': opts_proxy}
2736         else:
2737             proxies = compat_urllib_request.getproxies()
2738             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
2739             if 'http' in proxies and 'https' not in proxies:
2740                 proxies['https'] = proxies['http']
2741         proxy_handler = PerRequestProxyHandler(proxies)
2742
2743         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
2744         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
2745         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
2746         redirect_handler = YoutubeDLRedirectHandler()
2747         data_handler = compat_urllib_request_DataHandler()
2748
2749         # When passing our own FileHandler instance, build_opener won't add the
2750         # default FileHandler and allows us to disable the file protocol, which
2751         # can be used for malicious purposes (see
2752         # https://github.com/ytdl-org/youtube-dl/issues/8227)
2753         file_handler = compat_urllib_request.FileHandler()
2754
2755         def file_open(*args, **kwargs):
2756             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dlc for security reasons')
2757         file_handler.file_open = file_open
2758
2759         opener = compat_urllib_request.build_opener(
2760             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2761
2762         # Delete the default user-agent header, which would otherwise apply in
2763         # cases where our custom HTTP handler doesn't come into play
2764         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
2765         opener.addheaders = []
2766         self._opener = opener
2767
2768     def encode(self, s):
2769         if isinstance(s, bytes):
2770             return s  # Already encoded
2771
2772         try:
2773             return s.encode(self.get_encoding())
2774         except UnicodeEncodeError as err:
2775             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2776             raise
2777
2778     def get_encoding(self):
2779         encoding = self.params.get('encoding')
2780         if encoding is None:
2781             encoding = preferredencoding()
2782         return encoding
2783
2784     def _write_thumbnails(self, info_dict, filename):
2785         if self.params.get('writethumbnail', False):
2786             thumbnails = info_dict.get('thumbnails')
2787             if thumbnails:
2788                 thumbnails = [thumbnails[-1]]
2789         elif self.params.get('write_all_thumbnails', False):
2790             thumbnails = info_dict.get('thumbnails') or []
2791         else:
2792             thumbnails = []
2793
2794         ret = []
2795         for t in thumbnails:
2796             thumb_ext = determine_ext(t['url'], 'jpg')
2797             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2798             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2799             t['filename'] = thumb_filename = replace_extension(filename + suffix, thumb_ext, info_dict.get('ext'))
2800
2801             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
2802                 ret.append(thumb_filename)
2803                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2804                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2805             else:
2806                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2807                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2808                 try:
2809                     uf = self.urlopen(t['url'])
2810                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2811                         shutil.copyfileobj(uf, thumbf)
2812                     ret.append(thumb_filename)
2813                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2814                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2815                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2816                     self.report_warning('Unable to download thumbnail "%s": %s' %
2817                                         (t['url'], error_to_compat_str(err)))
2818         return ret