[yt-dlp.git] / youtube_dlc / YoutubeDL.py
1 #!/usr/bin/env python
2 # coding: utf-8
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import copy
9 import datetime
10 import errno
11 import fileinput
12 import io
13 import itertools
14 import json
15 import locale
16 import operator
17 import os
18 import platform
19 import re
20 import shutil
21 import subprocess
22 import socket
23 import sys
24 import time
25 import tokenize
26 import traceback
27 import random
28
29 from string import ascii_letters
30 from zipimport import zipimporter
31
32 from .compat import (
33 compat_basestring,
34 compat_cookiejar,
35 compat_get_terminal_size,
36 compat_http_client,
37 compat_kwargs,
38 compat_numeric_types,
39 compat_os_name,
40 compat_str,
41 compat_tokenize_tokenize,
42 compat_urllib_error,
43 compat_urllib_request,
44 compat_urllib_request_DataHandler,
45 )
46 from .utils import (
47 age_restricted,
48 args_to_str,
49 ContentTooShortError,
50 date_from_str,
51 DateRange,
52 DEFAULT_OUTTMPL,
53 OUTTMPL_TYPES,
54 determine_ext,
55 determine_protocol,
56 DOT_DESKTOP_LINK_TEMPLATE,
57 DOT_URL_LINK_TEMPLATE,
58 DOT_WEBLOC_LINK_TEMPLATE,
59 DownloadError,
60 encode_compat_str,
61 encodeFilename,
62 error_to_compat_str,
63 ExistingVideoReached,
64 expand_path,
65 ExtractorError,
66 float_or_none,
67 format_bytes,
68 format_field,
69 formatSeconds,
70 GeoRestrictedError,
71 int_or_none,
72 iri_to_uri,
73 ISO3166Utils,
74 locked_file,
75 make_dir,
76 make_HTTPS_handler,
77 MaxDownloadsReached,
78 orderedSet,
79 PagedList,
80 parse_filesize,
81 PerRequestProxyHandler,
82 platform_name,
83 PostProcessingError,
84 preferredencoding,
85 prepend_extension,
86 register_socks_protocols,
87 render_table,
88 replace_extension,
89 RejectedVideoReached,
90 SameFileError,
91 sanitize_filename,
92 sanitize_path,
93 sanitize_url,
94 sanitized_Request,
95 std_headers,
96 str_or_none,
97 strftime_or_none,
98 subtitles_filename,
99 to_high_limit_path,
100 UnavailableVideoError,
101 url_basename,
102 version_tuple,
103 write_json_file,
104 write_string,
105 YoutubeDLCookieJar,
106 YoutubeDLCookieProcessor,
107 YoutubeDLHandler,
108 YoutubeDLRedirectHandler,
109 process_communicate_or_kill,
110 )
111 from .cache import Cache
112 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER, _PLUGIN_CLASSES
113 from .extractor.openload import PhantomJSwrapper
114 from .downloader import get_suitable_downloader
115 from .downloader.rtmp import rtmpdump_version
116 from .postprocessor import (
117 FFmpegFixupM3u8PP,
118 FFmpegFixupM4aPP,
119 FFmpegFixupStretchedPP,
120 FFmpegMergerPP,
121 FFmpegPostProcessor,
122 # FFmpegSubtitlesConvertorPP,
123 get_postprocessor,
124 MoveFilesAfterDownloadPP,
125 )
126 from .version import __version__
127
128 if compat_os_name == 'nt':
129 import ctypes
130
131
132 class YoutubeDL(object):
133 """YoutubeDL class.
134
135     YoutubeDL objects are responsible for downloading the actual
136     video file and writing it to disk if the user has requested it,
137     among other tasks. In most cases there should be one per program.
138     Given a video URL, the downloader doesn't know how to extract all
139     the needed information (a task that InfoExtractors handle), so it
140     has to pass the URL to one of them.
141
142 For this, YoutubeDL objects have a method that allows
143 InfoExtractors to be registered in a given order. When it is passed
144     a URL, the YoutubeDL object hands it to the first InfoExtractor it
145 finds that reports being able to handle it. The InfoExtractor extracts
146 all the information about the video or videos the URL refers to, and
147     YoutubeDL processes the extracted information, possibly using a File
148 Downloader to download the video.
149
150 YoutubeDL objects accept a lot of parameters. In order not to saturate
151 the object constructor with arguments, it receives a dictionary of
152 options instead. These options are available through the params
153 attribute for the InfoExtractors to use. The YoutubeDL also
154     registers itself as the downloader in charge of the InfoExtractors
155 that are added to it, so this is a "mutual registration".
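
    Example (a minimal sketch; the options shown are illustrative, see
    "Available options" below):

        from youtube_dlc import YoutubeDL

        ydl_opts = {'format': 'bestvideo+bestaudio/best'}
        with YoutubeDL(ydl_opts) as ydl:
            ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])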
156
157 Available options:
158
159 username: Username for authentication purposes.
160 password: Password for authentication purposes.
161 videopassword: Password for accessing a video.
162 ap_mso: Adobe Pass multiple-system operator identifier.
163 ap_username: Multiple-system operator account username.
164 ap_password: Multiple-system operator account password.
165 usenetrc: Use netrc for authentication instead.
166 verbose: Print additional info to stdout.
167 quiet: Do not print messages to stdout.
168 no_warnings: Do not print out anything for warnings.
169 forceurl: Force printing final URL.
170 forcetitle: Force printing title.
171 forceid: Force printing ID.
172 forcethumbnail: Force printing thumbnail URL.
173 forcedescription: Force printing description.
174 forcefilename: Force printing final filename.
175 forceduration: Force printing duration.
176 forcejson: Force printing info_dict as JSON.
177 dump_single_json: Force printing the info_dict of the whole playlist
178 (or video) as a single JSON line.
179 force_write_download_archive: Force writing download archive regardless
180 of 'skip_download' or 'simulate'.
181 simulate: Do not download the video files.
182 format: Video format code. see "FORMAT SELECTION" for more details.
183 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
184 format_sort: How to sort the video formats. see "Sorting Formats"
185 for more details.
186 format_sort_force: Force the given format_sort. see "Sorting Formats"
187 for more details.
188 allow_multiple_video_streams: Allow multiple video streams to be merged
189 into a single file
190 allow_multiple_audio_streams: Allow multiple audio streams to be merged
191 into a single file
192 outtmpl: Dictionary of templates for output names. Allowed keys
193 are 'default' and the keys of OUTTMPL_TYPES (in utils.py)
194 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
195 restrictfilenames: Do not allow "&" and spaces in file names
196 trim_file_name: Limit length of filename (extension excluded)
197 ignoreerrors: Do not stop on download errors
198 (Default True when running youtube-dlc,
199 but False when directly accessing YoutubeDL class)
200 force_generic_extractor: Force downloader to use the generic extractor
201 overwrites: Overwrite all video and metadata files if True,
202 overwrite only non-video files if None
203 and don't overwrite any file if False
204 playliststart: Playlist item to start at.
205 playlistend: Playlist item to end at.
206 playlist_items: Specific indices of playlist to download.
207 playlistreverse: Download playlist items in reverse order.
208 playlistrandom: Download playlist items in random order.
209 matchtitle: Download only matching titles.
210 rejecttitle: Reject downloads for matching titles.
211 logger: Log messages to a logging.Logger instance.
212 logtostderr: Log messages to stderr instead of stdout.
213 writedescription: Write the video description to a .description file
214     writeinfojson:     Write the video metadata to a .info.json file
215 writecomments: Extract video comments. This will not be written to disk
216 unless writeinfojson is also given
217 writeannotations: Write the video annotations to a .annotations.xml file
218 writethumbnail: Write the thumbnail image to a file
219 allow_playlist_files: Whether to write playlists' description, infojson etc
220 also to disk when using the 'write*' options
221 write_all_thumbnails: Write all thumbnail formats to files
222 writelink: Write an internet shortcut file, depending on the
223 current platform (.url/.webloc/.desktop)
224 writeurllink: Write a Windows internet shortcut file (.url)
225 writewebloclink: Write a macOS internet shortcut file (.webloc)
226 writedesktoplink: Write a Linux internet shortcut file (.desktop)
227 writesubtitles: Write the video subtitles to a file
228 writeautomaticsub: Write the automatically generated subtitles to a file
229 allsubtitles: Downloads all the subtitles of the video
230 (requires writesubtitles or writeautomaticsub)
231 listsubtitles: Lists all available subtitles for the video
232 subtitlesformat: The format code for subtitles
233 subtitleslangs: List of languages of the subtitles to download
234 keepvideo: Keep the video file after post-processing
235 daterange: A DateRange object, download only if the upload_date is in the range.
236 skip_download: Skip the actual download of the video file
237 cachedir: Location of the cache files in the filesystem.
238 False to disable filesystem cache.
239 noplaylist: Download single video instead of a playlist if in doubt.
240 age_limit: An integer representing the user's age in years.
241 Unsuitable videos for the given age are skipped.
242 min_views: An integer representing the minimum view count the video
243 must have in order to not be skipped.
244 Videos without view count information are always
245 downloaded. None for no limit.
246 max_views: An integer representing the maximum view count.
247 Videos that are more popular than that are not
248 downloaded.
249 Videos without view count information are always
250 downloaded. None for no limit.
251 download_archive: File name of a file where all downloads are recorded.
252 Videos already present in the file are not downloaded
253 again.
254 break_on_existing: Stop the download process after attempting to download a
255 file that is in the archive.
256 break_on_reject: Stop the download process when encountering a video that
257 has been filtered out.
258 cookiefile: File name where cookies should be read from and dumped to
259 nocheckcertificate:Do not verify SSL certificates
260 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
261 At the moment, this is only supported by YouTube.
262 proxy: URL of the proxy server to use
263 geo_verification_proxy: URL of the proxy to use for IP address verification
264 on geo-restricted sites.
265 socket_timeout: Time to wait for unresponsive hosts, in seconds
266 bidi_workaround: Work around buggy terminals without bidirectional text
267     support, using fribidi
268 debug_printtraffic:Print out sent and received HTTP traffic
269 include_ads: Download ads as well
270 default_search: Prepend this string if an input url is not valid.
271 'auto' for elaborate guessing
272 encoding: Use this encoding instead of the system-specified.
273 extract_flat: Do not resolve URLs, return the immediate result.
274 Pass in 'in_playlist' to only show this behavior for
275 playlist items.
276 postprocessors: A list of dictionaries, each with an entry
277 * key: The name of the postprocessor. See
278 youtube_dlc/postprocessor/__init__.py for a list.
279 * _after_move: Optional. If True, run this post_processor
280 after 'MoveFilesAfterDownload'
281 as well as any further keyword arguments for the
282 postprocessor.
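                       Example (a sketch; these keys are forwarded to
                       FFmpegExtractAudioPP):
                           'postprocessors': [{
                               'key': 'FFmpegExtractAudio',
                               'preferredcodec': 'mp3',
                               'preferredquality': '192',
                           }]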
283 post_hooks: A list of functions that get called as the final step
284 for each video file, after all postprocessors have been
285 called. The filename will be passed as the only argument.
286 progress_hooks: A list of functions that get called on download
287 progress, with a dictionary with the entries
288 * status: One of "downloading", "error", or "finished".
289 Check this first and ignore unknown values.
290
291 If status is one of "downloading", or "finished", the
292 following properties may also be present:
293 * filename: The final filename (always present)
294 * tmpfilename: The filename we're currently writing to
295 * downloaded_bytes: Bytes on disk
296 * total_bytes: Size of the whole file, None if unknown
297 * total_bytes_estimate: Guess of the eventual file size,
298 None if unavailable.
299 * elapsed: The number of seconds since download started.
300 * eta: The estimated time in seconds, None if unknown
301 * speed: The download speed in bytes/second, None if
302 unknown
303 * fragment_index: The counter of the currently
304 downloaded video fragment.
305 * fragment_count: The number of fragments (= individual
306 files that will be merged)
307
308 Progress hooks are guaranteed to be called at least once
309 (with status "finished") if the download is successful.
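                       Example hook (a minimal sketch):
                           def my_hook(d):
                               if d['status'] == 'finished':
                                   print('Done downloading %s' % d['filename'])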
310 merge_output_format: Extension to use when merging formats.
311 final_ext: Expected final extension; used to detect when the file was
312 already downloaded and converted. "merge_output_format" is
313 replaced by this extension when given
314 fixup: Automatically correct known faults of the file.
315 One of:
316 - "never": do nothing
317 - "warn": only emit a warning
318 - "detect_or_warn": check whether we can do anything
319 about it, warn otherwise (default)
320 source_address: Client-side IP address to bind to.
321 call_home: Boolean, true iff we are allowed to contact the
322 youtube-dlc servers for debugging.
323 sleep_interval: Number of seconds to sleep before each download when
324 used alone or a lower bound of a range for randomized
325 sleep before each download (minimum possible number
326 of seconds to sleep) when used along with
327 max_sleep_interval.
328 max_sleep_interval:Upper bound of a range for randomized sleep before each
329 download (maximum possible number of seconds to sleep).
330 Must only be used along with sleep_interval.
331 Actual sleep time will be a random float from range
332 [sleep_interval; max_sleep_interval].
333 listformats: Print an overview of available video formats and exit.
334 list_thumbnails: Print a table of all thumbnails and exit.
335 match_filter: A function that gets called with the info_dict of
336 every video.
337 If it returns a message, the video is ignored.
338 If it returns None, the video is downloaded.
339 match_filter_func in utils.py is one example for this.
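                       Example (a sketch that skips videos longer than one hour):
                           def my_filter(info_dict):
                               duration = info_dict.get('duration')
                               if duration and duration > 3600:
                                   return 'Video is longer than one hour'
                               return None  # proceed with download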
340 no_color: Do not emit color codes in output.
341 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
342 HTTP header
343 geo_bypass_country:
344 Two-letter ISO 3166-2 country code that will be used for
345 explicit geographic restriction bypassing via faking
346 X-Forwarded-For HTTP header
347 geo_bypass_ip_block:
348 IP range in CIDR notation that will be used similarly to
349 geo_bypass_country
350
351 The following options determine which downloader is picked:
352 external_downloader: Executable of the external downloader to call.
353 None or unset for standard (built-in) downloader.
354     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
355     if True; use ffmpeg/avconv if False; and use the
356     downloader suggested by the extractor if None.
357
358 The following parameters are not used by YoutubeDL itself, they are used by
359 the downloader (see youtube_dlc/downloader/common.py):
360 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
361 noresizebuffer, retries, continuedl, noprogress, consoletitle,
362 xattr_set_filesize, external_downloader_args, hls_use_mpegts,
363 http_chunk_size.
364
365 The following options are used by the post processors:
366 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
367 otherwise prefer ffmpeg. (avconv support is deprecated)
368 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
369 to the binary or its containing directory.
370 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
371 and a list of additional command-line arguments for the
372 postprocessor/executable. The dict can also have "PP+EXE" keys
373 which are used when the given exe is used by the given PP.
374     Use 'default' as the name for arguments to be passed to all PP
375 The following options are used by the Youtube extractor:
376 youtube_include_dash_manifest: If True (default), DASH manifests and related
377 data will be downloaded and processed by extractor.
378 You can reduce network I/O by disabling it if you don't
379 care about DASH.
380 """
381
382 _NUMERIC_FIELDS = set((
383 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
384 'timestamp', 'upload_year', 'upload_month', 'upload_day',
385 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
386 'average_rating', 'comment_count', 'age_limit',
387 'start_time', 'end_time',
388 'chapter_number', 'season_number', 'episode_number',
389 'track_number', 'disc_number', 'release_year',
390 'playlist_index',
391 ))
392
393 params = None
394 _ies = []
395 _pps = {'beforedl': [], 'aftermove': [], 'normal': []}
396 __prepare_filename_warned = False
397 _download_retcode = None
398 _num_downloads = None
399 _playlist_level = 0
400 _playlist_urls = set()
401 _screen_file = None
402
403 def __init__(self, params=None, auto_init=True):
404 """Create a FileDownloader object with the given options."""
405 if params is None:
406 params = {}
407 self._ies = []
408 self._ies_instances = {}
409 self._pps = {'beforedl': [], 'aftermove': [], 'normal': []}
410 self.__prepare_filename_warned = False
411 self._post_hooks = []
412 self._progress_hooks = []
413 self._download_retcode = 0
414 self._num_downloads = 0
415 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
416 self._err_file = sys.stderr
417 self.params = {
418 # Default parameters
419 'nocheckcertificate': False,
420 }
421 self.params.update(params)
422 self.cache = Cache(self)
423 self.archive = set()
424
425 """Preload the archive, if any is specified"""
426 def preload_download_archive(self):
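            # Archive files hold one '<extractor> <video id>' pair per line,
            # e.g. 'youtube BaW_jenozKc' (as written by record_download_archive)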
427 fn = self.params.get('download_archive')
428 if fn is None:
429 return False
430 try:
431 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
432 for line in archive_file:
433 self.archive.add(line.strip())
434 except IOError as ioe:
435 if ioe.errno != errno.ENOENT:
436 raise
437 return False
438 return True
439
440 def check_deprecated(param, option, suggestion):
441 if self.params.get(param) is not None:
442 self.report_warning(
443 '%s is deprecated. Use %s instead.' % (option, suggestion))
444 return True
445 return False
446
447 if self.params.get('verbose'):
448 self.to_stdout('[debug] Loading archive file %r' % self.params.get('download_archive'))
449
450 preload_download_archive(self)
451
452 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
453 if self.params.get('geo_verification_proxy') is None:
454 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
455
456 if self.params.get('final_ext'):
457 if self.params.get('merge_output_format'):
458 self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
459 self.params['merge_output_format'] = self.params['final_ext']
460
461 if 'overwrites' in self.params and self.params['overwrites'] is None:
462 del self.params['overwrites']
463
464         check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N is the number of digits')
465 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
466 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
467
468 if params.get('bidi_workaround', False):
469 try:
470 import pty
471 master, slave = pty.openpty()
472 width = compat_get_terminal_size().columns
473 if width is None:
474 width_args = []
475 else:
476 width_args = ['-w', str(width)]
477 sp_kwargs = dict(
478 stdin=subprocess.PIPE,
479 stdout=slave,
480 stderr=self._err_file)
481 try:
482 self._output_process = subprocess.Popen(
483 ['bidiv'] + width_args, **sp_kwargs
484 )
485 except OSError:
486 self._output_process = subprocess.Popen(
487 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
488 self._output_channel = os.fdopen(master, 'rb')
489 except OSError as ose:
490 if ose.errno == errno.ENOENT:
491                 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround. Make sure that fribidi is an executable file in one of the directories in your $PATH.')
492 else:
493 raise
494
495 if (sys.platform != 'win32'
496 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
497 and not params.get('restrictfilenames', False)):
498 # Unicode filesystem API will throw errors (#1474, #13027)
499 self.report_warning(
500 'Assuming --restrict-filenames since file system encoding '
501 'cannot encode all characters. '
502 'Set the LC_ALL environment variable to fix this.')
503 self.params['restrictfilenames'] = True
504
505 self.outtmpl_dict = self.parse_outtmpl()
506
507 self._setup_opener()
508
509 if auto_init:
510 self.print_debug_header()
511 self.add_default_info_extractors()
512
513 for pp_def_raw in self.params.get('postprocessors', []):
514 pp_class = get_postprocessor(pp_def_raw['key'])
515 pp_def = dict(pp_def_raw)
516 del pp_def['key']
517 if 'when' in pp_def:
518 when = pp_def['when']
519 del pp_def['when']
520 else:
521 when = 'normal'
522 pp = pp_class(self, **compat_kwargs(pp_def))
523 self.add_post_processor(pp, when=when)
524
525 for ph in self.params.get('post_hooks', []):
526 self.add_post_hook(ph)
527
528 for ph in self.params.get('progress_hooks', []):
529 self.add_progress_hook(ph)
530
531 register_socks_protocols()
532
533 def warn_if_short_id(self, argv):
534 # short YouTube ID starting with dash?
535 idxs = [
536 i for i, a in enumerate(argv)
537 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
538 if idxs:
539 correct_argv = (
540 ['youtube-dlc']
541 + [a for i, a in enumerate(argv) if i not in idxs]
542 + ['--'] + [argv[i] for i in idxs]
543 )
544 self.report_warning(
545 'Long argument string detected. '
546 'Use -- to separate parameters and URLs, like this:\n%s\n' %
547 args_to_str(correct_argv))
548
549 def add_info_extractor(self, ie):
550 """Add an InfoExtractor object to the end of the list."""
551 self._ies.append(ie)
552 if not isinstance(ie, type):
553 self._ies_instances[ie.ie_key()] = ie
554 ie.set_downloader(self)
555
556 def get_info_extractor(self, ie_key):
557 """
558 Get an instance of an IE with name ie_key, it will try to get one from
559 the _ies list, if there's no instance it will create a new one and add
560 it to the extractor list.
561 """
562 ie = self._ies_instances.get(ie_key)
563 if ie is None:
564 ie = get_info_extractor(ie_key)()
565 self.add_info_extractor(ie)
566 return ie
567
568 def add_default_info_extractors(self):
569 """
570 Add the InfoExtractors returned by gen_extractors to the end of the list
571 """
572 for ie in gen_extractor_classes():
573 self.add_info_extractor(ie)
574
575 def add_post_processor(self, pp, when='normal'):
576 """Add a PostProcessor object to the end of the chain."""
577 self._pps[when].append(pp)
578 pp.set_downloader(self)
579
580 def add_post_hook(self, ph):
581 """Add the post hook"""
582 self._post_hooks.append(ph)
583
584 def add_progress_hook(self, ph):
585 """Add the progress hook (currently only for the file downloader)"""
586 self._progress_hooks.append(ph)
587
588 def _bidi_workaround(self, message):
589 if not hasattr(self, '_output_channel'):
590 return message
591
592 assert hasattr(self, '_output_process')
593 assert isinstance(message, compat_str)
594 line_count = message.count('\n') + 1
595 self._output_process.stdin.write((message + '\n').encode('utf-8'))
596 self._output_process.stdin.flush()
597 res = ''.join(self._output_channel.readline().decode('utf-8')
598 for _ in range(line_count))
599 return res[:-len('\n')]
600
601 def to_screen(self, message, skip_eol=False):
602 """Print message to stdout if not in quiet mode."""
603 return self.to_stdout(message, skip_eol, check_quiet=True)
604
605 def _write_string(self, s, out=None):
606 write_string(s, out=out, encoding=self.params.get('encoding'))
607
608 def to_stdout(self, message, skip_eol=False, check_quiet=False):
609 """Print message to stdout if not in quiet mode."""
610 if self.params.get('logger'):
611 self.params['logger'].debug(message)
612 elif not check_quiet or not self.params.get('quiet', False):
613 message = self._bidi_workaround(message)
614 terminator = ['\n', ''][skip_eol]
615 output = message + terminator
616
617 self._write_string(output, self._screen_file)
618
619 def to_stderr(self, message):
620 """Print message to stderr."""
621 assert isinstance(message, compat_str)
622 if self.params.get('logger'):
623 self.params['logger'].error(message)
624 else:
625 message = self._bidi_workaround(message)
626 output = message + '\n'
627 self._write_string(output, self._err_file)
628
629 def to_console_title(self, message):
630 if not self.params.get('consoletitle', False):
631 return
632 if compat_os_name == 'nt':
633 if ctypes.windll.kernel32.GetConsoleWindow():
634 # c_wchar_p() might not be necessary if `message` is
635 # already of type unicode()
636 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
637 elif 'TERM' in os.environ:
638 self._write_string('\033]0;%s\007' % message, self._screen_file)
639
640 def save_console_title(self):
641 if not self.params.get('consoletitle', False):
642 return
643 if self.params.get('simulate', False):
644 return
645 if compat_os_name != 'nt' and 'TERM' in os.environ:
646 # Save the title on stack
647 self._write_string('\033[22;0t', self._screen_file)
648
649 def restore_console_title(self):
650 if not self.params.get('consoletitle', False):
651 return
652 if self.params.get('simulate', False):
653 return
654 if compat_os_name != 'nt' and 'TERM' in os.environ:
655 # Restore the title from stack
656 self._write_string('\033[23;0t', self._screen_file)
657
658 def __enter__(self):
659 self.save_console_title()
660 return self
661
662 def __exit__(self, *args):
663 self.restore_console_title()
664
665 if self.params.get('cookiefile') is not None:
666 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
667
668 def trouble(self, message=None, tb=None):
669 """Determine action to take when a download problem appears.
670
671 Depending on if the downloader has been configured to ignore
672 download errors or not, this method may throw an exception or
673 not when errors are found, after printing the message.
674
675 tb, if given, is additional traceback information.
676 """
677 if message is not None:
678 self.to_stderr(message)
679 if self.params.get('verbose'):
680 if tb is None:
681 if sys.exc_info()[0]: # if .trouble has been called from an except block
682 tb = ''
683 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
684 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
685 tb += encode_compat_str(traceback.format_exc())
686 else:
687 tb_data = traceback.format_list(traceback.extract_stack())
688 tb = ''.join(tb_data)
689 self.to_stderr(tb)
690 if not self.params.get('ignoreerrors', False):
691 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
692 exc_info = sys.exc_info()[1].exc_info
693 else:
694 exc_info = sys.exc_info()
695 raise DownloadError(message, exc_info)
696 self._download_retcode = 1
697
698 def report_warning(self, message):
699 '''
700 Print the message to stderr, it will be prefixed with 'WARNING:'
701 If stderr is a tty file the 'WARNING:' will be colored
702 '''
703 if self.params.get('logger') is not None:
704 self.params['logger'].warning(message)
705 else:
706 if self.params.get('no_warnings'):
707 return
708 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
709 _msg_header = '\033[0;33mWARNING:\033[0m'
710 else:
711 _msg_header = 'WARNING:'
712 warning_message = '%s %s' % (_msg_header, message)
713 self.to_stderr(warning_message)
714
715 def report_error(self, message, tb=None):
716 '''
717 Do the same as trouble, but prefixes the message with 'ERROR:', colored
718 in red if stderr is a tty file.
719 '''
720 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
721 _msg_header = '\033[0;31mERROR:\033[0m'
722 else:
723 _msg_header = 'ERROR:'
724 error_message = '%s %s' % (_msg_header, message)
725 self.trouble(error_message, tb)
726
727 def report_file_already_downloaded(self, file_name):
728 """Report file has already been fully downloaded."""
729 try:
730 self.to_screen('[download] %s has already been downloaded' % file_name)
731 except UnicodeEncodeError:
732 self.to_screen('[download] The file has already been downloaded')
733
734 def report_file_delete(self, file_name):
735 """Report that existing file will be deleted."""
736 try:
737 self.to_screen('Deleting existing file %s' % file_name)
738 except UnicodeEncodeError:
739 self.to_screen('Deleting existing file')
740
741 def parse_outtmpl(self):
742 outtmpl_dict = self.params.get('outtmpl', {})
743 if not isinstance(outtmpl_dict, dict):
744 outtmpl_dict = {'default': outtmpl_dict}
745 outtmpl_dict.update({
746 k: v for k, v in DEFAULT_OUTTMPL.items()
747 if not outtmpl_dict.get(k)})
748 for key, val in outtmpl_dict.items():
749 if isinstance(val, bytes):
750 self.report_warning(
751 'Parameter outtmpl is bytes, but should be a unicode string. '
752 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
753 return outtmpl_dict
754
755 def _prepare_filename(self, info_dict, tmpl_type='default'):
756 try:
757 template_dict = dict(info_dict)
758
759 template_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
760 formatSeconds(info_dict['duration'], '-')
761 if info_dict.get('duration', None) is not None
762 else None)
763
764 template_dict['epoch'] = int(time.time())
765 autonumber_size = self.params.get('autonumber_size')
766 if autonumber_size is None:
767 autonumber_size = 5
768 template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
769 if template_dict.get('resolution') is None:
770 if template_dict.get('width') and template_dict.get('height'):
771 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
772 elif template_dict.get('height'):
773 template_dict['resolution'] = '%sp' % template_dict['height']
774 elif template_dict.get('width'):
775 template_dict['resolution'] = '%dx?' % template_dict['width']
776
777 sanitize = lambda k, v: sanitize_filename(
778 compat_str(v),
779 restricted=self.params.get('restrictfilenames'),
780 is_id=(k == 'id' or k.endswith('_id')))
781 template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
782 for k, v in template_dict.items()
783 if v is not None and not isinstance(v, (list, tuple, dict)))
784 na = self.params.get('outtmpl_na_placeholder', 'NA')
785 template_dict = collections.defaultdict(lambda: na, template_dict)
786
787 outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
788 force_ext = OUTTMPL_TYPES.get(tmpl_type)
789
790 # For fields playlist_index and autonumber convert all occurrences
791 # of %(field)s to %(field)0Nd for backward compatibility
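            # e.g. with 123 playlist entries, '%(playlist_index)s' becomes
            # '%(playlist_index)03d', producing indices '001', '002', ...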
792 field_size_compat_map = {
793 'playlist_index': len(str(template_dict['n_entries'])),
794 'autonumber': autonumber_size,
795 }
796 FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
797 mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
798 if mobj:
799 outtmpl = re.sub(
800 FIELD_SIZE_COMPAT_RE,
801 r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
802 outtmpl)
803
804 # As of [1] format syntax is:
805 # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
806 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
807 FORMAT_RE = r'''(?x)
808 (?<!%)
809 %
810 \({0}\) # mapping key
811 (?:[#0\-+ ]+)? # conversion flags (optional)
812 (?:\d+)? # minimum field width (optional)
813 (?:\.\d+)? # precision (optional)
814 [hlL]? # length modifier (optional)
815 (?P<type>[diouxXeEfFgGcrs%]) # conversion type
816 '''
817
818 numeric_fields = list(self._NUMERIC_FIELDS)
819
820 # Format date
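            # e.g. '%(upload_date>%Y-%m-%d)s' renders upload_date ('20210101')
            # as '2021-01-01' via strftime_or_none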
821 FORMAT_DATE_RE = FORMAT_RE.format(r'(?P<key>(?P<field>\w+)>(?P<format>.+?))')
822 for mobj in re.finditer(FORMAT_DATE_RE, outtmpl):
823 conv_type, field, frmt, key = mobj.group('type', 'field', 'format', 'key')
824 if key in template_dict:
825 continue
826 value = strftime_or_none(template_dict.get(field), frmt, na)
827 if conv_type in 'crs': # string
828 value = sanitize(field, value)
829 else: # number
830 numeric_fields.append(key)
831 value = float_or_none(value, default=None)
832 if value is not None:
833 template_dict[key] = value
834
835 # Missing numeric fields used together with integer presentation types
836 # in format specification will break the argument substitution since
837 # string NA placeholder is returned for missing fields. We will patch
838 # output template for missing fields to meet string presentation type.
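            # e.g. if 'view_count' is missing, '%(view_count)05d' is rewritten to
            # '%(view_count)s' so the string 'NA' placeholder can be substituted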
839 for numeric_field in numeric_fields:
840 if numeric_field not in template_dict:
841 outtmpl = re.sub(
842 FORMAT_RE.format(re.escape(numeric_field)),
843 r'%({0})s'.format(numeric_field), outtmpl)
844
845 # expand_path translates '%%' into '%' and '$$' into '$'
846 # correspondingly that is not what we want since we need to keep
847 # '%%' intact for template dict substitution step. Working around
848 # with boundary-alike separator hack.
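            # e.g. '%%' becomes '%<sep>%', which expand_path leaves alone (the
            # random name is never a real env var); .replace(sep, '') below
            # restores '%%' before the substitution step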
849 sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
850 outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
851
852 # outtmpl should be expand_path'ed before template dict substitution
853 # because meta fields may contain env variables we don't want to
854 # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
855 # title "Hello $PATH", we don't want `$PATH` to be expanded.
856 filename = expand_path(outtmpl).replace(sep, '') % template_dict
857
858 if force_ext is not None:
859 filename = replace_extension(filename, force_ext, template_dict.get('ext'))
860
861 # https://github.com/blackjack4494/youtube-dlc/issues/85
862 trim_file_name = self.params.get('trim_file_name', False)
863 if trim_file_name:
864 fn_groups = filename.rsplit('.')
865 ext = fn_groups[-1]
866 sub_ext = ''
867 if len(fn_groups) > 2:
868 sub_ext = fn_groups[-2]
869 filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
870
871 return filename
872 except ValueError as err:
873 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
874 return None
875
876 def prepare_filename(self, info_dict, dir_type='', warn=False):
877 """Generate the output filename."""
878 paths = self.params.get('paths', {})
879 assert isinstance(paths, dict)
880 filename = self._prepare_filename(info_dict, dir_type or 'default')
881
882 if warn and not self.__prepare_filename_warned:
883 if not paths:
884 pass
885 elif filename == '-':
886                 self.report_warning('--paths is ignored when outputting to stdout')
887 elif os.path.isabs(filename):
888 self.report_warning('--paths is ignored since an absolute path is given in output template')
889 self.__prepare_filename_warned = True
890 if filename == '-' or not filename:
891 return filename
892
893 homepath = expand_path(paths.get('home', '').strip())
894 assert isinstance(homepath, compat_str)
895 subdir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
896 assert isinstance(subdir, compat_str)
897 path = os.path.join(homepath, subdir, filename)
898
899 # Temporary fix for #4787
900 # 'Treat' all problem characters by passing filename through preferredencoding
901 # to workaround encoding issues with subprocess on python2 @ Windows
902 if sys.version_info < (3, 0) and sys.platform == 'win32':
903 path = encodeFilename(path, True).decode(preferredencoding())
904 return sanitize_path(path, force=self.params.get('windowsfilenames'))
905
906 def _match_entry(self, info_dict, incomplete):
907 """ Returns None if the file should be downloaded """
908
909 def check_filter():
910 video_title = info_dict.get('title', info_dict.get('id', 'video'))
911 if 'title' in info_dict:
912 # This can happen when we're just evaluating the playlist
913 title = info_dict['title']
914 matchtitle = self.params.get('matchtitle', False)
915 if matchtitle:
916 if not re.search(matchtitle, title, re.IGNORECASE):
917 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
918 rejecttitle = self.params.get('rejecttitle', False)
919 if rejecttitle:
920 if re.search(rejecttitle, title, re.IGNORECASE):
921 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
922 date = info_dict.get('upload_date')
923 if date is not None:
924 dateRange = self.params.get('daterange', DateRange())
925 if date not in dateRange:
926 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
927 view_count = info_dict.get('view_count')
928 if view_count is not None:
929 min_views = self.params.get('min_views')
930 if min_views is not None and view_count < min_views:
931 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
932 max_views = self.params.get('max_views')
933 if max_views is not None and view_count > max_views:
934 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
935 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
936 return 'Skipping "%s" because it is age restricted' % video_title
937 if self.in_download_archive(info_dict):
938 return '%s has already been recorded in archive' % video_title
939
940 if not incomplete:
941 match_filter = self.params.get('match_filter')
942 if match_filter is not None:
943 ret = match_filter(info_dict)
944 if ret is not None:
945 return ret
946 return None
947
948 reason = check_filter()
949 if reason is not None:
950 self.to_screen('[download] ' + reason)
951             if reason.endswith('has already been recorded in archive') and self.params.get('break_on_existing', False):
952 raise ExistingVideoReached()
953 elif self.params.get('break_on_reject', False):
954 raise RejectedVideoReached()
955 return reason
956
957 @staticmethod
958 def add_extra_info(info_dict, extra_info):
959 '''Set the keys from extra_info in info dict if they are missing'''
960 for key, value in extra_info.items():
961 info_dict.setdefault(key, value)
962
963 def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info={},
964 process=True, force_generic_extractor=False):
965 '''
966 Returns a list with a dictionary for each video we find.
967 If 'download', also downloads the videos.
968 extra_info is a dict containing the extra values to add to each result
969 '''
970
971 if not ie_key and force_generic_extractor:
972 ie_key = 'Generic'
973
974 if ie_key:
975 ies = [self.get_info_extractor(ie_key)]
976 else:
977 ies = self._ies
978
979 for ie in ies:
980 if not ie.suitable(url):
981 continue
982
983 ie_key = ie.ie_key()
984 ie = self.get_info_extractor(ie_key)
985 if not ie.working():
986 self.report_warning('The program functionality for this site has been marked as broken, '
987 'and will probably not work.')
988
989 try:
990 temp_id = str_or_none(
991 ie.extract_id(url) if callable(getattr(ie, 'extract_id', None))
992 else ie._match_id(url))
993 except (AssertionError, IndexError, AttributeError):
994 temp_id = None
995 if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
996 self.to_screen("[%s] %s: has already been recorded in archive" % (
997 ie_key, temp_id))
998 break
999 return self.__extract_info(url, ie, download, extra_info, process, info_dict)
1000 else:
1001 self.report_error('no suitable InfoExtractor for URL %s' % url)
1002
1003 def __handle_extraction_exceptions(func):
1004 def wrapper(self, *args, **kwargs):
1005 try:
1006 return func(self, *args, **kwargs)
1007 except GeoRestrictedError as e:
1008 msg = e.msg
1009 if e.countries:
1010 msg += '\nThis video is available in %s.' % ', '.join(
1011 map(ISO3166Utils.short2full, e.countries))
1012 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1013 self.report_error(msg)
1014 except ExtractorError as e: # An error we somewhat expected
1015 self.report_error(compat_str(e), e.format_traceback())
1016 except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
1017 raise
1018 except Exception as e:
1019 if self.params.get('ignoreerrors', False):
1020 self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
1021 else:
1022 raise
1023 return wrapper
1024
1025 @__handle_extraction_exceptions
1026 def __extract_info(self, url, ie, download, extra_info, process, info_dict):
1027 ie_result = ie.extract(url)
1028 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1029 return
1030 if isinstance(ie_result, list):
1031 # Backwards compatibility: old IE result format
1032 ie_result = {
1033 '_type': 'compat_list',
1034 'entries': ie_result,
1035 }
1036 if info_dict:
1037 if info_dict.get('id'):
1038 ie_result['id'] = info_dict['id']
1039 if info_dict.get('title'):
1040 ie_result['title'] = info_dict['title']
1041 self.add_default_extra_info(ie_result, ie, url)
1042 if process:
1043 return self.process_ie_result(ie_result, download, extra_info)
1044 else:
1045 return ie_result
1046
1047 def add_default_extra_info(self, ie_result, ie, url):
1048 self.add_extra_info(ie_result, {
1049 'extractor': ie.IE_NAME,
1050 'webpage_url': url,
1051 'webpage_url_basename': url_basename(url),
1052 'extractor_key': ie.ie_key(),
1053 })
1054
1055 def process_ie_result(self, ie_result, download=True, extra_info={}):
1056 """
1057         Take the result of the ie (may be modified) and resolve all unresolved
1058 references (URLs, playlist items).
1059
1060 It will also download the videos if 'download'.
1061 Returns the resolved ie_result.
1062 """
1063 result_type = ie_result.get('_type', 'video')
1064
1065 if result_type in ('url', 'url_transparent'):
1066 ie_result['url'] = sanitize_url(ie_result['url'])
1067 extract_flat = self.params.get('extract_flat', False)
1068 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1069 or extract_flat is True):
1070 self.__forced_printings(ie_result, self.prepare_filename(ie_result), incomplete=True)
1071 return ie_result
1072
1073 if result_type == 'video':
1074 self.add_extra_info(ie_result, extra_info)
1075 return self.process_video_result(ie_result, download=download)
1076 elif result_type == 'url':
1077 # We have to add extra_info to the results because it may be
1078 # contained in a playlist
1079 return self.extract_info(ie_result['url'],
1080 download, info_dict=ie_result,
1081 ie_key=ie_result.get('ie_key'),
1082 extra_info=extra_info)
1083 elif result_type == 'url_transparent':
1084 # Use the information from the embedding page
1085 info = self.extract_info(
1086 ie_result['url'], ie_key=ie_result.get('ie_key'),
1087 extra_info=extra_info, download=False, process=False)
1088
1089 # extract_info may return None when ignoreerrors is enabled and
1090 # extraction failed with an error, don't crash and return early
1091 # in this case
1092 if not info:
1093 return info
1094
1095 force_properties = dict(
1096 (k, v) for k, v in ie_result.items() if v is not None)
1097 for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1098 if f in force_properties:
1099 del force_properties[f]
1100 new_result = info.copy()
1101 new_result.update(force_properties)
1102
1103 # Extracted info may not be a video result (i.e.
1104             # info.get('_type', 'video') != video) but rather a url or
1105 # url_transparent. In such cases outer metadata (from ie_result)
1106 # should be propagated to inner one (info). For this to happen
1107 # _type of info should be overridden with url_transparent. This
1108 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1109 if new_result.get('_type') == 'url':
1110 new_result['_type'] = 'url_transparent'
1111
1112 return self.process_ie_result(
1113 new_result, download=download, extra_info=extra_info)
1114 elif result_type in ('playlist', 'multi_video'):
1115 # Protect from infinite recursion due to recursively nested playlists
1116 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1117 webpage_url = ie_result['webpage_url']
1118 if webpage_url in self._playlist_urls:
1119 self.to_screen(
1120 '[download] Skipping already downloaded playlist: %s'
1121                     % (ie_result.get('title') or ie_result.get('id')))
1122 return
1123
1124 self._playlist_level += 1
1125 self._playlist_urls.add(webpage_url)
1126 try:
1127 return self.__process_playlist(ie_result, download)
1128 finally:
1129 self._playlist_level -= 1
1130 if not self._playlist_level:
1131 self._playlist_urls.clear()
1132 elif result_type == 'compat_list':
1133 self.report_warning(
1134 'Extractor %s returned a compat_list result. '
1135 'It needs to be updated.' % ie_result.get('extractor'))
1136
1137 def _fixup(r):
1138 self.add_extra_info(
1139 r,
1140 {
1141 'extractor': ie_result['extractor'],
1142 'webpage_url': ie_result['webpage_url'],
1143 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1144 'extractor_key': ie_result['extractor_key'],
1145 }
1146 )
1147 return r
1148 ie_result['entries'] = [
1149 self.process_ie_result(_fixup(r), download, extra_info)
1150 for r in ie_result['entries']
1151 ]
1152 return ie_result
1153 else:
1154 raise Exception('Invalid result type: %s' % result_type)
1155
1156 def __process_playlist(self, ie_result, download):
1157 # We process each entry in the playlist
1158 playlist = ie_result.get('title') or ie_result.get('id')
1159 self.to_screen('[download] Downloading playlist: %s' % playlist)
1160
1161 if self.params.get('allow_playlist_files', True):
1162 ie_copy = {
1163 'playlist': playlist,
1164 'playlist_id': ie_result.get('id'),
1165 'playlist_title': ie_result.get('title'),
1166 'playlist_uploader': ie_result.get('uploader'),
1167 'playlist_uploader_id': ie_result.get('uploader_id'),
1168 'playlist_index': 0
1169 }
1170 ie_copy.update(dict(ie_result))
1171
1172 def ensure_dir_exists(path):
1173 return make_dir(path, self.report_error)
1174
1175 if self.params.get('writeinfojson', False):
1176 infofn = self.prepare_filename(ie_copy, 'pl_infojson')
1177 if not ensure_dir_exists(encodeFilename(infofn)):
1178 return
1179 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
1180 self.to_screen('[info] Playlist metadata is already present')
1181 else:
1182 playlist_info = dict(ie_result)
1183                 # playlist_info['entries'] = list(playlist_info['entries']) # Entries is a generator which should not be resolved here
1184 del playlist_info['entries']
1185 self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
1186 try:
1187 write_json_file(self.filter_requested_info(playlist_info), infofn)
1188 except (OSError, IOError):
1189 self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
1190
1191 if self.params.get('writedescription', False):
1192 descfn = self.prepare_filename(ie_copy, 'pl_description')
1193 if not ensure_dir_exists(encodeFilename(descfn)):
1194 return
1195 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
1196 self.to_screen('[info] Playlist description is already present')
1197 elif ie_result.get('description') is None:
1198 self.report_warning('There\'s no playlist description to write.')
1199 else:
1200 try:
1201 self.to_screen('[info] Writing playlist description to: ' + descfn)
1202 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1203 descfile.write(ie_result['description'])
1204 except (OSError, IOError):
1205 self.report_error('Cannot write playlist description file ' + descfn)
1206 return
1207
1208 playlist_results = []
1209
1210 playliststart = self.params.get('playliststart', 1) - 1
1211 playlistend = self.params.get('playlistend')
1212 # For backwards compatibility, interpret -1 as whole list
1213 if playlistend == -1:
1214 playlistend = None
1215
1216 playlistitems_str = self.params.get('playlist_items')
1217 playlistitems = None
1218 if playlistitems_str is not None:
1219             def iter_playlistitems(spec):
1220                 for string_segment in spec.split(','):
1221 if '-' in string_segment:
1222 start, end = string_segment.split('-')
1223 for item in range(int(start), int(end) + 1):
1224 yield int(item)
1225 else:
1226 yield int(string_segment)
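                # e.g. playlist_items '1-3,7' yields 1, 2, 3, 7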
1227 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1228
1229 ie_entries = ie_result['entries']
1230
1231 def make_playlistitems_entries(list_ie_entries):
1232 num_entries = len(list_ie_entries)
1233 return [
1234 list_ie_entries[i - 1] for i in playlistitems
1235 if -num_entries <= i - 1 < num_entries]
1236
1237 def report_download(num_entries):
1238 self.to_screen(
1239 '[%s] playlist %s: Downloading %d videos' %
1240 (ie_result['extractor'], playlist, num_entries))
1241
1242 if isinstance(ie_entries, list):
1243 n_all_entries = len(ie_entries)
1244 if playlistitems:
1245 entries = make_playlistitems_entries(ie_entries)
1246 else:
1247 entries = ie_entries[playliststart:playlistend]
1248 n_entries = len(entries)
1249 self.to_screen(
1250 '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
1251 (ie_result['extractor'], playlist, n_all_entries, n_entries))
1252 elif isinstance(ie_entries, PagedList):
1253 if playlistitems:
1254 entries = []
1255 for item in playlistitems:
1256 entries.extend(ie_entries.getslice(
1257 item - 1, item
1258 ))
1259 else:
1260 entries = ie_entries.getslice(
1261 playliststart, playlistend)
1262 n_entries = len(entries)
1263 report_download(n_entries)
1264 else: # iterable
1265 if playlistitems:
1266 entries = make_playlistitems_entries(list(itertools.islice(
1267 ie_entries, 0, max(playlistitems))))
1268 else:
1269 entries = list(itertools.islice(
1270 ie_entries, playliststart, playlistend))
1271 n_entries = len(entries)
1272 report_download(n_entries)
1273
1274 if self.params.get('playlistreverse', False):
1275 entries = entries[::-1]
1276
1277 if self.params.get('playlistrandom', False):
1278 random.shuffle(entries)
1279
1280 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1281
1282 for i, entry in enumerate(entries, 1):
1283 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1284 # This __x_forwarded_for_ip thing is a bit ugly but requires
1285 # minimal changes
1286 if x_forwarded_for:
1287 entry['__x_forwarded_for_ip'] = x_forwarded_for
1288 extra = {
1289 'n_entries': n_entries,
1290 'playlist': playlist,
1291 'playlist_id': ie_result.get('id'),
1292 'playlist_title': ie_result.get('title'),
1293 'playlist_uploader': ie_result.get('uploader'),
1294 'playlist_uploader_id': ie_result.get('uploader_id'),
1295 'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
1296 'extractor': ie_result['extractor'],
1297 'webpage_url': ie_result['webpage_url'],
1298 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1299 'extractor_key': ie_result['extractor_key'],
1300 }
1301
1302 if self._match_entry(entry, incomplete=True) is not None:
1303 continue
1304
1305 entry_result = self.__process_iterable_entry(entry, download, extra)
1306 # TODO: skip failed (empty) entries?
1307 playlist_results.append(entry_result)
1308 ie_result['entries'] = playlist_results
1309 self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1310 return ie_result
1311
1312 @__handle_extraction_exceptions
1313 def __process_iterable_entry(self, entry, download, extra_info):
1314 return self.process_ie_result(
1315 entry, download=download, extra_info=extra_info)
1316
1317 def _build_format_filter(self, filter_spec):
1318 " Returns a function to filter the formats according to the filter_spec "
1319
1320 OPERATORS = {
1321 '<': operator.lt,
1322 '<=': operator.le,
1323 '>': operator.gt,
1324 '>=': operator.ge,
1325 '=': operator.eq,
1326 '!=': operator.ne,
1327 }
1328 operator_rex = re.compile(r'''(?x)\s*
1329 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
1330 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1331 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
1332 $
1333 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1334 m = operator_rex.search(filter_spec)
1335 if m:
1336 try:
1337 comparison_value = int(m.group('value'))
1338 except ValueError:
1339 comparison_value = parse_filesize(m.group('value'))
1340 if comparison_value is None:
1341 comparison_value = parse_filesize(m.group('value') + 'B')
1342 if comparison_value is None:
1343 raise ValueError(
1344 'Invalid value %r in format specification %r' % (
1345 m.group('value'), filter_spec))
1346 op = OPERATORS[m.group('op')]
1347
1348 if not m:
1349 STR_OPERATORS = {
1350 '=': operator.eq,
1351 '^=': lambda attr, value: attr.startswith(value),
1352 '$=': lambda attr, value: attr.endswith(value),
1353 '*=': lambda attr, value: value in attr,
1354 }
1355 str_operator_rex = re.compile(r'''(?x)
1356 \s*(?P<key>[a-zA-Z0-9._-]+)
1357 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
1358 \s*(?P<value>[a-zA-Z0-9._-]+)
1359 \s*$
1360 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1361 m = str_operator_rex.search(filter_spec)
1362 if m:
1363 comparison_value = m.group('value')
1364 str_op = STR_OPERATORS[m.group('op')]
1365 if m.group('negation'):
1366 op = lambda attr, value: not str_op(attr, value)
1367 else:
1368 op = str_op
1369
1370 if not m:
1371 raise ValueError('Invalid filter specification %r' % filter_spec)
1372
1373 def _filter(f):
1374 actual_value = f.get(m.group('key'))
1375 if actual_value is None:
1376 return m.group('none_inclusive')
1377 return op(actual_value, comparison_value)
1378 return _filter
1379
1380 def _default_format_spec(self, info_dict, download=True):
1381
1382 def can_merge():
1383 merger = FFmpegMergerPP(self)
1384 return merger.available and merger.can_merge()
1385
1386 prefer_best = (
1387 not self.params.get('simulate', False)
1388 and download
1389 and (
1390 not can_merge()
1391 or info_dict.get('is_live', False)
1392 or self.outtmpl_dict['default'] == '-'))
1393
1394 return (
1395 'best/bestvideo+bestaudio'
1396 if prefer_best
1397 else 'bestvideo*+bestaudio/best'
1398 if not self.params.get('allow_multiple_audio_streams', False)
1399 else 'bestvideo+bestaudio/best')
1400
1401 def build_format_selector(self, format_spec):
1402 def syntax_error(note, start):
1403 message = (
1404 'Invalid format specification: '
1405 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1406 return SyntaxError(message)
1407
1408 PICKFIRST = 'PICKFIRST'
1409 MERGE = 'MERGE'
1410 SINGLE = 'SINGLE'
1411 GROUP = 'GROUP'
1412 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
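        # e.g. 'bestvideo+bestaudio/best' parses to
        # PICKFIRST(MERGE(bestvideo, bestaudio), SINGLE(best))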
1413
1414 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1415 'video': self.params.get('allow_multiple_video_streams', False)}
1416
1417 def _parse_filter(tokens):
1418 filter_parts = []
1419 for type, string, start, _, _ in tokens:
1420 if type == tokenize.OP and string == ']':
1421 return ''.join(filter_parts)
1422 else:
1423 filter_parts.append(string)
1424
1425 def _remove_unused_ops(tokens):
1426 # Remove operators that we don't use and join them with the surrounding strings
1427 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1428 ALLOWED_OPS = ('/', '+', ',', '(', ')')
1429 last_string, last_start, last_end, last_line = None, None, None, None
1430 for type, string, start, end, line in tokens:
1431 if type == tokenize.OP and string == '[':
1432 if last_string:
1433 yield tokenize.NAME, last_string, last_start, last_end, last_line
1434 last_string = None
1435 yield type, string, start, end, line
1436 # everything inside brackets will be handled by _parse_filter
1437 for type, string, start, end, line in tokens:
1438 yield type, string, start, end, line
1439 if type == tokenize.OP and string == ']':
1440 break
1441 elif type == tokenize.OP and string in ALLOWED_OPS:
1442 if last_string:
1443 yield tokenize.NAME, last_string, last_start, last_end, last_line
1444 last_string = None
1445 yield type, string, start, end, line
1446 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1447 if not last_string:
1448 last_string = string
1449 last_start = start
1450 last_end = end
1451 else:
1452 last_string += string
1453 if last_string:
1454 yield tokenize.NAME, last_string, last_start, last_end, last_line
1455
1456 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1457 selectors = []
1458 current_selector = None
1459 for type, string, start, _, _ in tokens:
1460 # ENCODING is only defined in python 3.x
1461 if type == getattr(tokenize, 'ENCODING', None):
1462 continue
1463 elif type in [tokenize.NAME, tokenize.NUMBER]:
1464 current_selector = FormatSelector(SINGLE, string, [])
1465 elif type == tokenize.OP:
1466 if string == ')':
1467 if not inside_group:
1468 # ')' will be handled by the parentheses group
1469 tokens.restore_last_token()
1470 break
1471 elif inside_merge and string in ['/', ',']:
1472 tokens.restore_last_token()
1473 break
1474 elif inside_choice and string == ',':
1475 tokens.restore_last_token()
1476 break
1477 elif string == ',':
1478 if not current_selector:
1479 raise syntax_error('"," must follow a format selector', start)
1480 selectors.append(current_selector)
1481 current_selector = None
1482 elif string == '/':
1483 if not current_selector:
1484 raise syntax_error('"/" must follow a format selector', start)
1485 first_choice = current_selector
1486 second_choice = _parse_format_selection(tokens, inside_choice=True)
1487 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1488 elif string == '[':
1489 if not current_selector:
1490 current_selector = FormatSelector(SINGLE, 'best', [])
1491 format_filter = _parse_filter(tokens)
1492 current_selector.filters.append(format_filter)
1493 elif string == '(':
1494 if current_selector:
1495 raise syntax_error('Unexpected "("', start)
1496 group = _parse_format_selection(tokens, inside_group=True)
1497 current_selector = FormatSelector(GROUP, group, [])
1498 elif string == '+':
1499 if not current_selector:
1500 raise syntax_error('Unexpected "+"', start)
1501 selector_1 = current_selector
1502 selector_2 = _parse_format_selection(tokens, inside_merge=True)
1503 if not selector_2:
1504 raise syntax_error('Expected a selector', start)
1505 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1506 else:
1507 raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1508 elif type == tokenize.ENDMARKER:
1509 break
1510 if current_selector:
1511 selectors.append(current_selector)
1512 return selectors
1513
1514 def _build_selector_function(selector):
1515 if isinstance(selector, list): # ,
1516 fs = [_build_selector_function(s) for s in selector]
1517
1518 def selector_function(ctx):
1519 for f in fs:
1520 for format in f(ctx):
1521 yield format
1522 return selector_function
1523
1524 elif selector.type == GROUP: # ()
1525 selector_function = _build_selector_function(selector.selector)
1526
1527 elif selector.type == PICKFIRST: # /
1528 fs = [_build_selector_function(s) for s in selector.selector]
1529
1530 def selector_function(ctx):
1531 for f in fs:
1532 picked_formats = list(f(ctx))
1533 if picked_formats:
1534 return picked_formats
1535 return []
1536
1537 elif selector.type == SINGLE: # atom
1538 format_spec = selector.selector if selector.selector is not None else 'best'
1539
1540 if format_spec == 'all':
1541 def selector_function(ctx):
1542 formats = list(ctx['formats'])
1543 if formats:
1544 for f in formats:
1545 yield f
1546
1547 else:
1548 format_fallback = False
1549 format_spec_obj = re.match(r'(best|worst|b|w)(video|audio|v|a)?(\*)?$', format_spec)
1550 if format_spec_obj is not None:
1551 format_idx = 0 if format_spec_obj.group(1)[0] == 'w' else -1
1552 format_type = format_spec_obj.group(2)[0] if format_spec_obj.group(2) else False
1553 not_format_type = 'v' if format_type == 'a' else 'a'
1554 format_modified = format_spec_obj.group(3) is not None
1555
1556 format_fallback = not format_type and not format_modified # for b, w
1557 filter_f = ((lambda f: f.get(format_type + 'codec') != 'none')
1558 if format_type and format_modified # bv*, ba*, wv*, wa*
1559 else (lambda f: f.get(not_format_type + 'codec') == 'none')
1560 if format_type # bv, ba, wv, wa
1561 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
1562 if not format_modified # b, w
1563 else None) # b*, w*
1564 else:
1565 format_idx = -1
1566 filter_f = ((lambda f: f.get('ext') == format_spec)
1567 if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav'] # extension
1568 else (lambda f: f.get('format_id') == format_spec)) # id
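                        # e.g. 'm4a' selects the last listed format whose ext is m4a,
                        # while any other token, say 'http-720p' (a hypothetical id),
                        # must match format_id exactly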
1569
1570 def selector_function(ctx):
1571 formats = list(ctx['formats'])
1572 if not formats:
1573 return
1574 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
1575 if matches:
1576 yield matches[format_idx]
1577 elif format_fallback == 'force' or (format_fallback and ctx['incomplete_formats']):
1578 # for extractors with incomplete formats (audio only (soundcloud)
1579 # or video only (imgur)) best/worst will fallback to
1580 # best/worst {video,audio}-only format
1581 yield formats[format_idx]
1582
1583 elif selector.type == MERGE: # +
1584 def _merge(formats_pair):
1585 format_1, format_2 = formats_pair
1586
1587 formats_info = []
1588 formats_info.extend(format_1.get('requested_formats', (format_1,)))
1589 formats_info.extend(format_2.get('requested_formats', (format_2,)))
1590
                    if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
                        get_no_more = {"video": False, "audio": False}
                        # Build a new list instead of popping from formats_info while
                        # iterating it, which would skip the entry after each removal
                        deduped_formats_info = []
                        for fmt_info in formats_info:
                            discard = False
                            for aud_vid in ["audio", "video"]:
                                if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                                    if get_no_more[aud_vid]:
                                        discard = True
                                    get_no_more[aud_vid] = True
                            if not discard:
                                deduped_formats_info.append(fmt_info)
                        formats_info = deduped_formats_info
1599
1600 if len(formats_info) == 1:
1601 return formats_info[0]
1602
1603 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1604 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1605
1606 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1607 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1608
1609 output_ext = self.params.get('merge_output_format')
1610 if not output_ext:
1611 if the_only_video:
1612 output_ext = the_only_video['ext']
1613 elif the_only_audio and not video_fmts:
1614 output_ext = the_only_audio['ext']
1615 else:
1616 output_ext = 'mkv'
1617
1618 new_dict = {
1619 'requested_formats': formats_info,
1620 'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
1621 'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
1622 'ext': output_ext,
1623 }
1624
1625 if the_only_video:
1626 new_dict.update({
1627 'width': the_only_video.get('width'),
1628 'height': the_only_video.get('height'),
1629 'resolution': the_only_video.get('resolution'),
1630 'fps': the_only_video.get('fps'),
1631 'vcodec': the_only_video.get('vcodec'),
1632 'vbr': the_only_video.get('vbr'),
1633 'stretched_ratio': the_only_video.get('stretched_ratio'),
1634 })
1635
1636 if the_only_audio:
1637 new_dict.update({
1638 'acodec': the_only_audio.get('acodec'),
1639 'abr': the_only_audio.get('abr'),
1640 })
1641
1642 return new_dict
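                    # Illustrative merge (hypothetical formats): combining
                    #   {'format_id': '137', 'ext': 'mp4', 'vcodec': 'avc1', 'acodec': 'none'}
                    # with
                    #   {'format_id': '140', 'ext': 'm4a', 'vcodec': 'none', 'acodec': 'mp4a'}
                    # yields format_id '137+140' and ext 'mp4', since the extension of
                    # the only video stream wins when 'merge_output_format' is unset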
1643
1644 selector_1, selector_2 = map(_build_selector_function, selector.selector)
1645
1646 def selector_function(ctx):
1647 for pair in itertools.product(
1648 selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
1649 yield _merge(pair)
1650
1651 filters = [self._build_format_filter(f) for f in selector.filters]
1652
1653 def final_selector(ctx):
1654 ctx_copy = copy.deepcopy(ctx)
1655 for _filter in filters:
1656 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1657 return selector_function(ctx_copy)
1658 return final_selector
1659
1660 stream = io.BytesIO(format_spec.encode('utf-8'))
1661 try:
1662 tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1663 except tokenize.TokenError:
1664 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1665
1666 class TokenIterator(object):
1667 def __init__(self, tokens):
1668 self.tokens = tokens
1669 self.counter = 0
1670
1671 def __iter__(self):
1672 return self
1673
1674 def __next__(self):
1675 if self.counter >= len(self.tokens):
1676 raise StopIteration()
1677 value = self.tokens[self.counter]
1678 self.counter += 1
1679 return value
1680
1681 next = __next__
1682
1683 def restore_last_token(self):
1684 self.counter -= 1
1685
1686 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1687 return _build_selector_function(parsed_selector)
1688
1689 def _calc_headers(self, info_dict):
1690 res = std_headers.copy()
1691
1692 add_headers = info_dict.get('http_headers')
1693 if add_headers:
1694 res.update(add_headers)
1695
1696 cookies = self._calc_cookies(info_dict)
1697 if cookies:
1698 res['Cookie'] = cookies
1699
1700 if 'X-Forwarded-For' not in res:
1701 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1702 if x_forwarded_for_ip:
1703 res['X-Forwarded-For'] = x_forwarded_for_ip
1704
1705 return res
1706
1707 def _calc_cookies(self, info_dict):
1708 pr = sanitized_Request(info_dict['url'])
1709 self.cookiejar.add_cookie_header(pr)
1710 return pr.get_header('Cookie')
1711
1712 def process_video_result(self, info_dict, download=True):
1713 assert info_dict.get('_type', 'video') == 'video'
1714
1715 if 'id' not in info_dict:
1716 raise ExtractorError('Missing "id" field in extractor result')
1717 if 'title' not in info_dict:
1718 raise ExtractorError('Missing "title" field in extractor result')
1719
1720 def report_force_conversion(field, field_not, conversion):
1721 self.report_warning(
1722 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
1723 % (field, field_not, conversion))
1724
1725 def sanitize_string_field(info, string_field):
1726 field = info.get(string_field)
1727 if field is None or isinstance(field, compat_str):
1728 return
1729 report_force_conversion(string_field, 'a string', 'string')
1730 info[string_field] = compat_str(field)
1731
1732 def sanitize_numeric_fields(info):
1733 for numeric_field in self._NUMERIC_FIELDS:
1734 field = info.get(numeric_field)
1735 if field is None or isinstance(field, compat_numeric_types):
1736 continue
1737 report_force_conversion(numeric_field, 'numeric', 'int')
1738 info[numeric_field] = int_or_none(field)
1739
1740 sanitize_string_field(info_dict, 'id')
1741 sanitize_numeric_fields(info_dict)
1742
1743 if 'playlist' not in info_dict:
1744 # It isn't part of a playlist
1745 info_dict['playlist'] = None
1746 info_dict['playlist_index'] = None
1747
1748 thumbnails = info_dict.get('thumbnails')
1749 if thumbnails is None:
1750 thumbnail = info_dict.get('thumbnail')
1751 if thumbnail:
1752 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1753 if thumbnails:
1754 thumbnails.sort(key=lambda t: (
1755 t.get('preference') if t.get('preference') is not None else -1,
1756 t.get('width') if t.get('width') is not None else -1,
1757 t.get('height') if t.get('height') is not None else -1,
1758 t.get('id') if t.get('id') is not None else '', t.get('url')))
1759 for i, t in enumerate(thumbnails):
1760 t['url'] = sanitize_url(t['url'])
1761 if t.get('width') and t.get('height'):
1762 t['resolution'] = '%dx%d' % (t['width'], t['height'])
1763 if t.get('id') is None:
1764 t['id'] = '%d' % i
1765
1766 if self.params.get('list_thumbnails'):
1767 self.list_thumbnails(info_dict)
1768 return
1769
1770 thumbnail = info_dict.get('thumbnail')
1771 if thumbnail:
1772 info_dict['thumbnail'] = sanitize_url(thumbnail)
1773 elif thumbnails:
1774 info_dict['thumbnail'] = thumbnails[-1]['url']
1775
1776 if 'display_id' not in info_dict and 'id' in info_dict:
1777 info_dict['display_id'] = info_dict['id']
1778
1779 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1780 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1781 # see http://bugs.python.org/issue1646728)
1782 try:
1783 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1784 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1785 except (ValueError, OverflowError, OSError):
1786 pass
1787
1788 # Auto generate title fields corresponding to the *_number fields when missing
1789 # in order to always have clean titles. This is very common for TV series.
1790 for field in ('chapter', 'season', 'episode'):
1791 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1792 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
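        # e.g. an entry carrying episode_number 3 but no 'episode' field
        # gets episode = 'Episode 3'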
1793
1794 for cc_kind in ('subtitles', 'automatic_captions'):
1795 cc = info_dict.get(cc_kind)
1796 if cc:
1797 for _, subtitle in cc.items():
1798 for subtitle_format in subtitle:
1799 if subtitle_format.get('url'):
1800 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
1801 if subtitle_format.get('ext') is None:
1802 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1803
1804 automatic_captions = info_dict.get('automatic_captions')
1805 subtitles = info_dict.get('subtitles')
1806
1807 if self.params.get('listsubtitles', False):
1808 if 'automatic_captions' in info_dict:
1809 self.list_subtitles(
1810 info_dict['id'], automatic_captions, 'automatic captions')
1811 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1812 return
1813
1814 info_dict['requested_subtitles'] = self.process_subtitles(
1815 info_dict['id'], subtitles, automatic_captions)
1816
1817 # We now pick which formats have to be downloaded
1818 if info_dict.get('formats') is None:
1819 # There's only one format available
1820 formats = [info_dict]
1821 else:
1822 formats = info_dict['formats']
1823
1824 if not formats:
1825 raise ExtractorError('No video formats found!')
1826
1827 def is_wellformed(f):
1828 url = f.get('url')
1829 if not url:
1830 self.report_warning(
1831 '"url" field is missing or empty - skipping format, '
1832 'there is an error in extractor')
1833 return False
1834 if isinstance(url, bytes):
1835 sanitize_string_field(f, 'url')
1836 return True
1837
1838 # Filter out malformed formats for better extraction robustness
1839 formats = list(filter(is_wellformed, formats))
1840
1841 formats_dict = {}
1842
1843 # We check that all the formats have the format and format_id fields
1844 for i, format in enumerate(formats):
1845 sanitize_string_field(format, 'format_id')
1846 sanitize_numeric_fields(format)
1847 format['url'] = sanitize_url(format['url'])
1848 if not format.get('format_id'):
1849 format['format_id'] = compat_str(i)
1850 else:
1851 # Sanitize format_id from characters used in format selector expression
1852 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
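                # e.g. a hypothetical extractor-supplied id 'hls 720p/1'
                # becomes 'hls_720p_1'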
1853 format_id = format['format_id']
1854 if format_id not in formats_dict:
1855 formats_dict[format_id] = []
1856 formats_dict[format_id].append(format)
1857
1858 # Make sure all formats have unique format_id
1859 for format_id, ambiguous_formats in formats_dict.items():
1860 if len(ambiguous_formats) > 1:
1861 for i, format in enumerate(ambiguous_formats):
1862 format['format_id'] = '%s-%d' % (format_id, i)
1863
1864 for i, format in enumerate(formats):
1865 if format.get('format') is None:
1866 format['format'] = '{id} - {res}{note}'.format(
1867 id=format['format_id'],
1868 res=self.format_resolution(format),
1869 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1870 )
1871 # Automatically determine file extension if missing
1872 if format.get('ext') is None:
1873 format['ext'] = determine_ext(format['url']).lower()
1874 # Automatically determine protocol if missing (useful for format
1875 # selection purposes)
1876 if format.get('protocol') is None:
1877 format['protocol'] = determine_protocol(format)
1878 # Add HTTP headers, so that external programs can use them from the
1879 # json output
1880 full_format_info = info_dict.copy()
1881 full_format_info.update(format)
1882 format['http_headers'] = self._calc_headers(full_format_info)
1883 # Remove private housekeeping stuff
1884 if '__x_forwarded_for_ip' in info_dict:
1885 del info_dict['__x_forwarded_for_ip']
1886
1887 # TODO Central sorting goes here
1888
1889 if formats[0] is not info_dict:
            # Only set the 'formats' field if the original info_dict listed them;
            # otherwise we would create a circular reference: the first (and only)
            # element of info_dict['formats'] would be info_dict itself,
            # which can't be serialized to JSON
1894 info_dict['formats'] = formats
1895 if self.params.get('listformats'):
1896 self.list_formats(info_dict)
1897 return
1898
1899 req_format = self.params.get('format')
1900 if req_format is None:
1901 req_format = self._default_format_spec(info_dict, download=download)
1902 if self.params.get('verbose'):
1903 self.to_screen('[debug] Default format spec: %s' % req_format)
1904
1905 format_selector = self.build_format_selector(req_format)
1906
        # During format selection we may need access to the original set of
        # formats in order to calculate some metrics or do some processing.
1909 # For now we need to be able to guess whether original formats provided
1910 # by extractor are incomplete or not (i.e. whether extractor provides only
1911 # video-only or audio-only formats) for proper formats selection for
1912 # extractors with such incomplete formats (see
1913 # https://github.com/ytdl-org/youtube-dl/pull/5556).
1914 # Since formats may be filtered during format selection and may not match
1915 # the original formats the results may be incorrect. Thus original formats
1916 # or pre-calculated metrics should be passed to format selection routines
1917 # as well.
1918 # We will pass a context object containing all necessary additional data
1919 # instead of just formats.
1920 # This fixes incorrect format selection issue (see
1921 # https://github.com/ytdl-org/youtube-dl/issues/10083).
1922 incomplete_formats = (
1923 # All formats are video-only or
1924 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
1925 # all formats are audio-only
1926 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
1927
1928 ctx = {
1929 'formats': formats,
1930 'incomplete_formats': incomplete_formats,
1931 }
1932
1933 formats_to_download = list(format_selector(ctx))
1934 if not formats_to_download:
1935 raise ExtractorError('requested format not available',
1936 expected=True)
1937
1938 if download:
1939 self.to_screen('[info] Downloading format(s) %s' % ", ".join([f['format_id'] for f in formats_to_download]))
1940 if len(formats_to_download) > 1:
1941 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1942 for format in formats_to_download:
1943 new_info = dict(info_dict)
1944 new_info.update(format)
1945 self.process_info(new_info)
1946 # We update the info dict with the best quality format (backwards compatibility)
1947 info_dict.update(formats_to_download[-1])
1948 return info_dict
1949
1950 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1951 """Select the requested subtitles and their format"""
1952 available_subs = {}
1953 if normal_subtitles and self.params.get('writesubtitles'):
1954 available_subs.update(normal_subtitles)
1955 if automatic_captions and self.params.get('writeautomaticsub'):
1956 for lang, cap_info in automatic_captions.items():
1957 if lang not in available_subs:
1958 available_subs[lang] = cap_info
1959
        if ((not self.params.get('writesubtitles')
                and not self.params.get('writeautomaticsub'))
                or not available_subs):
1963 return None
1964
1965 if self.params.get('allsubtitles', False):
1966 requested_langs = available_subs.keys()
1967 else:
1968 if self.params.get('subtitleslangs', False):
1969 requested_langs = self.params.get('subtitleslangs')
1970 elif 'en' in available_subs:
1971 requested_langs = ['en']
1972 else:
1973 requested_langs = [list(available_subs.keys())[0]]
1974
1975 formats_query = self.params.get('subtitlesformat', 'best')
1976 formats_preference = formats_query.split('/') if formats_query else []
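        # e.g. a 'subtitlesformat' of 'srt/ass/best' tries srt first, then ass,
        # then falls back to the best (last listed) format that is available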
1977 subs = {}
1978 for lang in requested_langs:
1979 formats = available_subs.get(lang)
1980 if formats is None:
1981 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1982 continue
1983 for ext in formats_preference:
1984 if ext == 'best':
1985 f = formats[-1]
1986 break
1987 matches = list(filter(lambda f: f['ext'] == ext, formats))
1988 if matches:
1989 f = matches[-1]
1990 break
1991 else:
1992 f = formats[-1]
1993 self.report_warning(
1994 'No subtitle format found matching "%s" for language %s, '
1995 'using %s' % (formats_query, lang, f['ext']))
1996 subs[lang] = f
1997 return subs
1998
1999 def __forced_printings(self, info_dict, filename, incomplete):
2000 def print_mandatory(field):
2001 if (self.params.get('force%s' % field, False)
2002 and (not incomplete or info_dict.get(field) is not None)):
2003 self.to_stdout(info_dict[field])
2004
2005 def print_optional(field):
2006 if (self.params.get('force%s' % field, False)
2007 and info_dict.get(field) is not None):
2008 self.to_stdout(info_dict[field])
2009
2010 print_mandatory('title')
2011 print_mandatory('id')
2012 if self.params.get('forceurl', False) and not incomplete:
2013 if info_dict.get('requested_formats') is not None:
2014 for f in info_dict['requested_formats']:
2015 self.to_stdout(f['url'] + f.get('play_path', ''))
2016 else:
2017 # For RTMP URLs, also include the playpath
2018 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
2019 print_optional('thumbnail')
2020 print_optional('description')
2021 if self.params.get('forcefilename', False) and filename is not None:
2022 self.to_stdout(filename)
2023 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
2024 self.to_stdout(formatSeconds(info_dict['duration']))
2025 print_mandatory('format')
2026 if self.params.get('forcejson', False):
2027 self.to_stdout(json.dumps(info_dict))
2028
2029 def process_info(self, info_dict):
2030 """Process a single resolved IE result."""
2031
2032 assert info_dict.get('_type', 'video') == 'video'
2033
2034 info_dict.setdefault('__postprocessors', [])
2035
2036 max_downloads = self.params.get('max_downloads')
2037 if max_downloads is not None:
2038 if self._num_downloads >= int(max_downloads):
2039 raise MaxDownloadsReached()
2040
2041 # TODO: backward compatibility, to be removed
2042 info_dict['fulltitle'] = info_dict['title']
2043
2044 if 'format' not in info_dict:
2045 info_dict['format'] = info_dict['ext']
2046
2047 if self._match_entry(info_dict, incomplete=False) is not None:
2048 return
2049
2050 self._num_downloads += 1
2051
2052 info_dict = self.pre_process(info_dict)
2053
2054 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2055 temp_filename = self.prepare_filename(info_dict, 'temp')
2056 files_to_move = {}
2057 skip_dl = self.params.get('skip_download', False)
2058
2059 # Forced printings
2060 self.__forced_printings(info_dict, full_filename, incomplete=False)
2061
2062 if self.params.get('simulate', False):
2063 if self.params.get('force_write_download_archive', False):
2064 self.record_download_archive(info_dict)
2065
2066 # Do nothing else if in simulate mode
2067 return
2068
2069 if full_filename is None:
2070 return
2071
2072 def ensure_dir_exists(path):
2073 return make_dir(path, self.report_error)
2074
2075 if not ensure_dir_exists(encodeFilename(full_filename)):
2076 return
2077 if not ensure_dir_exists(encodeFilename(temp_filename)):
2078 return
2079
2080 if self.params.get('writedescription', False):
2081 descfn = self.prepare_filename(info_dict, 'description')
2082 if not ensure_dir_exists(encodeFilename(descfn)):
2083 return
2084 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
2085 self.to_screen('[info] Video description is already present')
2086 elif info_dict.get('description') is None:
2087 self.report_warning('There\'s no description to write.')
2088 else:
2089 try:
2090 self.to_screen('[info] Writing video description to: ' + descfn)
2091 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
2092 descfile.write(info_dict['description'])
2093 except (OSError, IOError):
2094 self.report_error('Cannot write description file ' + descfn)
2095 return
2096
2097 if self.params.get('writeannotations', False):
2098 annofn = self.prepare_filename(info_dict, 'annotation')
2099 if not ensure_dir_exists(encodeFilename(annofn)):
2100 return
2101 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2102 self.to_screen('[info] Video annotations are already present')
2103 elif not info_dict.get('annotations'):
2104 self.report_warning('There are no annotations to write.')
2105 else:
2106 try:
2107 self.to_screen('[info] Writing video annotations to: ' + annofn)
2108 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2109 annofile.write(info_dict['annotations'])
2110 except (KeyError, TypeError):
2111 self.report_warning('There are no annotations to write.')
2112 except (OSError, IOError):
2113 self.report_error('Cannot write annotations file: ' + annofn)
2114 return
2115
2116 def dl(name, info, subtitle=False):
2117 fd = get_suitable_downloader(info, self.params)(self, self.params)
2118 for ph in self._progress_hooks:
2119 fd.add_progress_hook(ph)
2120 if self.params.get('verbose'):
2121 self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
2122 return fd.download(name, info, subtitle)
2123
2124 subtitles_are_requested = any([self.params.get('writesubtitles', False),
2125 self.params.get('writeautomaticsub')])
2126
2127 if subtitles_are_requested and info_dict.get('requested_subtitles'):
            # Subtitle download errors are already handled by the relevant IE,
            # so processing silently continues when an IE lacks subtitle support
2130 subtitles = info_dict['requested_subtitles']
2131 # ie = self.get_info_extractor(info_dict['extractor_key'])
2132 for sub_lang, sub_info in subtitles.items():
2133 sub_format = sub_info['ext']
2134 sub_fn = self.prepare_filename(info_dict, 'subtitle')
2135 sub_filename = subtitles_filename(
2136 temp_filename if not skip_dl else sub_fn,
2137 sub_lang, sub_format, info_dict.get('ext'))
2138 sub_filename_final = subtitles_filename(sub_fn, sub_lang, sub_format, info_dict.get('ext'))
2139 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
2140 self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
2141 files_to_move[sub_filename] = sub_filename_final
2142 else:
2143 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
2144 if sub_info.get('data') is not None:
2145 try:
2146 # Use newline='' to prevent conversion of newline characters
2147 # See https://github.com/ytdl-org/youtube-dl/issues/10268
2148 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
2149 subfile.write(sub_info['data'])
2150 files_to_move[sub_filename] = sub_filename_final
2151 except (OSError, IOError):
2152 self.report_error('Cannot write subtitles file ' + sub_filename)
2153 return
2154 else:
2155 try:
2156 dl(sub_filename, sub_info, subtitle=True)
2157 '''
2158 if self.params.get('sleep_interval_subtitles', False):
2159 dl(sub_filename, sub_info)
2160 else:
2161 sub_data = ie._request_webpage(
2162 sub_info['url'], info_dict['id'], note=False).read()
2163 with io.open(encodeFilename(sub_filename), 'wb') as subfile:
2164 subfile.write(sub_data)
2165 '''
2166 files_to_move[sub_filename] = sub_filename_final
2167 except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2168 self.report_warning('Unable to download subtitle for "%s": %s' %
2169 (sub_lang, error_to_compat_str(err)))
2170 continue
2171
2172 if skip_dl:
2173 if self.params.get('convertsubtitles', False):
2174 # subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
2175 filename_real_ext = os.path.splitext(full_filename)[1][1:]
2176 filename_wo_ext = (
2177 os.path.splitext(full_filename)[0]
2178 if filename_real_ext == info_dict['ext']
2179 else full_filename)
2180 afilename = '%s.%s' % (filename_wo_ext, self.params.get('convertsubtitles'))
2181 # if subconv.available:
2182 # info_dict['__postprocessors'].append(subconv)
2183 if os.path.exists(encodeFilename(afilename)):
2184 self.to_screen(
2185 '[download] %s has already been downloaded and '
2186 'converted' % afilename)
2187 else:
2188 try:
2189 self.post_process(full_filename, info_dict, files_to_move)
2190 except PostProcessingError as err:
2191 self.report_error('Postprocessing: %s' % str(err))
2192 return
2193
2194 if self.params.get('writeinfojson', False):
2195 infofn = self.prepare_filename(info_dict, 'infojson')
2196 if not ensure_dir_exists(encodeFilename(infofn)):
2197 return
2198 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
2199 self.to_screen('[info] Video metadata is already present')
2200 else:
2201 self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
2202 try:
2203 write_json_file(self.filter_requested_info(info_dict), infofn)
2204 except (OSError, IOError):
2205 self.report_error('Cannot write video metadata to JSON file ' + infofn)
2206 return
2207 info_dict['__infojson_filename'] = infofn
2208
2209 thumbfn = self.prepare_filename(info_dict, 'thumbnail')
2210 thumb_fn_temp = temp_filename if not skip_dl else thumbfn
2211 for thumb_ext in self._write_thumbnails(info_dict, thumb_fn_temp):
2212 thumb_filename_temp = replace_extension(thumb_fn_temp, thumb_ext, info_dict.get('ext'))
2213 thumb_filename = replace_extension(thumbfn, thumb_ext, info_dict.get('ext'))
2214 files_to_move[thumb_filename_temp] = info_dict['__thumbnail_filename'] = thumb_filename
2215
2216 # Write internet shortcut files
2217 url_link = webloc_link = desktop_link = False
2218 if self.params.get('writelink', False):
2219 if sys.platform == "darwin": # macOS.
2220 webloc_link = True
2221 elif sys.platform.startswith("linux"):
2222 desktop_link = True
2223 else: # if sys.platform in ['win32', 'cygwin']:
2224 url_link = True
2225 if self.params.get('writeurllink', False):
2226 url_link = True
2227 if self.params.get('writewebloclink', False):
2228 webloc_link = True
2229 if self.params.get('writedesktoplink', False):
2230 desktop_link = True
2231
2232 if url_link or webloc_link or desktop_link:
2233 if 'webpage_url' not in info_dict:
2234 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2235 return
2236 ascii_url = iri_to_uri(info_dict['webpage_url'])
2237
2238 def _write_link_file(extension, template, newline, embed_filename):
2239 linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2241 self.to_screen('[info] Internet shortcut is already present')
2242 else:
2243 try:
2244 self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2245 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2246 template_vars = {'url': ascii_url}
2247 if embed_filename:
2248 template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2249 linkfile.write(template % template_vars)
2250 except (OSError, IOError):
2251 self.report_error('Cannot write internet shortcut ' + linkfn)
2252 return False
2253 return True
2254
2255 if url_link:
2256 if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2257 return
2258 if webloc_link:
2259 if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2260 return
2261 if desktop_link:
2262 if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2263 return
2264
2265 # Download
2266 must_record_download_archive = False
2267 if not skip_dl:
2268 try:
2269
2270 def existing_file(*filepaths):
2271 ext = info_dict.get('ext')
2272 final_ext = self.params.get('final_ext', ext)
2273 existing_files = []
2274 for file in orderedSet(filepaths):
2275 if final_ext != ext:
2276 converted = replace_extension(file, final_ext, ext)
2277 if os.path.exists(encodeFilename(converted)):
2278 existing_files.append(converted)
2279 if os.path.exists(encodeFilename(file)):
2280 existing_files.append(file)
2281
2282 if not existing_files or self.params.get('overwrites', False):
2283 for file in orderedSet(existing_files):
2284 self.report_file_delete(file)
2285 os.remove(encodeFilename(file))
2286 return None
2287
2288 self.report_file_already_downloaded(existing_files[0])
2289 info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2290 return existing_files[0]
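                    # e.g. when a postprocessor is configured to produce mp3 (making
                    # 'final_ext' equal 'mp3'; hypothetical), an already-converted
                    # 'video.mp3' on disk short-circuits re-downloading 'video.webm'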
2291
2292 success = True
2293 if info_dict.get('requested_formats') is not None:
2294 downloaded = []
2295 merger = FFmpegMergerPP(self)
2296 if self.params.get('allow_unplayable_formats'):
2297 self.report_warning(
2298 'You have requested merging of multiple formats '
2299 'while also allowing unplayable formats to be downloaded. '
2300 'The formats won\'t be merged to prevent data corruption.')
2301 elif not merger.available:
2302 self.report_warning(
2303 'You have requested merging of multiple formats but ffmpeg is not installed. '
2304 'The formats won\'t be merged.')
2305
2306 def compatible_formats(formats):
2307 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2308 video_formats = [format for format in formats if format.get('vcodec') != 'none']
2309 audio_formats = [format for format in formats if format.get('acodec') != 'none']
2310 if len(video_formats) > 2 or len(audio_formats) > 2:
2311 return False
2312
2313 # Check extension
2314 exts = set(format.get('ext') for format in formats)
2315 COMPATIBLE_EXTS = (
2316 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2317 set(('webm',)),
2318 )
2319 for ext_sets in COMPATIBLE_EXTS:
2320 if ext_sets.issuperset(exts):
2321 return True
2322 # TODO: Check acodec/vcodec
2323 return False
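                        # Illustrative: exts {'mp4', 'm4a'} fit within the first set
                        # above, so the pair merges as-is, while {'webm', 'm4a'}
                        # matches neither set and the caller falls back to mkv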
2324
2325 requested_formats = info_dict['requested_formats']
2326 old_ext = info_dict['ext']
2327 if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
2328 info_dict['ext'] = 'mkv'
2329 self.report_warning(
2330 'Requested formats are incompatible for merge and will be merged into mkv.')
2331
2332 def correct_ext(filename):
2333 filename_real_ext = os.path.splitext(filename)[1][1:]
2334 filename_wo_ext = (
2335 os.path.splitext(filename)[0]
2336 if filename_real_ext == old_ext
2337 else filename)
2338 return '%s.%s' % (filename_wo_ext, info_dict['ext'])
2339
2340 # Ensure filename always has a correct extension for successful merge
2341 full_filename = correct_ext(full_filename)
2342 temp_filename = correct_ext(temp_filename)
2343 dl_filename = existing_file(full_filename, temp_filename)
2344 info_dict['__real_download'] = False
2345 if dl_filename is None:
2346 for f in requested_formats:
2347 new_info = dict(info_dict)
2348 new_info.update(f)
2349 fname = prepend_extension(
2350 self.prepare_filename(new_info, 'temp'),
2351 'f%s' % f['format_id'], new_info['ext'])
                            if not ensure_dir_exists(encodeFilename(fname)):
2353 return
2354 downloaded.append(fname)
2355 partial_success, real_download = dl(fname, new_info)
2356 info_dict['__real_download'] = info_dict['__real_download'] or real_download
2357 success = success and partial_success
2358 if merger.available and not self.params.get('allow_unplayable_formats'):
2359 info_dict['__postprocessors'].append(merger)
2360 info_dict['__files_to_merge'] = downloaded
                                # The merge itself still runs now, so this counts as a real
                                # download even if all the requested formats already existed
2362 info_dict['__real_download'] = True
2363 else:
2364 for file in downloaded:
2365 files_to_move[file] = None
2366 else:
2367 # Just a single file
2368 dl_filename = existing_file(full_filename, temp_filename)
2369 if dl_filename is None:
2370 success, real_download = dl(temp_filename, info_dict)
2371 info_dict['__real_download'] = real_download
2372
2373 dl_filename = dl_filename or temp_filename
2374 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2375
2376 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2377 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2378 return
2379 except (OSError, IOError) as err:
2380 raise UnavailableVideoError(err)
2381 except (ContentTooShortError, ) as err:
2382 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2383 return
2384
2385 if success and full_filename != '-':
2386 # Fixup content
2387 fixup_policy = self.params.get('fixup')
2388 if fixup_policy is None:
2389 fixup_policy = 'detect_or_warn'
2390
2391 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg to fix this automatically.'
2392
2393 stretched_ratio = info_dict.get('stretched_ratio')
2394 if stretched_ratio is not None and stretched_ratio != 1:
2395 if fixup_policy == 'warn':
2396 self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2397 info_dict['id'], stretched_ratio))
2398 elif fixup_policy == 'detect_or_warn':
2399 stretched_pp = FFmpegFixupStretchedPP(self)
2400 if stretched_pp.available:
2401 info_dict['__postprocessors'].append(stretched_pp)
2402 else:
2403 self.report_warning(
2404 '%s: Non-uniform pixel ratio (%s). %s'
2405 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
2406 else:
2407 assert fixup_policy in ('ignore', 'never')
2408
2409 if (info_dict.get('requested_formats') is None
2410 and info_dict.get('container') == 'm4a_dash'
2411 and info_dict.get('ext') == 'm4a'):
2412 if fixup_policy == 'warn':
2413 self.report_warning(
2414 '%s: writing DASH m4a. '
2415 'Only some players support this container.'
2416 % info_dict['id'])
2417 elif fixup_policy == 'detect_or_warn':
2418 fixup_pp = FFmpegFixupM4aPP(self)
2419 if fixup_pp.available:
2420 info_dict['__postprocessors'].append(fixup_pp)
2421 else:
2422 self.report_warning(
2423 '%s: writing DASH m4a. '
2424 'Only some players support this container. %s'
2425 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2426 else:
2427 assert fixup_policy in ('ignore', 'never')
2428
            if (info_dict.get('protocol') == 'm3u8_native'
                    or (info_dict.get('protocol') == 'm3u8'
                        and self.params.get('hls_prefer_native'))):
2432 if fixup_policy == 'warn':
2433 self.report_warning('%s: malformed AAC bitstream detected.' % (
2434 info_dict['id']))
2435 elif fixup_policy == 'detect_or_warn':
2436 fixup_pp = FFmpegFixupM3u8PP(self)
2437 if fixup_pp.available:
2438 info_dict['__postprocessors'].append(fixup_pp)
2439 else:
2440 self.report_warning(
2441 '%s: malformed AAC bitstream detected. %s'
2442 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2443 else:
2444 assert fixup_policy in ('ignore', 'never')
2445
2446 try:
2447 self.post_process(dl_filename, info_dict, files_to_move)
2448 except PostProcessingError as err:
2449 self.report_error('Postprocessing: %s' % str(err))
2450 return
2451 try:
2452 for ph in self._post_hooks:
2453 ph(full_filename)
2454 except Exception as err:
2455 self.report_error('post hooks: %s' % str(err))
2456 return
2457 must_record_download_archive = True
2458
2459 if must_record_download_archive or self.params.get('force_write_download_archive', False):
2460 self.record_download_archive(info_dict)
2461 max_downloads = self.params.get('max_downloads')
2462 if max_downloads is not None and self._num_downloads >= int(max_downloads):
2463 raise MaxDownloadsReached()
2464
2465 def download(self, url_list):
2466 """Download a given list of URLs."""
2467 outtmpl = self.outtmpl_dict['default']
2468 if (len(url_list) > 1
2469 and outtmpl != '-'
2470 and '%' not in outtmpl
2471 and self.params.get('max_downloads') != 1):
2472 raise SameFileError(outtmpl)
2473
2474 for url in url_list:
2475 try:
2476 # It also downloads the videos
2477 res = self.extract_info(
2478 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2479 except UnavailableVideoError:
2480 self.report_error('unable to download video')
2481 except MaxDownloadsReached:
2482 self.to_screen('[info] Maximum number of downloaded files reached')
2483 raise
2484 except ExistingVideoReached:
2485 self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
2486 raise
2487 except RejectedVideoReached:
2488 self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
2489 raise
2490 else:
2491 if self.params.get('dump_single_json', False):
2492 self.to_stdout(json.dumps(res))
2493
2494 return self._download_retcode
2495
2496 def download_with_info_file(self, info_filename):
2497 with contextlib.closing(fileinput.FileInput(
2498 [info_filename], mode='r',
2499 openhook=fileinput.hook_encoded('utf-8'))) as f:
            # FileInput doesn't have a read method, so we can't call json.load
2501 info = self.filter_requested_info(json.loads('\n'.join(f)))
2502 try:
2503 self.process_ie_result(info, download=True)
2504 except DownloadError:
2505 webpage_url = info.get('webpage_url')
2506 if webpage_url is not None:
2507 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2508 return self.download([webpage_url])
2509 else:
2510 raise
2511 return self._download_retcode
2512
2513 @staticmethod
2514 def filter_requested_info(info_dict):
2515 fields_to_remove = ('requested_formats', 'requested_subtitles')
2516 return dict(
2517 (k, v) for k, v in info_dict.items()
2518 if (k[0] != '_' or k == '_type') and k not in fields_to_remove)
2519
    def run_pp(self, pp, infodict, files_to_move=None):
        # files_to_move must not be a shared mutable default: it is mutated
        # below (setdefault/del), which would leak entries across calls
        if files_to_move is None:
            files_to_move = {}
        files_to_delete, infodict = pp.run(infodict)
2523 if not files_to_delete:
2524 return files_to_move, infodict
2525
2526 if self.params.get('keepvideo', False):
2527 for f in files_to_delete:
2528 files_to_move.setdefault(f, '')
2529 else:
2530 for old_filename in set(files_to_delete):
2531 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2532 try:
2533 os.remove(encodeFilename(old_filename))
2534 except (IOError, OSError):
2535 self.report_warning('Unable to remove downloaded original file')
2536 if old_filename in files_to_move:
2537 del files_to_move[old_filename]
2538 return files_to_move, infodict
2539
2540 def pre_process(self, ie_info):
2541 info = dict(ie_info)
2542 for pp in self._pps['beforedl']:
2543 info = self.run_pp(pp, info)[1]
2544 return info
2545
    def post_process(self, filename, ie_info, files_to_move=None):
        """Run all the postprocessors on the given file."""
        if files_to_move is None:
            files_to_move = {}
        info = dict(ie_info)
2549 info['filepath'] = filename
2550 info['__files_to_move'] = {}
2551
2552 for pp in ie_info.get('__postprocessors', []) + self._pps['normal']:
2553 files_to_move, info = self.run_pp(pp, info, files_to_move)
2554 info = self.run_pp(MoveFilesAfterDownloadPP(self, files_to_move), info)[1]
2555 for pp in self._pps['aftermove']:
2556 info = self.run_pp(pp, info, {})[1]
2557
2558 def _make_archive_id(self, info_dict):
2559 video_id = info_dict.get('id')
2560 if not video_id:
2561 return
2562 # Future-proof against any change in case
2563 # and backwards compatibility with prior versions
2564 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
2565 if extractor is None:
2566 url = str_or_none(info_dict.get('url'))
2567 if not url:
2568 return
2569 # Try to find matching extractor for the URL and take its ie_key
2570 for ie in self._ies:
2571 if ie.suitable(url):
2572 extractor = ie.ie_key()
2573 break
2574 else:
2575 return
2576 return '%s %s' % (extractor.lower(), video_id)
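        # e.g. an entry from the Youtube extractor with the (hypothetical) id
        # 'abc123' is recorded as the archive line 'youtube abc123'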
2577
2578 def in_download_archive(self, info_dict):
2579 fn = self.params.get('download_archive')
2580 if fn is None:
2581 return False
2582
2583 vid_id = self._make_archive_id(info_dict)
2584 if not vid_id:
2585 return False # Incomplete video information
2586
2587 return vid_id in self.archive
2588
2589 def record_download_archive(self, info_dict):
2590 fn = self.params.get('download_archive')
2591 if fn is None:
2592 return
2593 vid_id = self._make_archive_id(info_dict)
2594 assert vid_id
2595 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2596 archive_file.write(vid_id + '\n')
2597 self.archive.add(vid_id)
2598
2599 @staticmethod
2600 def format_resolution(format, default='unknown'):
2601 if format.get('vcodec') == 'none':
2602 return 'audio only'
2603 if format.get('resolution') is not None:
2604 return format['resolution']
2605 if format.get('height') is not None:
2606 if format.get('width') is not None:
2607 res = '%sx%s' % (format['width'], format['height'])
2608 else:
2609 res = '%sp' % format['height']
2610 elif format.get('width') is not None:
2611 res = '%dx?' % format['width']
2612 else:
2613 res = default
2614 return res
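        # Illustrative mappings: {'vcodec': 'none'} -> 'audio only',
        # {'width': 1280, 'height': 720} -> '1280x720',
        # {'height': 720} -> '720p', {'width': 1280} -> '1280x?'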
2615
2616 def _format_note(self, fdict):
2617 res = ''
2618 if fdict.get('ext') in ['f4f', 'f4m']:
2619 res += '(unsupported) '
2620 if fdict.get('language'):
2621 if res:
2622 res += ' '
2623 res += '[%s] ' % fdict['language']
2624 if fdict.get('format_note') is not None:
2625 res += fdict['format_note'] + ' '
2626 if fdict.get('tbr') is not None:
2627 res += '%4dk ' % fdict['tbr']
2628 if fdict.get('container') is not None:
2629 if res:
2630 res += ', '
2631 res += '%s container' % fdict['container']
2632 if (fdict.get('vcodec') is not None
2633 and fdict.get('vcodec') != 'none'):
2634 if res:
2635 res += ', '
2636 res += fdict['vcodec']
2637 if fdict.get('vbr') is not None:
2638 res += '@'
2639 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2640 res += 'video@'
2641 if fdict.get('vbr') is not None:
2642 res += '%4dk' % fdict['vbr']
2643 if fdict.get('fps') is not None:
2644 if res:
2645 res += ', '
2646 res += '%sfps' % fdict['fps']
2647 if fdict.get('acodec') is not None:
2648 if res:
2649 res += ', '
2650 if fdict['acodec'] == 'none':
2651 res += 'video only'
2652 else:
2653 res += '%-5s' % fdict['acodec']
2654 elif fdict.get('abr') is not None:
2655 if res:
2656 res += ', '
2657 res += 'audio'
2658 if fdict.get('abr') is not None:
2659 res += '@%3dk' % fdict['abr']
2660 if fdict.get('asr') is not None:
2661 res += ' (%5dHz)' % fdict['asr']
2662 if fdict.get('filesize') is not None:
2663 if res:
2664 res += ', '
2665 res += format_bytes(fdict['filesize'])
2666 elif fdict.get('filesize_approx') is not None:
2667 if res:
2668 res += ', '
2669 res += '~' + format_bytes(fdict['filesize_approx'])
2670 return res
2671
2672 def _format_note_table(self, f):
2673 def join_fields(*vargs):
2674 return ', '.join((val for val in vargs if val != ''))
2675
2676 return join_fields(
2677 'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
2678 format_field(f, 'language', '[%s]'),
2679 format_field(f, 'format_note'),
2680 format_field(f, 'container', ignore=(None, f.get('ext'))),
2681 format_field(f, 'asr', '%5dHz'))
2682
2683 def list_formats(self, info_dict):
2684 formats = info_dict.get('formats', [info_dict])
2685 new_format = self.params.get('listformats_table', False)
2686 if new_format:
2687 table = [
2688 [
2689 format_field(f, 'format_id'),
2690 format_field(f, 'ext'),
2691 self.format_resolution(f),
2692 format_field(f, 'fps', '%d'),
2693 '|',
2694 format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
2695 format_field(f, 'tbr', '%4dk'),
                    format_field(f, 'protocol').replace('http_dash_segments', 'dash').replace('native', 'n').replace('niconico_', ''),
2697 '|',
2698 format_field(f, 'vcodec', default='unknown').replace('none', ''),
2699 format_field(f, 'vbr', '%4dk'),
2700 format_field(f, 'acodec', default='unknown').replace('none', ''),
2701 format_field(f, 'abr', '%3dk'),
2702 format_field(f, 'asr', '%5dHz'),
2703 self._format_note_table(f)]
2704 for f in formats
2705 if f.get('preference') is None or f['preference'] >= -1000]
2706 header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
2707 '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
2708 else:
2709 table = [
2710 [
2711 format_field(f, 'format_id'),
2712 format_field(f, 'ext'),
2713 self.format_resolution(f),
2714 self._format_note(f)]
2715 for f in formats
2716 if f.get('preference') is None or f['preference'] >= -1000]
2717 header_line = ['format code', 'extension', 'resolution', 'note']
2718
2719 self.to_screen(
2720 '[info] Available formats for %s:\n%s' % (info_dict['id'], render_table(
2721 header_line,
2722 table,
2723 delim=new_format,
2724 extraGap=(0 if new_format else 1),
2725 hideEmpty=new_format)))
2726
2727 def list_thumbnails(self, info_dict):
2728 thumbnails = info_dict.get('thumbnails')
2729 if not thumbnails:
2730 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
2731 return
2732
2733 self.to_screen(
2734 '[info] Thumbnails for %s:' % info_dict['id'])
2735 self.to_screen(render_table(
2736 ['ID', 'width', 'height', 'URL'],
2737 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
2738
2739 def list_subtitles(self, video_id, subtitles, name='subtitles'):
2740 if not subtitles:
2741 self.to_screen('%s has no %s' % (video_id, name))
2742 return
2743 self.to_screen(
2744 'Available %s for %s:' % (name, video_id))
2745 self.to_screen(render_table(
2746 ['Language', 'formats'],
2747 [[lang, ', '.join(f['ext'] for f in reversed(formats))]
2748 for lang, formats in subtitles.items()]))
2749
2750 def urlopen(self, req):
2751 """ Start an HTTP download """
2752 if isinstance(req, compat_basestring):
2753 req = sanitized_Request(req)
2754 return self._opener.open(req, timeout=self._socket_timeout)
2755
2756 def print_debug_header(self):
2757 if not self.params.get('verbose'):
2758 return
2759
2760 if type('') is not compat_str:
2761 # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
2762 self.report_warning(
2763 'Your Python is broken! Update to a newer and supported version')
2764
2765 stdout_encoding = getattr(
2766 sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
2767 encoding_str = (
2768 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
2769 locale.getpreferredencoding(),
2770 sys.getfilesystemencoding(),
2771 stdout_encoding,
2772 self.get_encoding()))
2773 write_string(encoding_str, encoding=None)
2774
2775 source = (
2776 '(exe)' if hasattr(sys, 'frozen')
2777 else '(zip)' if isinstance(globals().get('__loader__'), zipimporter)
2778 else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py'
2779 else '')
2780 self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))
2781 if _LAZY_LOADER:
2782 self._write_string('[debug] Lazy loading extractors enabled\n')
2783 if _PLUGIN_CLASSES:
2784 self._write_string(
2785 '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
2786 try:
2787 sp = subprocess.Popen(
2788 ['git', 'rev-parse', '--short', 'HEAD'],
2789 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
2790 cwd=os.path.dirname(os.path.abspath(__file__)))
2791 out, err = process_communicate_or_kill(sp)
2792 out = out.decode().strip()
2793 if re.match('[0-9a-f]+', out):
2794 self._write_string('[debug] Git HEAD: %s\n' % out)
2795 except Exception:
2796 try:
2797 sys.exc_clear()
2798 except Exception:
2799 pass
2800
2801 def python_implementation():
2802 impl_name = platform.python_implementation()
2803 if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
2804 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
2805 return impl_name
2806
2807 self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
2808 platform.python_version(),
2809 python_implementation(),
2810 platform.architecture()[0],
2811 platform_name()))
2812
2813 exe_versions = FFmpegPostProcessor.get_versions(self)
2814 exe_versions['rtmpdump'] = rtmpdump_version()
2815 exe_versions['phantomjs'] = PhantomJSwrapper._version()
2816 exe_str = ', '.join(
2817 '%s %s' % (exe, v)
2818 for exe, v in sorted(exe_versions.items())
2819 if v
2820 )
2821 if not exe_str:
2822 exe_str = 'none'
2823 self._write_string('[debug] exe versions: %s\n' % exe_str)
2824
2825 proxy_map = {}
2826 for handler in self._opener.handlers:
2827 if hasattr(handler, 'proxies'):
2828 proxy_map.update(handler.proxies)
2829 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
2830
2831 if self.params.get('call_home', False):
2832 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
2833 self._write_string('[debug] Public IP address: %s\n' % ipaddr)
2834 return
2835 latest_version = self.urlopen(
2836 'https://yt-dl.org/latest/version').read().decode('utf-8')
2837 if version_tuple(latest_version) > version_tuple(__version__):
2838 self.report_warning(
2839 'You are using an outdated version (newest version: %s)! '
2840 'See https://yt-dl.org/update if you need help updating.' %
2841 latest_version)
2842
2843 def _setup_opener(self):
2844 timeout_val = self.params.get('socket_timeout')
2845 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
2846
2847 opts_cookiefile = self.params.get('cookiefile')
2848 opts_proxy = self.params.get('proxy')
2849
2850 if opts_cookiefile is None:
2851 self.cookiejar = compat_cookiejar.CookieJar()
2852 else:
2853 opts_cookiefile = expand_path(opts_cookiefile)
2854 self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
2855 if os.access(opts_cookiefile, os.R_OK):
2856 self.cookiejar.load(ignore_discard=True, ignore_expires=True)
2857
2858 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
2859 if opts_proxy is not None:
2860 if opts_proxy == '':
2861 proxies = {}
2862 else:
2863 proxies = {'http': opts_proxy, 'https': opts_proxy}
2864 else:
2865 proxies = compat_urllib_request.getproxies()
2866 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
2867 if 'http' in proxies and 'https' not in proxies:
2868 proxies['https'] = proxies['http']
2869 proxy_handler = PerRequestProxyHandler(proxies)
2870
2871 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
2872 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
2873 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
2874 redirect_handler = YoutubeDLRedirectHandler()
2875 data_handler = compat_urllib_request_DataHandler()
2876
2877 # When passing our own FileHandler instance, build_opener won't add the
2878 # default FileHandler and allows us to disable the file protocol, which
2879 # can be used for malicious purposes (see
2880 # https://github.com/ytdl-org/youtube-dl/issues/8227)
2881 file_handler = compat_urllib_request.FileHandler()
2882
2883 def file_open(*args, **kwargs):
2884 raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dlc for security reasons')
2885 file_handler.file_open = file_open
2886
2887 opener = compat_urllib_request.build_opener(
2888 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2889
2890 # Delete the default user-agent header, which would otherwise apply in
2891 # cases where our custom HTTP handler doesn't come into play
2892 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
2893 opener.addheaders = []
2894 self._opener = opener
2895
2896 def encode(self, s):
2897 if isinstance(s, bytes):
2898 return s # Already encoded
2899
2900 try:
2901 return s.encode(self.get_encoding())
2902 except UnicodeEncodeError as err:
2903 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2904 raise
2905
2906 def get_encoding(self):
2907 encoding = self.params.get('encoding')
2908 if encoding is None:
2909 encoding = preferredencoding()
2910 return encoding
2911
2912 def _write_thumbnails(self, info_dict, filename): # return the extensions
2913 write_all = self.params.get('write_all_thumbnails', False)
2914 thumbnails = []
2915 if write_all or self.params.get('writethumbnail', False):
2916 thumbnails = info_dict.get('thumbnails') or []
2917 multiple = write_all and len(thumbnails) > 1
2918
2919 ret = []
2920 for t in thumbnails[::1 if write_all else -1]:
2921 thumb_ext = determine_ext(t['url'], 'jpg')
2922 suffix = '%s.' % t['id'] if multiple else ''
2923 thumb_display_id = '%s ' % t['id'] if multiple else ''
2924 t['filename'] = thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))
2925
2926 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
2927 ret.append(suffix + thumb_ext)
2928 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2929 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2930 else:
2931 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2932 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2933 try:
2934 uf = self.urlopen(t['url'])
2935 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2936 shutil.copyfileobj(uf, thumbf)
2937 ret.append(suffix + thumb_ext)
2938 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2939 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2940 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2941 self.report_warning('Unable to download thumbnail "%s": %s' %
2942 (t['url'], error_to_compat_str(err)))
2943 if ret and not write_all:
2944 break
2945 return ret