youtube_dlc/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import copy
   9 import datetime
  10 import errno
  11 import fileinput
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import socket
  23 import sys
  24 import time
  25 import tokenize
  26 import traceback
  27 import random
  28
  29 from string import ascii_letters
  30 from zipimport import zipimporter
  31
  32 from .compat import (
  33     compat_basestring,
  34     compat_cookiejar,
  35     compat_get_terminal_size,
  36     compat_http_client,
  37     compat_kwargs,
  38     compat_numeric_types,
  39     compat_os_name,
  40     compat_str,
  41     compat_tokenize_tokenize,
  42     compat_urllib_error,
  43     compat_urllib_request,
  44     compat_urllib_request_DataHandler,
  45 )
  46 from .utils import (
  47     age_restricted,
  48     args_to_str,
  49     ContentTooShortError,
  50     date_from_str,
  51     DateRange,
  52     DEFAULT_OUTTMPL,
  53     OUTTMPL_TYPES,
  54     determine_ext,
  55     determine_protocol,
  56     DOT_DESKTOP_LINK_TEMPLATE,
  57     DOT_URL_LINK_TEMPLATE,
  58     DOT_WEBLOC_LINK_TEMPLATE,
  59     DownloadError,
  60     encode_compat_str,
  61     encodeFilename,
  62     error_to_compat_str,
  63     ExistingVideoReached,
  64     expand_path,
  65     ExtractorError,
  66     float_or_none,
  67     format_bytes,
  68     format_field,
  69     formatSeconds,
  70     GeoRestrictedError,
  71     int_or_none,
  72     iri_to_uri,
  73     ISO3166Utils,
  74     locked_file,
  75     make_dir,
  76     make_HTTPS_handler,
  77     MaxDownloadsReached,
  78     orderedSet,
  79     PagedList,
  80     parse_filesize,
  81     PerRequestProxyHandler,
  82     platform_name,
  83     PostProcessingError,
  84     preferredencoding,
  85     prepend_extension,
  86     register_socks_protocols,
  87     render_table,
  88     replace_extension,
  89     RejectedVideoReached,
  90     SameFileError,
  91     sanitize_filename,
  92     sanitize_path,
  93     sanitize_url,
  94     sanitized_Request,
  95     std_headers,
  96     str_or_none,
  97     strftime_or_none,
  98     subtitles_filename,
  99     to_high_limit_path,
 100     UnavailableVideoError,
 101     url_basename,
 102     version_tuple,
 103     write_json_file,
 104     write_string,
 105     YoutubeDLCookieJar,
 106     YoutubeDLCookieProcessor,
 107     YoutubeDLHandler,
 108     YoutubeDLRedirectHandler,
 109     process_communicate_or_kill,
 110 )
 111 from .cache import Cache
 112 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER, _PLUGIN_CLASSES
 113 from .extractor.openload import PhantomJSwrapper
 114 from .downloader import get_suitable_downloader
 115 from .downloader.rtmp import rtmpdump_version
 116 from .postprocessor import (
 117     FFmpegFixupM3u8PP,
 118     FFmpegFixupM4aPP,
 119     FFmpegFixupStretchedPP,
 120     FFmpegMergerPP,
 121     FFmpegPostProcessor,
 122     # FFmpegSubtitlesConvertorPP,
 123     get_postprocessor,
 124     MoveFilesAfterDownloadPP,
 125 )
 126 from .version import __version__
 127
 128 if compat_os_name == 'nt':
 129     import ctypes
 130
 131
 132 class YoutubeDL(object):
 133     """YoutubeDL class.
 134
    YoutubeDL objects are the ones responsible for downloading the
 136     actual video file and writing it to disk if the user has requested
 137     it, among some other tasks. In most cases there should be one per
 138     program. As, given a video URL, the downloader doesn't know how to
 139     extract all the needed information, task that InfoExtractors do, it
 140     has to pass the URL to one of them.
 141
    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.
 149
 150     YoutubeDL objects accept a lot of parameters. In order not to saturate
 151     the object constructor with arguments, it receives a dictionary of
 152     options instead. These options are available through the params
 153     attribute for the InfoExtractors to use. The YoutubeDL also
 154     registers itself as the downloader in charge for the InfoExtractors
 155     that are added to it, so this is a "mutual registration".
 156
 157     Available options:
 158
 159     username:          Username for authentication purposes.
 160     password:          Password for authentication purposes.
 161     videopassword:     Password for accessing a video.
 162     ap_mso:            Adobe Pass multiple-system operator identifier.
 163     ap_username:       Multiple-system operator account username.
 164     ap_password:       Multiple-system operator account password.
 165     usenetrc:          Use netrc for authentication instead.
 166     verbose:           Print additional info to stdout.
 167     quiet:             Do not print messages to stdout.
 168     no_warnings:       Do not print out anything for warnings.
 169     forceurl:          Force printing final URL.
 170     forcetitle:        Force printing title.
 171     forceid:           Force printing ID.
 172     forcethumbnail:    Force printing thumbnail URL.
 173     forcedescription:  Force printing description.
 174     forcefilename:     Force printing final filename.
 175     forceduration:     Force printing duration.
 176     forcejson:         Force printing info_dict as JSON.
 177     dump_single_json:  Force printing the info_dict of the whole playlist
 178                        (or video) as a single JSON line.
 179     force_write_download_archive: Force writing download archive regardless
 180                        of 'skip_download' or 'simulate'.
 181     simulate:          Do not download the video files.
 182     format:            Video format code. see "FORMAT SELECTION" for more details.
 183     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 184     format_sort:       How to sort the video formats. see "Sorting Formats"
 185                        for more details.
 186     format_sort_force: Force the given format_sort. see "Sorting Formats"
 187                        for more details.
 188     allow_multiple_video_streams:   Allow multiple video streams to be merged
 189                        into a single file
 190     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 191                        into a single file
 192     paths:             Dictionary of output paths. The allowed keys are 'home'
 193                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 194     outtmpl:           Dictionary of templates for output names. Allowed keys
 195                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
                       A string is also accepted for backward compatibility
 197     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 198     restrictfilenames: Do not allow "&" and spaces in file names
 199     trim_file_name:    Limit length of filename (extension excluded)
 200     windowsfilenames:  Force the filenames to be windows compatible
 201     ignoreerrors:      Do not stop on download errors
 202                        (Default True when running youtube-dlc,
 203                        but False when directly accessing YoutubeDL class)
 204     force_generic_extractor: Force downloader to use the generic extractor
 205     overwrites:        Overwrite all video and metadata files if True,
 206                        overwrite only non-video files if None
 207                        and don't overwrite any file if False
 208     playliststart:     Playlist item to start at.
 209     playlistend:       Playlist item to end at.
 210     playlist_items:    Specific indices of playlist to download.
 211     playlistreverse:   Download playlist items in reverse order.
 212     playlistrandom:    Download playlist items in random order.
 213     matchtitle:        Download only matching titles.
 214     rejecttitle:       Reject downloads for matching titles.
 215     logger:            Log messages to a logging.Logger instance.
 216     logtostderr:       Log messages to stderr instead of stdout.
 217     writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video metadata to a .info.json file
 219     writecomments:     Extract video comments. This will not be written to disk
 220                        unless writeinfojson is also given
 221     writeannotations:  Write the video annotations to a .annotations.xml file
 222     writethumbnail:    Write the thumbnail image to a file
 223     allow_playlist_files: Whether to write playlists' description, infojson etc
 224                        also to disk when using the 'write*' options
 225     write_all_thumbnails:  Write all thumbnail formats to files
 226     writelink:         Write an internet shortcut file, depending on the
 227                        current platform (.url/.webloc/.desktop)
 228     writeurllink:      Write a Windows internet shortcut file (.url)
 229     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 230     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 231     writesubtitles:    Write the video subtitles to a file
 232     writeautomaticsub: Write the automatically generated subtitles to a file
 233     allsubtitles:      Downloads all the subtitles of the video
 234                        (requires writesubtitles or writeautomaticsub)
 235     listsubtitles:     Lists all available subtitles for the video
 236     subtitlesformat:   The format code for subtitles
 237     subtitleslangs:    List of languages of the subtitles to download
 238     keepvideo:         Keep the video file after post-processing
 239     daterange:         A DateRange object, download only if the upload_date is in the range.
 240     skip_download:     Skip the actual download of the video file
 241     cachedir:          Location of the cache files in the filesystem.
 242                        False to disable filesystem cache.
 243     noplaylist:        Download single video instead of a playlist if in doubt.
 244     age_limit:         An integer representing the user's age in years.
 245                        Unsuitable videos for the given age are skipped.
 246     min_views:         An integer representing the minimum view count the video
 247                        must have in order to not be skipped.
 248                        Videos without view count information are always
 249                        downloaded. None for no limit.
 250     max_views:         An integer representing the maximum view count.
 251                        Videos that are more popular than that are not
 252                        downloaded.
 253                        Videos without view count information are always
 254                        downloaded. None for no limit.
 255     download_archive:  File name of a file where all downloads are recorded.
 256                        Videos already present in the file are not downloaded
 257                        again.
 258     break_on_existing: Stop the download process after attempting to download a
 259                        file that is in the archive.
 260     break_on_reject:   Stop the download process when encountering a video that
 261                        has been filtered out.
 262     cookiefile:        File name where cookies should be read from and dumped to
 263     nocheckcertificate:Do not verify SSL certificates
 264     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 265                        At the moment, this is only supported by YouTube.
 266     proxy:             URL of the proxy server to use
 267     geo_verification_proxy:  URL of the proxy to use for IP address verification
 268                        on geo-restricted sites.
 269     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 270     bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
 272     debug_printtraffic:Print out sent and received HTTP traffic
 273     include_ads:       Download ads as well
 274     default_search:    Prepend this string if an input url is not valid.
 275                        'auto' for elaborate guessing
 276     encoding:          Use this encoding instead of the system-specified.
 277     extract_flat:      Do not resolve URLs, return the immediate result.
 278                        Pass in 'in_playlist' to only show this behavior for
 279                        playlist items.
 280     postprocessors:    A list of dictionaries, each with an entry
 281                        * key:  The name of the postprocessor. See
 282                                youtube_dlc/postprocessor/__init__.py for a list.
                       * when: Optional. When to run this postprocessor; one of
                               'beforedl', 'normal' (default) or 'aftermove'
 285                        as well as any further keyword arguments for the
 286                        postprocessor.
 287     post_hooks:        A list of functions that get called as the final step
 288                        for each video file, after all postprocessors have been
 289                        called. The filename will be passed as the only argument.
 290     progress_hooks:    A list of functions that get called on download
 291                        progress, with a dictionary with the entries
 292                        * status: One of "downloading", "error", or "finished".
 293                                  Check this first and ignore unknown values.
 294
 295                        If status is one of "downloading", or "finished", the
 296                        following properties may also be present:
 297                        * filename: The final filename (always present)
 298                        * tmpfilename: The filename we're currently writing to
 299                        * downloaded_bytes: Bytes on disk
 300                        * total_bytes: Size of the whole file, None if unknown
 301                        * total_bytes_estimate: Guess of the eventual file size,
 302                                                None if unavailable.
 303                        * elapsed: The number of seconds since download started.
 304                        * eta: The estimated time in seconds, None if unknown
 305                        * speed: The download speed in bytes/second, None if
 306                                 unknown
 307                        * fragment_index: The counter of the currently
 308                                          downloaded video fragment.
 309                        * fragment_count: The number of fragments (= individual
 310                                          files that will be merged)
 311
 312                        Progress hooks are guaranteed to be called at least once
 313                        (with status "finished") if the download is successful.
 314     merge_output_format: Extension to use when merging formats.
 315     final_ext:         Expected final extension; used to detect when the file was
 316                        already downloaded and converted. "merge_output_format" is
 317                        replaced by this extension when given
 318     fixup:             Automatically correct known faults of the file.
 319                        One of:
 320                        - "never": do nothing
 321                        - "warn": only emit a warning
 322                        - "detect_or_warn": check whether we can do anything
 323                                            about it, warn otherwise (default)
 324     source_address:    Client-side IP address to bind to.
 325     call_home:         Boolean, true iff we are allowed to contact the
 326                        youtube-dlc servers for debugging.
 327     sleep_interval:    Number of seconds to sleep before each download when
 328                        used alone or a lower bound of a range for randomized
 329                        sleep before each download (minimum possible number
 330                        of seconds to sleep) when used along with
 331                        max_sleep_interval.
 332     max_sleep_interval:Upper bound of a range for randomized sleep before each
 333                        download (maximum possible number of seconds to sleep).
 334                        Must only be used along with sleep_interval.
 335                        Actual sleep time will be a random float from range
 336                        [sleep_interval; max_sleep_interval].
 337     listformats:       Print an overview of available video formats and exit.
 338     list_thumbnails:   Print a table of all thumbnails and exit.
 339     match_filter:      A function that gets called with the info_dict of
 340                        every video.
 341                        If it returns a message, the video is ignored.
 342                        If it returns None, the video is downloaded.
 343                        match_filter_func in utils.py is one example for this.
 344     no_color:          Do not emit color codes in output.
 345     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 346                        HTTP header
 347     geo_bypass_country:
 348                        Two-letter ISO 3166-2 country code that will be used for
 349                        explicit geographic restriction bypassing via faking
 350                        X-Forwarded-For HTTP header
 351     geo_bypass_ip_block:
 352                        IP range in CIDR notation that will be used similarly to
 353                        geo_bypass_country
 354
 355     The following options determine which downloader is picked:
 356     external_downloader: Executable of the external downloader to call.
 357                        None or unset for standard (built-in) downloader.
 358     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
 359                        if True, otherwise use ffmpeg/avconv if False, otherwise
 360                        use downloader suggested by extractor if None.
 361
 362     The following parameters are not used by YoutubeDL itself, they are used by
 363     the downloader (see youtube_dlc/downloader/common.py):
 364     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 365     noresizebuffer, retries, continuedl, noprogress, consoletitle,
 366     xattr_set_filesize, external_downloader_args, hls_use_mpegts,
 367     http_chunk_size.
 368
 369     The following options are used by the post processors:
 370     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 371                        otherwise prefer ffmpeg. (avconv support is deprecated)
 372     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 373                        to the binary or its containing directory.
 374     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 375                         and a list of additional command-line arguments for the
 376                         postprocessor/executable. The dict can also have "PP+EXE" keys
 377                         which are used when the given exe is used by the given PP.
                        Use 'default' as the name for arguments to be passed to all PPs
 379     The following options are used by the Youtube extractor:
 380     youtube_include_dash_manifest: If True (default), DASH manifests and related
 381                         data will be downloaded and processed by extractor.
 382                         You can reduce network I/O by disabling it if you don't
 383                         care about DASH.
 384     """
 385
    # Names of info-dict fields that are treated as numeric.
    # NOTE(review): the code that consumes this set is not visible in this
    # part of the file - presumably output-template formatting; confirm.
    _NUMERIC_FIELDS = set((
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'upload_year', 'upload_month', 'upload_day',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
        'playlist_index',
    ))

    # Class-level defaults. __init__ rebinds params, _ies, _pps,
    # __prepare_filename_warned, _download_retcode, _num_downloads and
    # _screen_file on every instance, so for those the values below only
    # matter before/without __init__ running.
    params = None
    _ies = []
    _pps = {'beforedl': [], 'aftermove': [], 'normal': []}
    # Double leading underscore: name-mangled to _YoutubeDL__prepare_filename_warned.
    __prepare_filename_warned = False
    _download_retcode = None
    _num_downloads = None
    # NOTE(review): _playlist_level and _playlist_urls are not rebound in the
    # visible part of __init__; _playlist_urls is a mutable set defined on the
    # class, so it is shared between instances unless rebound elsewhere - confirm.
    _playlist_level = 0
    _playlist_urls = set()
    _screen_file = None
 406
 407     def __init__(self, params=None, auto_init=True):
 408         """Create a FileDownloader object with the given options."""
 409         if params is None:
 410             params = {}
 411         self._ies = []
 412         self._ies_instances = {}
 413         self._pps = {'beforedl': [], 'aftermove': [], 'normal': []}
 414         self.__prepare_filename_warned = False
 415         self._post_hooks = []
 416         self._progress_hooks = []
 417         self._download_retcode = 0
 418         self._num_downloads = 0
 419         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 420         self._err_file = sys.stderr
 421         self.params = {
 422             # Default parameters
 423             'nocheckcertificate': False,
 424         }
 425         self.params.update(params)
 426         self.cache = Cache(self)
 427         self.archive = set()
 428
 429         """Preload the archive, if any is specified"""
 430         def preload_download_archive(self):
 431             fn = self.params.get('download_archive')
 432             if fn is None:
 433                 return False
 434             try:
 435                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 436                     for line in archive_file:
 437                         self.archive.add(line.strip())
 438             except IOError as ioe:
 439                 if ioe.errno != errno.ENOENT:
 440                     raise
 441                 return False
 442             return True
 443
 444         def check_deprecated(param, option, suggestion):
 445             if self.params.get(param) is not None:
 446                 self.report_warning(
 447                     '%s is deprecated. Use %s instead.' % (option, suggestion))
 448                 return True
 449             return False
 450
 451         if self.params.get('verbose'):
 452             self.to_stdout('[debug] Loading archive file %r' % self.params.get('download_archive'))
 453
 454         preload_download_archive(self)
 455
 456         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 457             if self.params.get('geo_verification_proxy') is None:
 458                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 459
 460         if self.params.get('final_ext'):
 461             if self.params.get('merge_output_format'):
 462                 self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
 463             self.params['merge_output_format'] = self.params['final_ext']
 464
 465         if 'overwrites' in self.params and self.params['overwrites'] is None:
 466             del self.params['overwrites']
 467
 468         check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
 469         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 470         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 471
 472         if params.get('bidi_workaround', False):
 473             try:
 474                 import pty
 475                 master, slave = pty.openpty()
 476                 width = compat_get_terminal_size().columns
 477                 if width is None:
 478                     width_args = []
 479                 else:
 480                     width_args = ['-w', str(width)]
 481                 sp_kwargs = dict(
 482                     stdin=subprocess.PIPE,
 483                     stdout=slave,
 484                     stderr=self._err_file)
 485                 try:
 486                     self._output_process = subprocess.Popen(
 487                         ['bidiv'] + width_args, **sp_kwargs
 488                     )
 489                 except OSError:
 490                     self._output_process = subprocess.Popen(
 491                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 492                 self._output_channel = os.fdopen(master, 'rb')
 493             except OSError as ose:
 494                 if ose.errno == errno.ENOENT:
 495                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 496                 else:
 497                     raise
 498
 499         if (sys.platform != 'win32'
 500                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 501                 and not params.get('restrictfilenames', False)):
 502             # Unicode filesystem API will throw errors (#1474, #13027)
 503             self.report_warning(
 504                 'Assuming --restrict-filenames since file system encoding '
 505                 'cannot encode all characters. '
 506                 'Set the LC_ALL environment variable to fix this.')
 507             self.params['restrictfilenames'] = True
 508
 509         self.outtmpl_dict = self.parse_outtmpl()
 510
 511         self._setup_opener()
 512
 513         if auto_init:
 514             self.print_debug_header()
 515             self.add_default_info_extractors()
 516
 517         for pp_def_raw in self.params.get('postprocessors', []):
 518             pp_class = get_postprocessor(pp_def_raw['key'])
 519             pp_def = dict(pp_def_raw)
 520             del pp_def['key']
 521             if 'when' in pp_def:
 522                 when = pp_def['when']
 523                 del pp_def['when']
 524             else:
 525                 when = 'normal'
 526             pp = pp_class(self, **compat_kwargs(pp_def))
 527             self.add_post_processor(pp, when=when)
 528
 529         for ph in self.params.get('post_hooks', []):
 530             self.add_post_hook(ph)
 531
 532         for ph in self.params.get('progress_hooks', []):
 533             self.add_progress_hook(ph)
 534
 535         register_socks_protocols()
 536
 537     def warn_if_short_id(self, argv):
 538         # short YouTube ID starting with dash?
 539         idxs = [
 540             i for i, a in enumerate(argv)
 541             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 542         if idxs:
 543             correct_argv = (
 544                 ['youtube-dlc']
 545                 + [a for i, a in enumerate(argv) if i not in idxs]
 546                 + ['--'] + [argv[i] for i in idxs]
 547             )
 548             self.report_warning(
 549                 'Long argument string detected. '
 550                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 551                 args_to_str(correct_argv))
 552
 553     def add_info_extractor(self, ie):
 554         """Add an InfoExtractor object to the end of the list."""
 555         self._ies.append(ie)
 556         if not isinstance(ie, type):
 557             self._ies_instances[ie.ie_key()] = ie
 558             ie.set_downloader(self)
 559
 560     def get_info_extractor(self, ie_key):
 561         """
 562         Get an instance of an IE with name ie_key, it will try to get one from
 563         the _ies list, if there's no instance it will create a new one and add
 564         it to the extractor list.
 565         """
 566         ie = self._ies_instances.get(ie_key)
 567         if ie is None:
 568             ie = get_info_extractor(ie_key)()
 569             self.add_info_extractor(ie)
 570         return ie
 571
 572     def add_default_info_extractors(self):
 573         """
 574         Add the InfoExtractors returned by gen_extractors to the end of the list
 575         """
 576         for ie in gen_extractor_classes():
 577             self.add_info_extractor(ie)
 578
 579     def add_post_processor(self, pp, when='normal'):
 580         """Add a PostProcessor object to the end of the chain."""
 581         self._pps[when].append(pp)
 582         pp.set_downloader(self)
 583
 584     def add_post_hook(self, ph):
 585         """Add the post hook"""
 586         self._post_hooks.append(ph)
 587
 588     def add_progress_hook(self, ph):
 589         """Add the progress hook (currently only for the file downloader)"""
 590         self._progress_hooks.append(ph)
 591
 592     def _bidi_workaround(self, message):
 593         if not hasattr(self, '_output_channel'):
 594             return message
 595
 596         assert hasattr(self, '_output_process')
 597         assert isinstance(message, compat_str)
 598         line_count = message.count('\n') + 1
 599         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 600         self._output_process.stdin.flush()
 601         res = ''.join(self._output_channel.readline().decode('utf-8')
 602                       for _ in range(line_count))
 603         return res[:-len('\n')]
 604
 605     def to_screen(self, message, skip_eol=False):
 606         """Print message to stdout if not in quiet mode."""
 607         return self.to_stdout(message, skip_eol, check_quiet=True)
 608
 609     def _write_string(self, s, out=None):
 610         write_string(s, out=out, encoding=self.params.get('encoding'))
 611
 612     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 613         """Print message to stdout if not in quiet mode."""
 614         if self.params.get('logger'):
 615             self.params['logger'].debug(message)
 616         elif not check_quiet or not self.params.get('quiet', False):
 617             message = self._bidi_workaround(message)
 618             terminator = ['\n', ''][skip_eol]
 619             output = message + terminator
 620
 621             self._write_string(output, self._screen_file)
 622
 623     def to_stderr(self, message):
 624         """Print message to stderr."""
 625         assert isinstance(message, compat_str)
 626         if self.params.get('logger'):
 627             self.params['logger'].error(message)
 628         else:
 629             message = self._bidi_workaround(message)
 630             output = message + '\n'
 631             self._write_string(output, self._err_file)
 632
 633     def to_console_title(self, message):
 634         if not self.params.get('consoletitle', False):
 635             return
 636         if compat_os_name == 'nt':
 637             if ctypes.windll.kernel32.GetConsoleWindow():
 638                 # c_wchar_p() might not be necessary if `message` is
 639                 # already of type unicode()
 640                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 641         elif 'TERM' in os.environ:
 642             self._write_string('\033]0;%s\007' % message, self._screen_file)
 643
 644     def save_console_title(self):
 645         if not self.params.get('consoletitle', False):
 646             return
 647         if self.params.get('simulate', False):
 648             return
 649         if compat_os_name != 'nt' and 'TERM' in os.environ:
 650             # Save the title on stack
 651             self._write_string('\033[22;0t', self._screen_file)
 652
 653     def restore_console_title(self):
 654         if not self.params.get('consoletitle', False):
 655             return
 656         if self.params.get('simulate', False):
 657             return
 658         if compat_os_name != 'nt' and 'TERM' in os.environ:
 659             # Restore the title from stack
 660             self._write_string('\033[23;0t', self._screen_file)
 661
 662     def __enter__(self):
 663         self.save_console_title()
 664         return self
 665
 666     def __exit__(self, *args):
 667         self.restore_console_title()
 668
 669         if self.params.get('cookiefile') is not None:
 670             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 671
 672     def trouble(self, message=None, tb=None):
 673         """Determine action to take when a download problem appears.
 674
 675         Depending on if the downloader has been configured to ignore
 676         download errors or not, this method may throw an exception or
 677         not when errors are found, after printing the message.
 678
 679         tb, if given, is additional traceback information.
 680         """
 681         if message is not None:
 682             self.to_stderr(message)
 683         if self.params.get('verbose'):
 684             if tb is None:
 685                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 686                     tb = ''
 687                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 688                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 689                     tb += encode_compat_str(traceback.format_exc())
 690                 else:
 691                     tb_data = traceback.format_list(traceback.extract_stack())
 692                     tb = ''.join(tb_data)
 693             self.to_stderr(tb)
 694         if not self.params.get('ignoreerrors', False):
 695             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 696                 exc_info = sys.exc_info()[1].exc_info
 697             else:
 698                 exc_info = sys.exc_info()
 699             raise DownloadError(message, exc_info)
 700         self._download_retcode = 1
 701
 702     def report_warning(self, message):
 703         '''
 704         Print the message to stderr, it will be prefixed with 'WARNING:'
 705         If stderr is a tty file the 'WARNING:' will be colored
 706         '''
 707         if self.params.get('logger') is not None:
 708             self.params['logger'].warning(message)
 709         else:
 710             if self.params.get('no_warnings'):
 711                 return
 712             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 713                 _msg_header = '\033[0;33mWARNING:\033[0m'
 714             else:
 715                 _msg_header = 'WARNING:'
 716             warning_message = '%s %s' % (_msg_header, message)
 717             self.to_stderr(warning_message)
 718
 719     def report_error(self, message, tb=None):
 720         '''
 721         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 722         in red if stderr is a tty file.
 723         '''
 724         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 725             _msg_header = '\033[0;31mERROR:\033[0m'
 726         else:
 727             _msg_header = 'ERROR:'
 728         error_message = '%s %s' % (_msg_header, message)
 729         self.trouble(error_message, tb)
 730
 731     def report_file_already_downloaded(self, file_name):
 732         """Report file has already been fully downloaded."""
 733         try:
 734             self.to_screen('[download] %s has already been downloaded' % file_name)
 735         except UnicodeEncodeError:
 736             self.to_screen('[download] The file has already been downloaded')
 737
 738     def report_file_delete(self, file_name):
 739         """Report that existing file will be deleted."""
 740         try:
 741             self.to_screen('Deleting existing file %s' % file_name)
 742         except UnicodeEncodeError:
 743             self.to_screen('Deleting existing file')
 744
 745     def parse_outtmpl(self):
 746         outtmpl_dict = self.params.get('outtmpl', {})
 747         if not isinstance(outtmpl_dict, dict):
 748             outtmpl_dict = {'default': outtmpl_dict}
 749         outtmpl_dict.update({
 750             k: v for k, v in DEFAULT_OUTTMPL.items()
 751             if not outtmpl_dict.get(k)})
 752         for key, val in outtmpl_dict.items():
 753             if isinstance(val, bytes):
 754                 self.report_warning(
 755                     'Parameter outtmpl is bytes, but should be a unicode string. '
 756                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 757         return outtmpl_dict
 758
 759     def _prepare_filename(self, info_dict, tmpl_type='default'):
 760         try:
 761             template_dict = dict(info_dict)
 762
 763             template_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
 764                 formatSeconds(info_dict['duration'], '-')
 765                 if info_dict.get('duration', None) is not None
 766                 else None)
 767
 768             template_dict['epoch'] = int(time.time())
 769             autonumber_size = self.params.get('autonumber_size')
 770             if autonumber_size is None:
 771                 autonumber_size = 5
 772             template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
 773             if template_dict.get('resolution') is None:
 774                 if template_dict.get('width') and template_dict.get('height'):
 775                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 776                 elif template_dict.get('height'):
 777                     template_dict['resolution'] = '%sp' % template_dict['height']
 778                 elif template_dict.get('width'):
 779                     template_dict['resolution'] = '%dx?' % template_dict['width']
 780
 781             sanitize = lambda k, v: sanitize_filename(
 782                 compat_str(v),
 783                 restricted=self.params.get('restrictfilenames'),
 784                 is_id=(k == 'id' or k.endswith('_id')))
 785             template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
 786                                  for k, v in template_dict.items()
 787                                  if v is not None and not isinstance(v, (list, tuple, dict)))
 788             na = self.params.get('outtmpl_na_placeholder', 'NA')
 789             template_dict = collections.defaultdict(lambda: na, template_dict)
 790
 791             outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
 792             force_ext = OUTTMPL_TYPES.get(tmpl_type)
 793
 794             # For fields playlist_index and autonumber convert all occurrences
 795             # of %(field)s to %(field)0Nd for backward compatibility
 796             field_size_compat_map = {
 797                 'playlist_index': len(str(template_dict['n_entries'])),
 798                 'autonumber': autonumber_size,
 799             }
 800             FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
 801             mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
 802             if mobj:
 803                 outtmpl = re.sub(
 804                     FIELD_SIZE_COMPAT_RE,
 805                     r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
 806                     outtmpl)
 807
 808             # As of [1] format syntax is:
 809             #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
 810             # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
 811             FORMAT_RE = r'''(?x)
 812                 (?<!%)
 813                 %
 814                 \({0}\)  # mapping key
 815                 (?:[#0\-+ ]+)?  # conversion flags (optional)
 816                 (?:\d+)?  # minimum field width (optional)
 817                 (?:\.\d+)?  # precision (optional)
 818                 [hlL]?  # length modifier (optional)
 819                 (?P<type>[diouxXeEfFgGcrs%])  # conversion type
 820             '''
 821
 822             numeric_fields = list(self._NUMERIC_FIELDS)
 823
 824             # Format date
 825             FORMAT_DATE_RE = FORMAT_RE.format(r'(?P<key>(?P<field>\w+)>(?P<format>.+?))')
 826             for mobj in re.finditer(FORMAT_DATE_RE, outtmpl):
 827                 conv_type, field, frmt, key = mobj.group('type', 'field', 'format', 'key')
 828                 if key in template_dict:
 829                     continue
 830                 value = strftime_or_none(template_dict.get(field), frmt, na)
 831                 if conv_type in 'crs':  # string
 832                     value = sanitize(field, value)
 833                 else:  # number
 834                     numeric_fields.append(key)
 835                     value = float_or_none(value, default=None)
 836                 if value is not None:
 837                     template_dict[key] = value
 838
 839             # Missing numeric fields used together with integer presentation types
 840             # in format specification will break the argument substitution since
 841             # string NA placeholder is returned for missing fields. We will patch
 842             # output template for missing fields to meet string presentation type.
 843             for numeric_field in numeric_fields:
 844                 if numeric_field not in template_dict:
 845                     outtmpl = re.sub(
 846                         FORMAT_RE.format(re.escape(numeric_field)),
 847                         r'%({0})s'.format(numeric_field), outtmpl)
 848
 849             # expand_path translates '%%' into '%' and '$$' into '$'
 850             # correspondingly that is not what we want since we need to keep
 851             # '%%' intact for template dict substitution step. Working around
 852             # with boundary-alike separator hack.
 853             sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 854             outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 855
 856             # outtmpl should be expand_path'ed before template dict substitution
 857             # because meta fields may contain env variables we don't want to
 858             # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
 859             # title "Hello $PATH", we don't want `$PATH` to be expanded.
 860             filename = expand_path(outtmpl).replace(sep, '') % template_dict
 861
 862             if force_ext is not None:
 863                 filename = replace_extension(filename, force_ext, template_dict.get('ext'))
 864
 865             # https://github.com/blackjack4494/youtube-dlc/issues/85
 866             trim_file_name = self.params.get('trim_file_name', False)
 867             if trim_file_name:
 868                 fn_groups = filename.rsplit('.')
 869                 ext = fn_groups[-1]
 870                 sub_ext = ''
 871                 if len(fn_groups) > 2:
 872                     sub_ext = fn_groups[-2]
 873                 filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
 874
 875             return filename
 876         except ValueError as err:
 877             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 878             return None
 879
 880     def prepare_filename(self, info_dict, dir_type='', warn=False):
 881         """Generate the output filename."""
 882         paths = self.params.get('paths', {})
 883         assert isinstance(paths, dict)
 884         filename = self._prepare_filename(info_dict, dir_type or 'default')
 885
 886         if warn and not self.__prepare_filename_warned:
 887             if not paths:
 888                 pass
 889             elif filename == '-':
 890                 self.report_warning('--paths is ignored when an outputting to stdout')
 891             elif os.path.isabs(filename):
 892                 self.report_warning('--paths is ignored since an absolute path is given in output template')
 893             self.__prepare_filename_warned = True
 894         if filename == '-' or not filename:
 895             return filename
 896
 897         homepath = expand_path(paths.get('home', '').strip())
 898         assert isinstance(homepath, compat_str)
 899         subdir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
 900         assert isinstance(subdir, compat_str)
 901         path = os.path.join(homepath, subdir, filename)
 902
 903         # Temporary fix for #4787
 904         # 'Treat' all problem characters by passing filename through preferredencoding
 905         # to workaround encoding issues with subprocess on python2 @ Windows
 906         if sys.version_info < (3, 0) and sys.platform == 'win32':
 907             path = encodeFilename(path, True).decode(preferredencoding())
 908         return sanitize_path(path, force=self.params.get('windowsfilenames'))
 909
 910     def _match_entry(self, info_dict, incomplete):
 911         """ Returns None if the file should be downloaded """
 912
 913         def check_filter():
 914             video_title = info_dict.get('title', info_dict.get('id', 'video'))
 915             if 'title' in info_dict:
 916                 # This can happen when we're just evaluating the playlist
 917                 title = info_dict['title']
 918                 matchtitle = self.params.get('matchtitle', False)
 919                 if matchtitle:
 920                     if not re.search(matchtitle, title, re.IGNORECASE):
 921                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 922                 rejecttitle = self.params.get('rejecttitle', False)
 923                 if rejecttitle:
 924                     if re.search(rejecttitle, title, re.IGNORECASE):
 925                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 926             date = info_dict.get('upload_date')
 927             if date is not None:
 928                 dateRange = self.params.get('daterange', DateRange())
 929                 if date not in dateRange:
 930                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 931             view_count = info_dict.get('view_count')
 932             if view_count is not None:
 933                 min_views = self.params.get('min_views')
 934                 if min_views is not None and view_count < min_views:
 935                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 936                 max_views = self.params.get('max_views')
 937                 if max_views is not None and view_count > max_views:
 938                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 939             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
 940                 return 'Skipping "%s" because it is age restricted' % video_title
 941             if self.in_download_archive(info_dict):
 942                 return '%s has already been recorded in archive' % video_title
 943
 944             if not incomplete:
 945                 match_filter = self.params.get('match_filter')
 946                 if match_filter is not None:
 947                     ret = match_filter(info_dict)
 948                     if ret is not None:
 949                         return ret
 950             return None
 951
 952         reason = check_filter()
 953         if reason is not None:
 954             self.to_screen('[download] ' + reason)
 955             if reason.endswith('has already been recorded in the archive') and self.params.get('break_on_existing', False):
 956                 raise ExistingVideoReached()
 957             elif self.params.get('break_on_reject', False):
 958                 raise RejectedVideoReached()
 959         return reason
 960
 961     @staticmethod
 962     def add_extra_info(info_dict, extra_info):
 963         '''Set the keys from extra_info in info dict if they are missing'''
 964         for key, value in extra_info.items():
 965             info_dict.setdefault(key, value)
 966
 967     def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info={},
 968                      process=True, force_generic_extractor=False):
 969         '''
 970         Returns a list with a dictionary for each video we find.
 971         If 'download', also downloads the videos.
 972         extra_info is a dict containing the extra values to add to each result
 973         '''
 974
 975         if not ie_key and force_generic_extractor:
 976             ie_key = 'Generic'
 977
 978         if ie_key:
 979             ies = [self.get_info_extractor(ie_key)]
 980         else:
 981             ies = self._ies
 982
 983         for ie in ies:
 984             if not ie.suitable(url):
 985                 continue
 986
 987             ie_key = ie.ie_key()
 988             ie = self.get_info_extractor(ie_key)
 989             if not ie.working():
 990                 self.report_warning('The program functionality for this site has been marked as broken, '
 991                                     'and will probably not work.')
 992
 993             try:
 994                 temp_id = str_or_none(
 995                     ie.extract_id(url) if callable(getattr(ie, 'extract_id', None))
 996                     else ie._match_id(url))
 997             except (AssertionError, IndexError, AttributeError):
 998                 temp_id = None
 999             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1000                 self.to_screen("[%s] %s: has already been recorded in archive" % (
1001                                ie_key, temp_id))
1002                 break
1003             return self.__extract_info(url, ie, download, extra_info, process, info_dict)
1004         else:
1005             self.report_error('no suitable InfoExtractor for URL %s' % url)
1006
1007     def __handle_extraction_exceptions(func):
1008         def wrapper(self, *args, **kwargs):
1009             try:
1010                 return func(self, *args, **kwargs)
1011             except GeoRestrictedError as e:
1012                 msg = e.msg
1013                 if e.countries:
1014                     msg += '\nThis video is available in %s.' % ', '.join(
1015                         map(ISO3166Utils.short2full, e.countries))
1016                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1017                 self.report_error(msg)
1018             except ExtractorError as e:  # An error we somewhat expected
1019                 self.report_error(compat_str(e), e.format_traceback())
1020             except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
1021                 raise
1022             except Exception as e:
1023                 if self.params.get('ignoreerrors', False):
1024                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
1025                 else:
1026                     raise
1027         return wrapper
1028
1029     @__handle_extraction_exceptions
1030     def __extract_info(self, url, ie, download, extra_info, process, info_dict):
1031         ie_result = ie.extract(url)
1032         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1033             return
1034         if isinstance(ie_result, list):
1035             # Backwards compatibility: old IE result format
1036             ie_result = {
1037                 '_type': 'compat_list',
1038                 'entries': ie_result,
1039             }
1040         if info_dict:
1041             if info_dict.get('id'):
1042                 ie_result['id'] = info_dict['id']
1043             if info_dict.get('title'):
1044                 ie_result['title'] = info_dict['title']
1045         self.add_default_extra_info(ie_result, ie, url)
1046         if process:
1047             return self.process_ie_result(ie_result, download, extra_info)
1048         else:
1049             return ie_result
1050
1051     def add_default_extra_info(self, ie_result, ie, url):
1052         self.add_extra_info(ie_result, {
1053             'extractor': ie.IE_NAME,
1054             'webpage_url': url,
1055             'webpage_url_basename': url_basename(url),
1056             'extractor_key': ie.ie_key(),
1057         })
1058
1059     def process_ie_result(self, ie_result, download=True, extra_info={}):
1060         """
1061         Take the result of the ie(may be modified) and resolve all unresolved
1062         references (URLs, playlist items).
1063
1064         It will also download the videos if 'download'.
1065         Returns the resolved ie_result.
1066         """
1067         result_type = ie_result.get('_type', 'video')
1068
1069         if result_type in ('url', 'url_transparent'):
1070             ie_result['url'] = sanitize_url(ie_result['url'])
1071             extract_flat = self.params.get('extract_flat', False)
1072             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1073                     or extract_flat is True):
1074                 self.__forced_printings(ie_result, self.prepare_filename(ie_result), incomplete=True)
1075                 return ie_result
1076
1077         if result_type == 'video':
1078             self.add_extra_info(ie_result, extra_info)
1079             return self.process_video_result(ie_result, download=download)
1080         elif result_type == 'url':
1081             # We have to add extra_info to the results because it may be
1082             # contained in a playlist
1083             return self.extract_info(ie_result['url'],
1084                                      download, info_dict=ie_result,
1085                                      ie_key=ie_result.get('ie_key'),
1086                                      extra_info=extra_info)
1087         elif result_type == 'url_transparent':
1088             # Use the information from the embedding page
1089             info = self.extract_info(
1090                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1091                 extra_info=extra_info, download=False, process=False)
1092
1093             # extract_info may return None when ignoreerrors is enabled and
1094             # extraction failed with an error, don't crash and return early
1095             # in this case
1096             if not info:
1097                 return info
1098
1099             force_properties = dict(
1100                 (k, v) for k, v in ie_result.items() if v is not None)
1101             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1102                 if f in force_properties:
1103                     del force_properties[f]
1104             new_result = info.copy()
1105             new_result.update(force_properties)
1106
1107             # Extracted info may not be a video result (i.e.
1108             # info.get('_type', 'video') != video) but rather an url or
1109             # url_transparent. In such cases outer metadata (from ie_result)
1110             # should be propagated to inner one (info). For this to happen
1111             # _type of info should be overridden with url_transparent. This
1112             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1113             if new_result.get('_type') == 'url':
1114                 new_result['_type'] = 'url_transparent'
1115
1116             return self.process_ie_result(
1117                 new_result, download=download, extra_info=extra_info)
1118         elif result_type in ('playlist', 'multi_video'):
1119             # Protect from infinite recursion due to recursively nested playlists
1120             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1121             webpage_url = ie_result['webpage_url']
1122             if webpage_url in self._playlist_urls:
1123                 self.to_screen(
1124                     '[download] Skipping already downloaded playlist: %s'
1125                     % ie_result.get('title') or ie_result.get('id'))
1126                 return
1127
1128             self._playlist_level += 1
1129             self._playlist_urls.add(webpage_url)
1130             try:
1131                 return self.__process_playlist(ie_result, download)
1132             finally:
1133                 self._playlist_level -= 1
1134                 if not self._playlist_level:
1135                     self._playlist_urls.clear()
1136         elif result_type == 'compat_list':
1137             self.report_warning(
1138                 'Extractor %s returned a compat_list result. '
1139                 'It needs to be updated.' % ie_result.get('extractor'))
1140
1141             def _fixup(r):
1142                 self.add_extra_info(
1143                     r,
1144                     {
1145                         'extractor': ie_result['extractor'],
1146                         'webpage_url': ie_result['webpage_url'],
1147                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
1148                         'extractor_key': ie_result['extractor_key'],
1149                     }
1150                 )
1151                 return r
1152             ie_result['entries'] = [
1153                 self.process_ie_result(_fixup(r), download, extra_info)
1154                 for r in ie_result['entries']
1155             ]
1156             return ie_result
1157         else:
1158             raise Exception('Invalid result type: %s' % result_type)
1159
1160     def __process_playlist(self, ie_result, download):
1161         # We process each entry in the playlist
1162         playlist = ie_result.get('title') or ie_result.get('id')
1163         self.to_screen('[download] Downloading playlist: %s' % playlist)
1164
1165         if self.params.get('allow_playlist_files', True):
1166             ie_copy = {
1167                 'playlist': playlist,
1168                 'playlist_id': ie_result.get('id'),
1169                 'playlist_title': ie_result.get('title'),
1170                 'playlist_uploader': ie_result.get('uploader'),
1171                 'playlist_uploader_id': ie_result.get('uploader_id'),
1172                 'playlist_index': 0
1173             }
1174             ie_copy.update(dict(ie_result))
1175
1176             def ensure_dir_exists(path):
1177                 return make_dir(path, self.report_error)
1178
1179             if self.params.get('writeinfojson', False):
1180                 infofn = self.prepare_filename(ie_copy, 'pl_infojson')
1181                 if not ensure_dir_exists(encodeFilename(infofn)):
1182                     return
1183                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
1184                     self.to_screen('[info] Playlist metadata is already present')
1185                 else:
1186                     playlist_info = dict(ie_result)
1187                     # playlist_info['entries'] = list(playlist_info['entries'])  # Entries is a generator which shouldnot be resolved here
1188                     del playlist_info['entries']
1189                     self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
1190                     try:
1191                         write_json_file(self.filter_requested_info(playlist_info), infofn)
1192                     except (OSError, IOError):
1193                         self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
1194
1195             if self.params.get('writedescription', False):
1196                 descfn = self.prepare_filename(ie_copy, 'pl_description')
1197                 if not ensure_dir_exists(encodeFilename(descfn)):
1198                     return
1199                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
1200                     self.to_screen('[info] Playlist description is already present')
1201                 elif ie_result.get('description') is None:
1202                     self.report_warning('There\'s no playlist description to write.')
1203                 else:
1204                     try:
1205                         self.to_screen('[info] Writing playlist description to: ' + descfn)
1206                         with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1207                             descfile.write(ie_result['description'])
1208                     except (OSError, IOError):
1209                         self.report_error('Cannot write playlist description file ' + descfn)
1210                         return
1211
1212         playlist_results = []
1213
1214         playliststart = self.params.get('playliststart', 1) - 1
1215         playlistend = self.params.get('playlistend')
1216         # For backwards compatibility, interpret -1 as whole list
1217         if playlistend == -1:
1218             playlistend = None
1219
1220         playlistitems_str = self.params.get('playlist_items')
1221         playlistitems = None
1222         if playlistitems_str is not None:
1223             def iter_playlistitems(format):
1224                 for string_segment in format.split(','):
1225                     if '-' in string_segment:
1226                         start, end = string_segment.split('-')
1227                         for item in range(int(start), int(end) + 1):
1228                             yield int(item)
1229                     else:
1230                         yield int(string_segment)
1231             playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1232
1233         ie_entries = ie_result['entries']
1234
1235         def make_playlistitems_entries(list_ie_entries):
1236             num_entries = len(list_ie_entries)
1237             return [
1238                 list_ie_entries[i - 1] for i in playlistitems
1239                 if -num_entries <= i - 1 < num_entries]
1240
1241         def report_download(num_entries):
1242             self.to_screen(
1243                 '[%s] playlist %s: Downloading %d videos' %
1244                 (ie_result['extractor'], playlist, num_entries))
1245
1246         if isinstance(ie_entries, list):
1247             n_all_entries = len(ie_entries)
1248             if playlistitems:
1249                 entries = make_playlistitems_entries(ie_entries)
1250             else:
1251                 entries = ie_entries[playliststart:playlistend]
1252             n_entries = len(entries)
1253             self.to_screen(
1254                 '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
1255                 (ie_result['extractor'], playlist, n_all_entries, n_entries))
1256         elif isinstance(ie_entries, PagedList):
1257             if playlistitems:
1258                 entries = []
1259                 for item in playlistitems:
1260                     entries.extend(ie_entries.getslice(
1261                         item - 1, item
1262                     ))
1263             else:
1264                 entries = ie_entries.getslice(
1265                     playliststart, playlistend)
1266             n_entries = len(entries)
1267             report_download(n_entries)
1268         else:  # iterable
1269             if playlistitems:
1270                 entries = make_playlistitems_entries(list(itertools.islice(
1271                     ie_entries, 0, max(playlistitems))))
1272             else:
1273                 entries = list(itertools.islice(
1274                     ie_entries, playliststart, playlistend))
1275             n_entries = len(entries)
1276             report_download(n_entries)
1277
1278         if self.params.get('playlistreverse', False):
1279             entries = entries[::-1]
1280
1281         if self.params.get('playlistrandom', False):
1282             random.shuffle(entries)
1283
1284         x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1285
1286         for i, entry in enumerate(entries, 1):
1287             self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1288             # This __x_forwarded_for_ip thing is a bit ugly but requires
1289             # minimal changes
1290             if x_forwarded_for:
1291                 entry['__x_forwarded_for_ip'] = x_forwarded_for
1292             extra = {
1293                 'n_entries': n_entries,
1294                 'playlist': playlist,
1295                 'playlist_id': ie_result.get('id'),
1296                 'playlist_title': ie_result.get('title'),
1297                 'playlist_uploader': ie_result.get('uploader'),
1298                 'playlist_uploader_id': ie_result.get('uploader_id'),
1299                 'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
1300                 'extractor': ie_result['extractor'],
1301                 'webpage_url': ie_result['webpage_url'],
1302                 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1303                 'extractor_key': ie_result['extractor_key'],
1304             }
1305
1306             if self._match_entry(entry, incomplete=True) is not None:
1307                 continue
1308
1309             entry_result = self.__process_iterable_entry(entry, download, extra)
1310             # TODO: skip failed (empty) entries?
1311             playlist_results.append(entry_result)
1312         ie_result['entries'] = playlist_results
1313         self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1314         return ie_result
1315
1316     @__handle_extraction_exceptions
1317     def __process_iterable_entry(self, entry, download, extra_info):
1318         return self.process_ie_result(
1319             entry, download=download, extra_info=extra_info)
1320
1321     def _build_format_filter(self, filter_spec):
1322         " Returns a function to filter the formats according to the filter_spec "
1323
1324         OPERATORS = {
1325             '<': operator.lt,
1326             '<=': operator.le,
1327             '>': operator.gt,
1328             '>=': operator.ge,
1329             '=': operator.eq,
1330             '!=': operator.ne,
1331         }
1332         operator_rex = re.compile(r'''(?x)\s*
1333             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
1334             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1335             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
1336             $
1337             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1338         m = operator_rex.search(filter_spec)
1339         if m:
1340             try:
1341                 comparison_value = int(m.group('value'))
1342             except ValueError:
1343                 comparison_value = parse_filesize(m.group('value'))
1344                 if comparison_value is None:
1345                     comparison_value = parse_filesize(m.group('value') + 'B')
1346                 if comparison_value is None:
1347                     raise ValueError(
1348                         'Invalid value %r in format specification %r' % (
1349                             m.group('value'), filter_spec))
1350             op = OPERATORS[m.group('op')]
1351
1352         if not m:
1353             STR_OPERATORS = {
1354                 '=': operator.eq,
1355                 '^=': lambda attr, value: attr.startswith(value),
1356                 '$=': lambda attr, value: attr.endswith(value),
1357                 '*=': lambda attr, value: value in attr,
1358             }
1359             str_operator_rex = re.compile(r'''(?x)
1360                 \s*(?P<key>[a-zA-Z0-9._-]+)
1361                 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
1362                 \s*(?P<value>[a-zA-Z0-9._-]+)
1363                 \s*$
1364                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1365             m = str_operator_rex.search(filter_spec)
1366             if m:
1367                 comparison_value = m.group('value')
1368                 str_op = STR_OPERATORS[m.group('op')]
1369                 if m.group('negation'):
1370                     op = lambda attr, value: not str_op(attr, value)
1371                 else:
1372                     op = str_op
1373
1374         if not m:
1375             raise ValueError('Invalid filter specification %r' % filter_spec)
1376
1377         def _filter(f):
1378             actual_value = f.get(m.group('key'))
1379             if actual_value is None:
1380                 return m.group('none_inclusive')
1381             return op(actual_value, comparison_value)
1382         return _filter
1383
1384     def _default_format_spec(self, info_dict, download=True):
1385
1386         def can_merge():
1387             merger = FFmpegMergerPP(self)
1388             return merger.available and merger.can_merge()
1389
1390         prefer_best = (
1391             not self.params.get('simulate', False)
1392             and download
1393             and (
1394                 not can_merge()
1395                 or info_dict.get('is_live', False)
1396                 or self.outtmpl_dict['default'] == '-'))
1397
1398         return (
1399             'best/bestvideo+bestaudio'
1400             if prefer_best
1401             else 'bestvideo*+bestaudio/best'
1402             if not self.params.get('allow_multiple_audio_streams', False)
1403             else 'bestvideo+bestaudio/best')
1404
1405     def build_format_selector(self, format_spec):
1406         def syntax_error(note, start):
1407             message = (
1408                 'Invalid format specification: '
1409                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1410             return SyntaxError(message)
1411
1412         PICKFIRST = 'PICKFIRST'
1413         MERGE = 'MERGE'
1414         SINGLE = 'SINGLE'
1415         GROUP = 'GROUP'
1416         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1417
1418         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1419                                   'video': self.params.get('allow_multiple_video_streams', False)}
1420
1421         def _parse_filter(tokens):
1422             filter_parts = []
1423             for type, string, start, _, _ in tokens:
1424                 if type == tokenize.OP and string == ']':
1425                     return ''.join(filter_parts)
1426                 else:
1427                     filter_parts.append(string)
1428
1429         def _remove_unused_ops(tokens):
1430             # Remove operators that we don't use and join them with the surrounding strings
1431             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1432             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1433             last_string, last_start, last_end, last_line = None, None, None, None
1434             for type, string, start, end, line in tokens:
1435                 if type == tokenize.OP and string == '[':
1436                     if last_string:
1437                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1438                         last_string = None
1439                     yield type, string, start, end, line
1440                     # everything inside brackets will be handled by _parse_filter
1441                     for type, string, start, end, line in tokens:
1442                         yield type, string, start, end, line
1443                         if type == tokenize.OP and string == ']':
1444                             break
1445                 elif type == tokenize.OP and string in ALLOWED_OPS:
1446                     if last_string:
1447                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1448                         last_string = None
1449                     yield type, string, start, end, line
1450                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1451                     if not last_string:
1452                         last_string = string
1453                         last_start = start
1454                         last_end = end
1455                     else:
1456                         last_string += string
1457             if last_string:
1458                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1459
1460         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1461             selectors = []
1462             current_selector = None
1463             for type, string, start, _, _ in tokens:
1464                 # ENCODING is only defined in python 3.x
1465                 if type == getattr(tokenize, 'ENCODING', None):
1466                     continue
1467                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1468                     current_selector = FormatSelector(SINGLE, string, [])
1469                 elif type == tokenize.OP:
1470                     if string == ')':
1471                         if not inside_group:
1472                             # ')' will be handled by the parentheses group
1473                             tokens.restore_last_token()
1474                         break
1475                     elif inside_merge and string in ['/', ',']:
1476                         tokens.restore_last_token()
1477                         break
1478                     elif inside_choice and string == ',':
1479                         tokens.restore_last_token()
1480                         break
1481                     elif string == ',':
1482                         if not current_selector:
1483                             raise syntax_error('"," must follow a format selector', start)
1484                         selectors.append(current_selector)
1485                         current_selector = None
1486                     elif string == '/':
1487                         if not current_selector:
1488                             raise syntax_error('"/" must follow a format selector', start)
1489                         first_choice = current_selector
1490                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1491                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1492                     elif string == '[':
1493                         if not current_selector:
1494                             current_selector = FormatSelector(SINGLE, 'best', [])
1495                         format_filter = _parse_filter(tokens)
1496                         current_selector.filters.append(format_filter)
1497                     elif string == '(':
1498                         if current_selector:
1499                             raise syntax_error('Unexpected "("', start)
1500                         group = _parse_format_selection(tokens, inside_group=True)
1501                         current_selector = FormatSelector(GROUP, group, [])
1502                     elif string == '+':
1503                         if not current_selector:
1504                             raise syntax_error('Unexpected "+"', start)
1505                         selector_1 = current_selector
1506                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
1507                         if not selector_2:
1508                             raise syntax_error('Expected a selector', start)
1509                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1510                     else:
1511                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1512                 elif type == tokenize.ENDMARKER:
1513                     break
1514             if current_selector:
1515                 selectors.append(current_selector)
1516             return selectors
1517
1518         def _build_selector_function(selector):
1519             if isinstance(selector, list):  # ,
1520                 fs = [_build_selector_function(s) for s in selector]
1521
1522                 def selector_function(ctx):
1523                     for f in fs:
1524                         for format in f(ctx):
1525                             yield format
1526                 return selector_function
1527
1528             elif selector.type == GROUP:  # ()
1529                 selector_function = _build_selector_function(selector.selector)
1530
1531             elif selector.type == PICKFIRST:  # /
1532                 fs = [_build_selector_function(s) for s in selector.selector]
1533
1534                 def selector_function(ctx):
1535                     for f in fs:
1536                         picked_formats = list(f(ctx))
1537                         if picked_formats:
1538                             return picked_formats
1539                     return []
1540
1541             elif selector.type == SINGLE:  # atom
1542                 format_spec = selector.selector if selector.selector is not None else 'best'
1543
1544                 if format_spec == 'all':
1545                     def selector_function(ctx):
1546                         formats = list(ctx['formats'])
1547                         if formats:
1548                             for f in formats:
1549                                 yield f
1550
1551                 else:
1552                     format_fallback = False
1553                     format_spec_obj = re.match(r'(best|worst|b|w)(video|audio|v|a)?(\*)?$', format_spec)
1554                     if format_spec_obj is not None:
1555                         format_idx = 0 if format_spec_obj.group(1)[0] == 'w' else -1
1556                         format_type = format_spec_obj.group(2)[0] if format_spec_obj.group(2) else False
1557                         not_format_type = 'v' if format_type == 'a' else 'a'
1558                         format_modified = format_spec_obj.group(3) is not None
1559
1560                         format_fallback = not format_type and not format_modified  # for b, w
1561                         filter_f = ((lambda f: f.get(format_type + 'codec') != 'none')
1562                                     if format_type and format_modified  # bv*, ba*, wv*, wa*
1563                                     else (lambda f: f.get(not_format_type + 'codec') == 'none')
1564                                     if format_type  # bv, ba, wv, wa
1565                                     else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
1566                                     if not format_modified  # b, w
1567                                     else None)  # b*, w*
1568                     else:
1569                         format_idx = -1
1570                         filter_f = ((lambda f: f.get('ext') == format_spec)
1571                                     if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
1572                                     else (lambda f: f.get('format_id') == format_spec))  # id
1573
1574                     def selector_function(ctx):
1575                         formats = list(ctx['formats'])
1576                         if not formats:
1577                             return
1578                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
1579                         if matches:
1580                             yield matches[format_idx]
1581                         elif format_fallback == 'force' or (format_fallback and ctx['incomplete_formats']):
1582                             # for extractors with incomplete formats (audio only (soundcloud)
1583                             # or video only (imgur)) best/worst will fallback to
1584                             # best/worst {video,audio}-only format
1585                             yield formats[format_idx]
1586
1587             elif selector.type == MERGE:        # +
1588                 def _merge(formats_pair):
1589                     format_1, format_2 = formats_pair
1590
1591                     formats_info = []
1592                     formats_info.extend(format_1.get('requested_formats', (format_1,)))
1593                     formats_info.extend(format_2.get('requested_formats', (format_2,)))
1594
1595                     if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1596                         get_no_more = {"video": False, "audio": False}
1597                         for (i, fmt_info) in enumerate(formats_info):
1598                             for aud_vid in ["audio", "video"]:
1599                                 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1600                                     if get_no_more[aud_vid]:
1601                                         formats_info.pop(i)
1602                                     get_no_more[aud_vid] = True
1603
1604                     if len(formats_info) == 1:
1605                         return formats_info[0]
1606
1607                     video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1608                     audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1609
1610                     the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1611                     the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1612
1613                     output_ext = self.params.get('merge_output_format')
1614                     if not output_ext:
1615                         if the_only_video:
1616                             output_ext = the_only_video['ext']
1617                         elif the_only_audio and not video_fmts:
1618                             output_ext = the_only_audio['ext']
1619                         else:
1620                             output_ext = 'mkv'
1621
1622                     new_dict = {
1623                         'requested_formats': formats_info,
1624                         'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
1625                         'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
1626                         'ext': output_ext,
1627                     }
1628
1629                     if the_only_video:
1630                         new_dict.update({
1631                             'width': the_only_video.get('width'),
1632                             'height': the_only_video.get('height'),
1633                             'resolution': the_only_video.get('resolution'),
1634                             'fps': the_only_video.get('fps'),
1635                             'vcodec': the_only_video.get('vcodec'),
1636                             'vbr': the_only_video.get('vbr'),
1637                             'stretched_ratio': the_only_video.get('stretched_ratio'),
1638                         })
1639
1640                     if the_only_audio:
1641                         new_dict.update({
1642                             'acodec': the_only_audio.get('acodec'),
1643                             'abr': the_only_audio.get('abr'),
1644                         })
1645
1646                     return new_dict
1647
1648                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
1649
1650                 def selector_function(ctx):
1651                     for pair in itertools.product(
1652                             selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
1653                         yield _merge(pair)
1654
1655             filters = [self._build_format_filter(f) for f in selector.filters]
1656
1657             def final_selector(ctx):
1658                 ctx_copy = copy.deepcopy(ctx)
1659                 for _filter in filters:
1660                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1661                 return selector_function(ctx_copy)
1662             return final_selector
1663
1664         stream = io.BytesIO(format_spec.encode('utf-8'))
1665         try:
1666             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1667         except tokenize.TokenError:
1668             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1669
1670         class TokenIterator(object):
1671             def __init__(self, tokens):
1672                 self.tokens = tokens
1673                 self.counter = 0
1674
1675             def __iter__(self):
1676                 return self
1677
1678             def __next__(self):
1679                 if self.counter >= len(self.tokens):
1680                     raise StopIteration()
1681                 value = self.tokens[self.counter]
1682                 self.counter += 1
1683                 return value
1684
1685             next = __next__
1686
1687             def restore_last_token(self):
1688                 self.counter -= 1
1689
1690         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1691         return _build_selector_function(parsed_selector)
1692
1693     def _calc_headers(self, info_dict):
1694         res = std_headers.copy()
1695
1696         add_headers = info_dict.get('http_headers')
1697         if add_headers:
1698             res.update(add_headers)
1699
1700         cookies = self._calc_cookies(info_dict)
1701         if cookies:
1702             res['Cookie'] = cookies
1703
1704         if 'X-Forwarded-For' not in res:
1705             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1706             if x_forwarded_for_ip:
1707                 res['X-Forwarded-For'] = x_forwarded_for_ip
1708
1709         return res
1710
1711     def _calc_cookies(self, info_dict):
1712         pr = sanitized_Request(info_dict['url'])
1713         self.cookiejar.add_cookie_header(pr)
1714         return pr.get_header('Cookie')
1715
1716     def process_video_result(self, info_dict, download=True):
1717         assert info_dict.get('_type', 'video') == 'video'
1718
1719         if 'id' not in info_dict:
1720             raise ExtractorError('Missing "id" field in extractor result')
1721         if 'title' not in info_dict:
1722             raise ExtractorError('Missing "title" field in extractor result')
1723
1724         def report_force_conversion(field, field_not, conversion):
1725             self.report_warning(
1726                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
1727                 % (field, field_not, conversion))
1728
1729         def sanitize_string_field(info, string_field):
1730             field = info.get(string_field)
1731             if field is None or isinstance(field, compat_str):
1732                 return
1733             report_force_conversion(string_field, 'a string', 'string')
1734             info[string_field] = compat_str(field)
1735
1736         def sanitize_numeric_fields(info):
1737             for numeric_field in self._NUMERIC_FIELDS:
1738                 field = info.get(numeric_field)
1739                 if field is None or isinstance(field, compat_numeric_types):
1740                     continue
1741                 report_force_conversion(numeric_field, 'numeric', 'int')
1742                 info[numeric_field] = int_or_none(field)
1743
1744         sanitize_string_field(info_dict, 'id')
1745         sanitize_numeric_fields(info_dict)
1746
1747         if 'playlist' not in info_dict:
1748             # It isn't part of a playlist
1749             info_dict['playlist'] = None
1750             info_dict['playlist_index'] = None
1751
1752         thumbnails = info_dict.get('thumbnails')
1753         if thumbnails is None:
1754             thumbnail = info_dict.get('thumbnail')
1755             if thumbnail:
1756                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1757         if thumbnails:
1758             thumbnails.sort(key=lambda t: (
1759                 t.get('preference') if t.get('preference') is not None else -1,
1760                 t.get('width') if t.get('width') is not None else -1,
1761                 t.get('height') if t.get('height') is not None else -1,
1762                 t.get('id') if t.get('id') is not None else '', t.get('url')))
1763             for i, t in enumerate(thumbnails):
1764                 t['url'] = sanitize_url(t['url'])
1765                 if t.get('width') and t.get('height'):
1766                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1767                 if t.get('id') is None:
1768                     t['id'] = '%d' % i
1769
1770         if self.params.get('list_thumbnails'):
1771             self.list_thumbnails(info_dict)
1772             return
1773
1774         thumbnail = info_dict.get('thumbnail')
1775         if thumbnail:
1776             info_dict['thumbnail'] = sanitize_url(thumbnail)
1777         elif thumbnails:
1778             info_dict['thumbnail'] = thumbnails[-1]['url']
1779
1780         if 'display_id' not in info_dict and 'id' in info_dict:
1781             info_dict['display_id'] = info_dict['id']
1782
1783         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1784             # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1785             # see http://bugs.python.org/issue1646728)
1786             try:
1787                 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1788                 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1789             except (ValueError, OverflowError, OSError):
1790                 pass
1791
1792         # Auto generate title fields corresponding to the *_number fields when missing
1793         # in order to always have clean titles. This is very common for TV series.
1794         for field in ('chapter', 'season', 'episode'):
1795             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1796                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
1797
1798         for cc_kind in ('subtitles', 'automatic_captions'):
1799             cc = info_dict.get(cc_kind)
1800             if cc:
1801                 for _, subtitle in cc.items():
1802                     for subtitle_format in subtitle:
1803                         if subtitle_format.get('url'):
1804                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
1805                         if subtitle_format.get('ext') is None:
1806                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1807
1808         automatic_captions = info_dict.get('automatic_captions')
1809         subtitles = info_dict.get('subtitles')
1810
1811         if self.params.get('listsubtitles', False):
1812             if 'automatic_captions' in info_dict:
1813                 self.list_subtitles(
1814                     info_dict['id'], automatic_captions, 'automatic captions')
1815             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1816             return
1817
1818         info_dict['requested_subtitles'] = self.process_subtitles(
1819             info_dict['id'], subtitles, automatic_captions)
1820
1821         # We now pick which formats have to be downloaded
1822         if info_dict.get('formats') is None:
1823             # There's only one format available
1824             formats = [info_dict]
1825         else:
1826             formats = info_dict['formats']
1827
1828         if not formats:
1829             raise ExtractorError('No video formats found!')
1830
1831         def is_wellformed(f):
1832             url = f.get('url')
1833             if not url:
1834                 self.report_warning(
1835                     '"url" field is missing or empty - skipping format, '
1836                     'there is an error in extractor')
1837                 return False
1838             if isinstance(url, bytes):
1839                 sanitize_string_field(f, 'url')
1840             return True
1841
1842         # Filter out malformed formats for better extraction robustness
1843         formats = list(filter(is_wellformed, formats))
1844
1845         formats_dict = {}
1846
1847         # We check that all the formats have the format and format_id fields
1848         for i, format in enumerate(formats):
1849             sanitize_string_field(format, 'format_id')
1850             sanitize_numeric_fields(format)
1851             format['url'] = sanitize_url(format['url'])
1852             if not format.get('format_id'):
1853                 format['format_id'] = compat_str(i)
1854             else:
1855                 # Sanitize format_id from characters used in format selector expression
1856                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
1857             format_id = format['format_id']
1858             if format_id not in formats_dict:
1859                 formats_dict[format_id] = []
1860             formats_dict[format_id].append(format)
1861
1862         # Make sure all formats have unique format_id
1863         for format_id, ambiguous_formats in formats_dict.items():
1864             if len(ambiguous_formats) > 1:
1865                 for i, format in enumerate(ambiguous_formats):
1866                     format['format_id'] = '%s-%d' % (format_id, i)
1867
1868         for i, format in enumerate(formats):
1869             if format.get('format') is None:
1870                 format['format'] = '{id} - {res}{note}'.format(
1871                     id=format['format_id'],
1872                     res=self.format_resolution(format),
1873                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1874                 )
1875             # Automatically determine file extension if missing
1876             if format.get('ext') is None:
1877                 format['ext'] = determine_ext(format['url']).lower()
1878             # Automatically determine protocol if missing (useful for format
1879             # selection purposes)
1880             if format.get('protocol') is None:
1881                 format['protocol'] = determine_protocol(format)
1882             # Add HTTP headers, so that external programs can use them from the
1883             # json output
1884             full_format_info = info_dict.copy()
1885             full_format_info.update(format)
1886             format['http_headers'] = self._calc_headers(full_format_info)
1887         # Remove private housekeeping stuff
1888         if '__x_forwarded_for_ip' in info_dict:
1889             del info_dict['__x_forwarded_for_ip']
1890
1891         # TODO Central sorting goes here
1892
1893         if formats[0] is not info_dict:
1894             # only set the 'formats' fields if the original info_dict list them
1895             # otherwise we end up with a circular reference, the first (and unique)
1896             # element in the 'formats' field in info_dict is info_dict itself,
1897             # which can't be exported to json
1898             info_dict['formats'] = formats
1899         if self.params.get('listformats'):
1900             self.list_formats(info_dict)
1901             return
1902
1903         req_format = self.params.get('format')
1904         if req_format is None:
1905             req_format = self._default_format_spec(info_dict, download=download)
1906             if self.params.get('verbose'):
1907                 self.to_screen('[debug] Default format spec: %s' % req_format)
1908
1909         format_selector = self.build_format_selector(req_format)
1910
1911         # While in format selection we may need to have an access to the original
1912         # format set in order to calculate some metrics or do some processing.
1913         # For now we need to be able to guess whether original formats provided
1914         # by extractor are incomplete or not (i.e. whether extractor provides only
1915         # video-only or audio-only formats) for proper formats selection for
1916         # extractors with such incomplete formats (see
1917         # https://github.com/ytdl-org/youtube-dl/pull/5556).
1918         # Since formats may be filtered during format selection and may not match
1919         # the original formats the results may be incorrect. Thus original formats
1920         # or pre-calculated metrics should be passed to format selection routines
1921         # as well.
1922         # We will pass a context object containing all necessary additional data
1923         # instead of just formats.
1924         # This fixes incorrect format selection issue (see
1925         # https://github.com/ytdl-org/youtube-dl/issues/10083).
1926         incomplete_formats = (
1927             # All formats are video-only or
1928             all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
1929             # all formats are audio-only
1930             or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
1931
1932         ctx = {
1933             'formats': formats,
1934             'incomplete_formats': incomplete_formats,
1935         }
1936
1937         formats_to_download = list(format_selector(ctx))
1938         if not formats_to_download:
1939             raise ExtractorError('requested format not available',
1940                                  expected=True)
1941
1942         if download:
1943             self.to_screen('[info] Downloading format(s) %s' % ", ".join([f['format_id'] for f in formats_to_download]))
1944             if len(formats_to_download) > 1:
1945                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1946             for format in formats_to_download:
1947                 new_info = dict(info_dict)
1948                 new_info.update(format)
1949                 self.process_info(new_info)
1950         # We update the info dict with the best quality format (backwards compatibility)
1951         info_dict.update(formats_to_download[-1])
1952         return info_dict
1953
1954     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1955         """Select the requested subtitles and their format"""
1956         available_subs = {}
1957         if normal_subtitles and self.params.get('writesubtitles'):
1958             available_subs.update(normal_subtitles)
1959         if automatic_captions and self.params.get('writeautomaticsub'):
1960             for lang, cap_info in automatic_captions.items():
1961                 if lang not in available_subs:
1962                     available_subs[lang] = cap_info
1963
1964         if (not self.params.get('writesubtitles') and not
1965                 self.params.get('writeautomaticsub') or not
1966                 available_subs):
1967             return None
1968
1969         if self.params.get('allsubtitles', False):
1970             requested_langs = available_subs.keys()
1971         else:
1972             if self.params.get('subtitleslangs', False):
1973                 requested_langs = self.params.get('subtitleslangs')
1974             elif 'en' in available_subs:
1975                 requested_langs = ['en']
1976             else:
1977                 requested_langs = [list(available_subs.keys())[0]]
1978
1979         formats_query = self.params.get('subtitlesformat', 'best')
1980         formats_preference = formats_query.split('/') if formats_query else []
1981         subs = {}
1982         for lang in requested_langs:
1983             formats = available_subs.get(lang)
1984             if formats is None:
1985                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1986                 continue
1987             for ext in formats_preference:
1988                 if ext == 'best':
1989                     f = formats[-1]
1990                     break
1991                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1992                 if matches:
1993                     f = matches[-1]
1994                     break
1995             else:
1996                 f = formats[-1]
1997                 self.report_warning(
1998                     'No subtitle format found matching "%s" for language %s, '
1999                     'using %s' % (formats_query, lang, f['ext']))
2000             subs[lang] = f
2001         return subs
2002
2003     def __forced_printings(self, info_dict, filename, incomplete):
2004         def print_mandatory(field):
2005             if (self.params.get('force%s' % field, False)
2006                     and (not incomplete or info_dict.get(field) is not None)):
2007                 self.to_stdout(info_dict[field])
2008
2009         def print_optional(field):
2010             if (self.params.get('force%s' % field, False)
2011                     and info_dict.get(field) is not None):
2012                 self.to_stdout(info_dict[field])
2013
2014         print_mandatory('title')
2015         print_mandatory('id')
2016         if self.params.get('forceurl', False) and not incomplete:
2017             if info_dict.get('requested_formats') is not None:
2018                 for f in info_dict['requested_formats']:
2019                     self.to_stdout(f['url'] + f.get('play_path', ''))
2020             else:
2021                 # For RTMP URLs, also include the playpath
2022                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
2023         print_optional('thumbnail')
2024         print_optional('description')
2025         if self.params.get('forcefilename', False) and filename is not None:
2026             self.to_stdout(filename)
2027         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
2028             self.to_stdout(formatSeconds(info_dict['duration']))
2029         print_mandatory('format')
2030         if self.params.get('forcejson', False):
2031             self.to_stdout(json.dumps(info_dict))
2032
2033     def process_info(self, info_dict):
2034         """Process a single resolved IE result."""
2035
2036         assert info_dict.get('_type', 'video') == 'video'
2037
2038         info_dict.setdefault('__postprocessors', [])
2039
2040         max_downloads = self.params.get('max_downloads')
2041         if max_downloads is not None:
2042             if self._num_downloads >= int(max_downloads):
2043                 raise MaxDownloadsReached()
2044
2045         # TODO: backward compatibility, to be removed
2046         info_dict['fulltitle'] = info_dict['title']
2047
2048         if 'format' not in info_dict:
2049             info_dict['format'] = info_dict['ext']
2050
2051         if self._match_entry(info_dict, incomplete=False) is not None:
2052             return
2053
2054         self._num_downloads += 1
2055
2056         info_dict = self.pre_process(info_dict)
2057
2058         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2059         temp_filename = self.prepare_filename(info_dict, 'temp')
2060         files_to_move = {}
2061         skip_dl = self.params.get('skip_download', False)
2062
2063         # Forced printings
2064         self.__forced_printings(info_dict, full_filename, incomplete=False)
2065
2066         if self.params.get('simulate', False):
2067             if self.params.get('force_write_download_archive', False):
2068                 self.record_download_archive(info_dict)
2069
2070             # Do nothing else if in simulate mode
2071             return
2072
2073         if full_filename is None:
2074             return
2075
2076         def ensure_dir_exists(path):
2077             return make_dir(path, self.report_error)
2078
2079         if not ensure_dir_exists(encodeFilename(full_filename)):
2080             return
2081         if not ensure_dir_exists(encodeFilename(temp_filename)):
2082             return
2083
2084         if self.params.get('writedescription', False):
2085             descfn = self.prepare_filename(info_dict, 'description')
2086             if not ensure_dir_exists(encodeFilename(descfn)):
2087                 return
2088             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
2089                 self.to_screen('[info] Video description is already present')
2090             elif info_dict.get('description') is None:
2091                 self.report_warning('There\'s no description to write.')
2092             else:
2093                 try:
2094                     self.to_screen('[info] Writing video description to: ' + descfn)
2095                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
2096                         descfile.write(info_dict['description'])
2097                 except (OSError, IOError):
2098                     self.report_error('Cannot write description file ' + descfn)
2099                     return
2100
2101         if self.params.get('writeannotations', False):
2102             annofn = self.prepare_filename(info_dict, 'annotation')
2103             if not ensure_dir_exists(encodeFilename(annofn)):
2104                 return
2105             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2106                 self.to_screen('[info] Video annotations are already present')
2107             elif not info_dict.get('annotations'):
2108                 self.report_warning('There are no annotations to write.')
2109             else:
2110                 try:
2111                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2112                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2113                         annofile.write(info_dict['annotations'])
2114                 except (KeyError, TypeError):
2115                     self.report_warning('There are no annotations to write.')
2116                 except (OSError, IOError):
2117                     self.report_error('Cannot write annotations file: ' + annofn)
2118                     return
2119
2120         def dl(name, info, subtitle=False):
2121             fd = get_suitable_downloader(info, self.params)(self, self.params)
2122             for ph in self._progress_hooks:
2123                 fd.add_progress_hook(ph)
2124             if self.params.get('verbose'):
2125                 self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
2126             return fd.download(name, info, subtitle)
2127
2128         subtitles_are_requested = any([self.params.get('writesubtitles', False),
2129                                        self.params.get('writeautomaticsub')])
2130
2131         if subtitles_are_requested and info_dict.get('requested_subtitles'):
2132             # subtitles download errors are already managed as troubles in relevant IE
2133             # that way it will silently go on when used with unsupporting IE
2134             subtitles = info_dict['requested_subtitles']
2135             # ie = self.get_info_extractor(info_dict['extractor_key'])
2136             for sub_lang, sub_info in subtitles.items():
2137                 sub_format = sub_info['ext']
2138                 sub_fn = self.prepare_filename(info_dict, 'subtitle')
2139                 sub_filename = subtitles_filename(
2140                     temp_filename if not skip_dl else sub_fn,
2141                     sub_lang, sub_format, info_dict.get('ext'))
2142                 sub_filename_final = subtitles_filename(sub_fn, sub_lang, sub_format, info_dict.get('ext'))
2143                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
2144                     self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
2145                     files_to_move[sub_filename] = sub_filename_final
2146                 else:
2147                     self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
2148                     if sub_info.get('data') is not None:
2149                         try:
2150                             # Use newline='' to prevent conversion of newline characters
2151                             # See https://github.com/ytdl-org/youtube-dl/issues/10268
2152                             with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
2153                                 subfile.write(sub_info['data'])
2154                             files_to_move[sub_filename] = sub_filename_final
2155                         except (OSError, IOError):
2156                             self.report_error('Cannot write subtitles file ' + sub_filename)
2157                             return
2158                     else:
2159                         try:
2160                             dl(sub_filename, sub_info, subtitle=True)
2161                             '''
2162                             if self.params.get('sleep_interval_subtitles', False):
2163                                 dl(sub_filename, sub_info)
2164                             else:
2165                                 sub_data = ie._request_webpage(
2166                                     sub_info['url'], info_dict['id'], note=False).read()
2167                                 with io.open(encodeFilename(sub_filename), 'wb') as subfile:
2168                                     subfile.write(sub_data)
2169                             '''
2170                             files_to_move[sub_filename] = sub_filename_final
2171                         except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2172                             self.report_warning('Unable to download subtitle for "%s": %s' %
2173                                                 (sub_lang, error_to_compat_str(err)))
2174                             continue
2175
2176         if skip_dl:
2177             if self.params.get('convertsubtitles', False):
2178                 # subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
2179                 filename_real_ext = os.path.splitext(full_filename)[1][1:]
2180                 filename_wo_ext = (
2181                     os.path.splitext(full_filename)[0]
2182                     if filename_real_ext == info_dict['ext']
2183                     else full_filename)
2184                 afilename = '%s.%s' % (filename_wo_ext, self.params.get('convertsubtitles'))
2185                 # if subconv.available:
2186                 #     info_dict['__postprocessors'].append(subconv)
2187                 if os.path.exists(encodeFilename(afilename)):
2188                     self.to_screen(
2189                         '[download] %s has already been downloaded and '
2190                         'converted' % afilename)
2191                 else:
2192                     try:
2193                         self.post_process(full_filename, info_dict, files_to_move)
2194                     except PostProcessingError as err:
2195                         self.report_error('Postprocessing: %s' % str(err))
2196                         return
2197
2198         if self.params.get('writeinfojson', False):
2199             infofn = self.prepare_filename(info_dict, 'infojson')
2200             if not ensure_dir_exists(encodeFilename(infofn)):
2201                 return
2202             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
2203                 self.to_screen('[info] Video metadata is already present')
2204             else:
2205                 self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
2206                 try:
2207                     write_json_file(self.filter_requested_info(info_dict), infofn)
2208                 except (OSError, IOError):
2209                     self.report_error('Cannot write video metadata to JSON file ' + infofn)
2210                     return
2211             info_dict['__infojson_filename'] = infofn
2212
2213         thumbfn = self.prepare_filename(info_dict, 'thumbnail')
2214         thumb_fn_temp = temp_filename if not skip_dl else thumbfn
2215         for thumb_ext in self._write_thumbnails(info_dict, thumb_fn_temp):
2216             thumb_filename_temp = replace_extension(thumb_fn_temp, thumb_ext, info_dict.get('ext'))
2217             thumb_filename = replace_extension(thumbfn, thumb_ext, info_dict.get('ext'))
2218             files_to_move[thumb_filename_temp] = info_dict['__thumbnail_filename'] = thumb_filename
2219
2220         # Write internet shortcut files
2221         url_link = webloc_link = desktop_link = False
2222         if self.params.get('writelink', False):
2223             if sys.platform == "darwin":  # macOS.
2224                 webloc_link = True
2225             elif sys.platform.startswith("linux"):
2226                 desktop_link = True
2227             else:  # if sys.platform in ['win32', 'cygwin']:
2228                 url_link = True
2229         if self.params.get('writeurllink', False):
2230             url_link = True
2231         if self.params.get('writewebloclink', False):
2232             webloc_link = True
2233         if self.params.get('writedesktoplink', False):
2234             desktop_link = True
2235
2236         if url_link or webloc_link or desktop_link:
2237             if 'webpage_url' not in info_dict:
2238                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2239                 return
2240             ascii_url = iri_to_uri(info_dict['webpage_url'])
2241
2242         def _write_link_file(extension, template, newline, embed_filename):
2243             linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
2244             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2245                 self.to_screen('[info] Internet shortcut is already present')
2246             else:
2247                 try:
2248                     self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2249                     with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2250                         template_vars = {'url': ascii_url}
2251                         if embed_filename:
2252                             template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2253                         linkfile.write(template % template_vars)
2254                 except (OSError, IOError):
2255                     self.report_error('Cannot write internet shortcut ' + linkfn)
2256                     return False
2257             return True
2258
2259         if url_link:
2260             if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2261                 return
2262         if webloc_link:
2263             if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2264                 return
2265         if desktop_link:
2266             if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2267                 return
2268
2269         # Download
2270         must_record_download_archive = False
2271         if not skip_dl:
2272             try:
2273
2274                 def existing_file(*filepaths):
2275                     ext = info_dict.get('ext')
2276                     final_ext = self.params.get('final_ext', ext)
2277                     existing_files = []
2278                     for file in orderedSet(filepaths):
2279                         if final_ext != ext:
2280                             converted = replace_extension(file, final_ext, ext)
2281                             if os.path.exists(encodeFilename(converted)):
2282                                 existing_files.append(converted)
2283                         if os.path.exists(encodeFilename(file)):
2284                             existing_files.append(file)
2285
2286                     if not existing_files or self.params.get('overwrites', False):
2287                         for file in orderedSet(existing_files):
2288                             self.report_file_delete(file)
2289                             os.remove(encodeFilename(file))
2290                         return None
2291
2292                     self.report_file_already_downloaded(existing_files[0])
2293                     info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2294                     return existing_files[0]
2295
2296                 success = True
2297                 if info_dict.get('requested_formats') is not None:
2298                     downloaded = []
2299                     merger = FFmpegMergerPP(self)
2300                     if self.params.get('allow_unplayable_formats'):
2301                         self.report_warning(
2302                             'You have requested merging of multiple formats '
2303                             'while also allowing unplayable formats to be downloaded. '
2304                             'The formats won\'t be merged to prevent data corruption.')
2305                     elif not merger.available:
2306                         self.report_warning(
2307                             'You have requested merging of multiple formats but ffmpeg is not installed. '
2308                             'The formats won\'t be merged.')
2309
2310                     def compatible_formats(formats):
2311                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2312                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2313                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2314                         if len(video_formats) > 2 or len(audio_formats) > 2:
2315                             return False
2316
2317                         # Check extension
2318                         exts = set(format.get('ext') for format in formats)
2319                         COMPATIBLE_EXTS = (
2320                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2321                             set(('webm',)),
2322                         )
2323                         for ext_sets in COMPATIBLE_EXTS:
2324                             if ext_sets.issuperset(exts):
2325                                 return True
2326                         # TODO: Check acodec/vcodec
2327                         return False
2328
2329                     requested_formats = info_dict['requested_formats']
2330                     old_ext = info_dict['ext']
2331                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
2332                         info_dict['ext'] = 'mkv'
2333                         self.report_warning(
2334                             'Requested formats are incompatible for merge and will be merged into mkv.')
2335
2336                     def correct_ext(filename):
2337                         filename_real_ext = os.path.splitext(filename)[1][1:]
2338                         filename_wo_ext = (
2339                             os.path.splitext(filename)[0]
2340                             if filename_real_ext == old_ext
2341                             else filename)
2342                         return '%s.%s' % (filename_wo_ext, info_dict['ext'])
2343
2344                     # Ensure filename always has a correct extension for successful merge
2345                     full_filename = correct_ext(full_filename)
2346                     temp_filename = correct_ext(temp_filename)
2347                     dl_filename = existing_file(full_filename, temp_filename)
2348                     info_dict['__real_download'] = False
2349                     if dl_filename is None:
2350                         for f in requested_formats:
2351                             new_info = dict(info_dict)
2352                             new_info.update(f)
2353                             fname = prepend_extension(
2354                                 self.prepare_filename(new_info, 'temp'),
2355                                 'f%s' % f['format_id'], new_info['ext'])
2356                             if not ensure_dir_exists(fname):
2357                                 return
2358                             downloaded.append(fname)
2359                             partial_success, real_download = dl(fname, new_info)
2360                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
2361                             success = success and partial_success
2362                         if merger.available and not self.params.get('allow_unplayable_formats'):
2363                             info_dict['__postprocessors'].append(merger)
2364                             info_dict['__files_to_merge'] = downloaded
2365                             # Even if there were no downloads, it is being merged only now
2366                             info_dict['__real_download'] = True
2367                         else:
2368                             for file in downloaded:
2369                                 files_to_move[file] = None
2370                 else:
2371                     # Just a single file
2372                     dl_filename = existing_file(full_filename, temp_filename)
2373                     if dl_filename is None:
2374                         success, real_download = dl(temp_filename, info_dict)
2375                         info_dict['__real_download'] = real_download
2376
2377                 dl_filename = dl_filename or temp_filename
2378                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2379
2380             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2381                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2382                 return
2383             except (OSError, IOError) as err:
2384                 raise UnavailableVideoError(err)
2385             except (ContentTooShortError, ) as err:
2386                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2387                 return
2388
2389             if success and full_filename != '-':
2390                 # Fixup content
2391                 fixup_policy = self.params.get('fixup')
2392                 if fixup_policy is None:
2393                     fixup_policy = 'detect_or_warn'
2394
2395                 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg to fix this automatically.'
2396
2397                 stretched_ratio = info_dict.get('stretched_ratio')
2398                 if stretched_ratio is not None and stretched_ratio != 1:
2399                     if fixup_policy == 'warn':
2400                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2401                             info_dict['id'], stretched_ratio))
2402                     elif fixup_policy == 'detect_or_warn':
2403                         stretched_pp = FFmpegFixupStretchedPP(self)
2404                         if stretched_pp.available:
2405                             info_dict['__postprocessors'].append(stretched_pp)
2406                         else:
2407                             self.report_warning(
2408                                 '%s: Non-uniform pixel ratio (%s). %s'
2409                                 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
2410                     else:
2411                         assert fixup_policy in ('ignore', 'never')
2412
2413                 if (info_dict.get('requested_formats') is None
2414                         and info_dict.get('container') == 'm4a_dash'
2415                         and info_dict.get('ext') == 'm4a'):
2416                     if fixup_policy == 'warn':
2417                         self.report_warning(
2418                             '%s: writing DASH m4a. '
2419                             'Only some players support this container.'
2420                             % info_dict['id'])
2421                     elif fixup_policy == 'detect_or_warn':
2422                         fixup_pp = FFmpegFixupM4aPP(self)
2423                         if fixup_pp.available:
2424                             info_dict['__postprocessors'].append(fixup_pp)
2425                         else:
2426                             self.report_warning(
2427                                 '%s: writing DASH m4a. '
2428                                 'Only some players support this container. %s'
2429                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2430                     else:
2431                         assert fixup_policy in ('ignore', 'never')
2432
2433                 if (info_dict.get('protocol') == 'm3u8_native'
2434                         or info_dict.get('protocol') == 'm3u8'
2435                         and self.params.get('hls_prefer_native')):
2436                     if fixup_policy == 'warn':
2437                         self.report_warning('%s: malformed AAC bitstream detected.' % (
2438                             info_dict['id']))
2439                     elif fixup_policy == 'detect_or_warn':
2440                         fixup_pp = FFmpegFixupM3u8PP(self)
2441                         if fixup_pp.available:
2442                             info_dict['__postprocessors'].append(fixup_pp)
2443                         else:
2444                             self.report_warning(
2445                                 '%s: malformed AAC bitstream detected. %s'
2446                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2447                     else:
2448                         assert fixup_policy in ('ignore', 'never')
2449
2450                 try:
2451                     self.post_process(dl_filename, info_dict, files_to_move)
2452                 except PostProcessingError as err:
2453                     self.report_error('Postprocessing: %s' % str(err))
2454                     return
2455                 try:
2456                     for ph in self._post_hooks:
2457                         ph(full_filename)
2458                 except Exception as err:
2459                     self.report_error('post hooks: %s' % str(err))
2460                     return
2461                 must_record_download_archive = True
2462
2463         if must_record_download_archive or self.params.get('force_write_download_archive', False):
2464             self.record_download_archive(info_dict)
2465         max_downloads = self.params.get('max_downloads')
2466         if max_downloads is not None and self._num_downloads >= int(max_downloads):
2467             raise MaxDownloadsReached()
2468
2469     def download(self, url_list):
2470         """Download a given list of URLs."""
2471         outtmpl = self.outtmpl_dict['default']
2472         if (len(url_list) > 1
2473                 and outtmpl != '-'
2474                 and '%' not in outtmpl
2475                 and self.params.get('max_downloads') != 1):
2476             raise SameFileError(outtmpl)
2477
2478         for url in url_list:
2479             try:
2480                 # It also downloads the videos
2481                 res = self.extract_info(
2482                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2483             except UnavailableVideoError:
2484                 self.report_error('unable to download video')
2485             except MaxDownloadsReached:
2486                 self.to_screen('[info] Maximum number of downloaded files reached')
2487                 raise
2488             except ExistingVideoReached:
2489                 self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
2490                 raise
2491             except RejectedVideoReached:
2492                 self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
2493                 raise
2494             else:
2495                 if self.params.get('dump_single_json', False):
2496                     self.to_stdout(json.dumps(res))
2497
2498         return self._download_retcode
2499
2500     def download_with_info_file(self, info_filename):
2501         with contextlib.closing(fileinput.FileInput(
2502                 [info_filename], mode='r',
2503                 openhook=fileinput.hook_encoded('utf-8'))) as f:
2504             # FileInput doesn't have a read method, we can't call json.load
2505             info = self.filter_requested_info(json.loads('\n'.join(f)))
2506         try:
2507             self.process_ie_result(info, download=True)
2508         except DownloadError:
2509             webpage_url = info.get('webpage_url')
2510             if webpage_url is not None:
2511                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2512                 return self.download([webpage_url])
2513             else:
2514                 raise
2515         return self._download_retcode
2516
2517     @staticmethod
2518     def filter_requested_info(info_dict):
2519         fields_to_remove = ('requested_formats', 'requested_subtitles')
2520         return dict(
2521             (k, v) for k, v in info_dict.items()
2522             if (k[0] != '_' or k == '_type') and k not in fields_to_remove)
2523
2524     def run_pp(self, pp, infodict, files_to_move={}):
2525         files_to_delete = []
2526         files_to_delete, infodict = pp.run(infodict)
2527         if not files_to_delete:
2528             return files_to_move, infodict
2529
2530         if self.params.get('keepvideo', False):
2531             for f in files_to_delete:
2532                 files_to_move.setdefault(f, '')
2533         else:
2534             for old_filename in set(files_to_delete):
2535                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2536                 try:
2537                     os.remove(encodeFilename(old_filename))
2538                 except (IOError, OSError):
2539                     self.report_warning('Unable to remove downloaded original file')
2540                 if old_filename in files_to_move:
2541                     del files_to_move[old_filename]
2542         return files_to_move, infodict
2543
2544     def pre_process(self, ie_info):
2545         info = dict(ie_info)
2546         for pp in self._pps['beforedl']:
2547             info = self.run_pp(pp, info)[1]
2548         return info
2549
2550     def post_process(self, filename, ie_info, files_to_move={}):
2551         """Run all the postprocessors on the given file."""
2552         info = dict(ie_info)
2553         info['filepath'] = filename
2554         info['__files_to_move'] = {}
2555
2556         for pp in ie_info.get('__postprocessors', []) + self._pps['normal']:
2557             files_to_move, info = self.run_pp(pp, info, files_to_move)
2558         info = self.run_pp(MoveFilesAfterDownloadPP(self, files_to_move), info)[1]
2559         for pp in self._pps['aftermove']:
2560             info = self.run_pp(pp, info, {})[1]
2561
2562     def _make_archive_id(self, info_dict):
2563         video_id = info_dict.get('id')
2564         if not video_id:
2565             return
2566         # Future-proof against any change in case
2567         # and backwards compatibility with prior versions
2568         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
2569         if extractor is None:
2570             url = str_or_none(info_dict.get('url'))
2571             if not url:
2572                 return
2573             # Try to find matching extractor for the URL and take its ie_key
2574             for ie in self._ies:
2575                 if ie.suitable(url):
2576                     extractor = ie.ie_key()
2577                     break
2578             else:
2579                 return
2580         return '%s %s' % (extractor.lower(), video_id)
2581
2582     def in_download_archive(self, info_dict):
2583         fn = self.params.get('download_archive')
2584         if fn is None:
2585             return False
2586
2587         vid_id = self._make_archive_id(info_dict)
2588         if not vid_id:
2589             return False  # Incomplete video information
2590
2591         return vid_id in self.archive
2592
2593     def record_download_archive(self, info_dict):
2594         fn = self.params.get('download_archive')
2595         if fn is None:
2596             return
2597         vid_id = self._make_archive_id(info_dict)
2598         assert vid_id
2599         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2600             archive_file.write(vid_id + '\n')
2601         self.archive.add(vid_id)
2602
2603     @staticmethod
2604     def format_resolution(format, default='unknown'):
2605         if format.get('vcodec') == 'none':
2606             return 'audio only'
2607         if format.get('resolution') is not None:
2608             return format['resolution']
2609         if format.get('height') is not None:
2610             if format.get('width') is not None:
2611                 res = '%sx%s' % (format['width'], format['height'])
2612             else:
2613                 res = '%sp' % format['height']
2614         elif format.get('width') is not None:
2615             res = '%dx?' % format['width']
2616         else:
2617             res = default
2618         return res
2619
2620     def _format_note(self, fdict):
2621         res = ''
2622         if fdict.get('ext') in ['f4f', 'f4m']:
2623             res += '(unsupported) '
2624         if fdict.get('language'):
2625             if res:
2626                 res += ' '
2627             res += '[%s] ' % fdict['language']
2628         if fdict.get('format_note') is not None:
2629             res += fdict['format_note'] + ' '
2630         if fdict.get('tbr') is not None:
2631             res += '%4dk ' % fdict['tbr']
2632         if fdict.get('container') is not None:
2633             if res:
2634                 res += ', '
2635             res += '%s container' % fdict['container']
2636         if (fdict.get('vcodec') is not None
2637                 and fdict.get('vcodec') != 'none'):
2638             if res:
2639                 res += ', '
2640             res += fdict['vcodec']
2641             if fdict.get('vbr') is not None:
2642                 res += '@'
2643         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2644             res += 'video@'
2645         if fdict.get('vbr') is not None:
2646             res += '%4dk' % fdict['vbr']
2647         if fdict.get('fps') is not None:
2648             if res:
2649                 res += ', '
2650             res += '%sfps' % fdict['fps']
2651         if fdict.get('acodec') is not None:
2652             if res:
2653                 res += ', '
2654             if fdict['acodec'] == 'none':
2655                 res += 'video only'
2656             else:
2657                 res += '%-5s' % fdict['acodec']
2658         elif fdict.get('abr') is not None:
2659             if res:
2660                 res += ', '
2661             res += 'audio'
2662         if fdict.get('abr') is not None:
2663             res += '@%3dk' % fdict['abr']
2664         if fdict.get('asr') is not None:
2665             res += ' (%5dHz)' % fdict['asr']
2666         if fdict.get('filesize') is not None:
2667             if res:
2668                 res += ', '
2669             res += format_bytes(fdict['filesize'])
2670         elif fdict.get('filesize_approx') is not None:
2671             if res:
2672                 res += ', '
2673             res += '~' + format_bytes(fdict['filesize_approx'])
2674         return res
2675
2676     def _format_note_table(self, f):
2677         def join_fields(*vargs):
2678             return ', '.join((val for val in vargs if val != ''))
2679
2680         return join_fields(
2681             'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
2682             format_field(f, 'language', '[%s]'),
2683             format_field(f, 'format_note'),
2684             format_field(f, 'container', ignore=(None, f.get('ext'))),
2685             format_field(f, 'asr', '%5dHz'))
2686
2687     def list_formats(self, info_dict):
2688         formats = info_dict.get('formats', [info_dict])
2689         new_format = self.params.get('listformats_table', False)
2690         if new_format:
2691             table = [
2692                 [
2693                     format_field(f, 'format_id'),
2694                     format_field(f, 'ext'),
2695                     self.format_resolution(f),
2696                     format_field(f, 'fps', '%d'),
2697                     '|',
2698                     format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
2699                     format_field(f, 'tbr', '%4dk'),
2700                     f.get('protocol').replace('http_dash_segments', 'dash').replace("native", "n").replace('niconico_', ''),
2701                     '|',
2702                     format_field(f, 'vcodec', default='unknown').replace('none', ''),
2703                     format_field(f, 'vbr', '%4dk'),
2704                     format_field(f, 'acodec', default='unknown').replace('none', ''),
2705                     format_field(f, 'abr', '%3dk'),
2706                     format_field(f, 'asr', '%5dHz'),
2707                     self._format_note_table(f)]
2708                 for f in formats
2709                 if f.get('preference') is None or f['preference'] >= -1000]
2710             header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', '  TBR', 'PROTO',
2711                            '|', 'VCODEC', '  VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
2712         else:
2713             table = [
2714                 [
2715                     format_field(f, 'format_id'),
2716                     format_field(f, 'ext'),
2717                     self.format_resolution(f),
2718                     self._format_note(f)]
2719                 for f in formats
2720                 if f.get('preference') is None or f['preference'] >= -1000]
2721             header_line = ['format code', 'extension', 'resolution', 'note']
2722
2723         self.to_screen(
2724             '[info] Available formats for %s:\n%s' % (info_dict['id'], render_table(
2725                 header_line,
2726                 table,
2727                 delim=new_format,
2728                 extraGap=(0 if new_format else 1),
2729                 hideEmpty=new_format)))
2730
2731     def list_thumbnails(self, info_dict):
2732         thumbnails = info_dict.get('thumbnails')
2733         if not thumbnails:
2734             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
2735             return
2736
2737         self.to_screen(
2738             '[info] Thumbnails for %s:' % info_dict['id'])
2739         self.to_screen(render_table(
2740             ['ID', 'width', 'height', 'URL'],
2741             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
2742
2743     def list_subtitles(self, video_id, subtitles, name='subtitles'):
2744         if not subtitles:
2745             self.to_screen('%s has no %s' % (video_id, name))
2746             return
2747         self.to_screen(
2748             'Available %s for %s:' % (name, video_id))
2749         self.to_screen(render_table(
2750             ['Language', 'formats'],
2751             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
2752                 for lang, formats in subtitles.items()]))
2753
2754     def urlopen(self, req):
2755         """ Start an HTTP download """
2756         if isinstance(req, compat_basestring):
2757             req = sanitized_Request(req)
2758         return self._opener.open(req, timeout=self._socket_timeout)
2759
2760     def print_debug_header(self):
2761         if not self.params.get('verbose'):
2762             return
2763
2764         if type('') is not compat_str:
2765             # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
2766             self.report_warning(
2767                 'Your Python is broken! Update to a newer and supported version')
2768
2769         stdout_encoding = getattr(
2770             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
2771         encoding_str = (
2772             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
2773                 locale.getpreferredencoding(),
2774                 sys.getfilesystemencoding(),
2775                 stdout_encoding,
2776                 self.get_encoding()))
2777         write_string(encoding_str, encoding=None)
2778
2779         source = (
2780             '(exe)' if hasattr(sys, 'frozen')
2781             else '(zip)' if isinstance(globals().get('__loader__'), zipimporter)
2782             else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py'
2783             else '')
2784         self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))
2785         if _LAZY_LOADER:
2786             self._write_string('[debug] Lazy loading extractors enabled\n')
2787         if _PLUGIN_CLASSES:
2788             self._write_string(
2789                 '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
2790         try:
2791             sp = subprocess.Popen(
2792                 ['git', 'rev-parse', '--short', 'HEAD'],
2793                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
2794                 cwd=os.path.dirname(os.path.abspath(__file__)))
2795             out, err = process_communicate_or_kill(sp)
2796             out = out.decode().strip()
2797             if re.match('[0-9a-f]+', out):
2798                 self._write_string('[debug] Git HEAD: %s\n' % out)
2799         except Exception:
2800             try:
2801                 sys.exc_clear()
2802             except Exception:
2803                 pass
2804
2805         def python_implementation():
2806             impl_name = platform.python_implementation()
2807             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
2808                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
2809             return impl_name
2810
2811         self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
2812             platform.python_version(),
2813             python_implementation(),
2814             platform.architecture()[0],
2815             platform_name()))
2816
2817         exe_versions = FFmpegPostProcessor.get_versions(self)
2818         exe_versions['rtmpdump'] = rtmpdump_version()
2819         exe_versions['phantomjs'] = PhantomJSwrapper._version()
2820         exe_str = ', '.join(
2821             '%s %s' % (exe, v)
2822             for exe, v in sorted(exe_versions.items())
2823             if v
2824         )
2825         if not exe_str:
2826             exe_str = 'none'
2827         self._write_string('[debug] exe versions: %s\n' % exe_str)
2828
2829         proxy_map = {}
2830         for handler in self._opener.handlers:
2831             if hasattr(handler, 'proxies'):
2832                 proxy_map.update(handler.proxies)
2833         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
2834
2835         if self.params.get('call_home', False):
2836             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
2837             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
2838             return
2839             latest_version = self.urlopen(
2840                 'https://yt-dl.org/latest/version').read().decode('utf-8')
2841             if version_tuple(latest_version) > version_tuple(__version__):
2842                 self.report_warning(
2843                     'You are using an outdated version (newest version: %s)! '
2844                     'See https://yt-dl.org/update if you need help updating.' %
2845                     latest_version)
2846
2847     def _setup_opener(self):
2848         timeout_val = self.params.get('socket_timeout')
2849         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
2850
2851         opts_cookiefile = self.params.get('cookiefile')
2852         opts_proxy = self.params.get('proxy')
2853
2854         if opts_cookiefile is None:
2855             self.cookiejar = compat_cookiejar.CookieJar()
2856         else:
2857             opts_cookiefile = expand_path(opts_cookiefile)
2858             self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
2859             if os.access(opts_cookiefile, os.R_OK):
2860                 self.cookiejar.load(ignore_discard=True, ignore_expires=True)
2861
2862         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
2863         if opts_proxy is not None:
2864             if opts_proxy == '':
2865                 proxies = {}
2866             else:
2867                 proxies = {'http': opts_proxy, 'https': opts_proxy}
2868         else:
2869             proxies = compat_urllib_request.getproxies()
2870             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
2871             if 'http' in proxies and 'https' not in proxies:
2872                 proxies['https'] = proxies['http']
2873         proxy_handler = PerRequestProxyHandler(proxies)
2874
2875         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
2876         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
2877         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
2878         redirect_handler = YoutubeDLRedirectHandler()
2879         data_handler = compat_urllib_request_DataHandler()
2880
2881         # When passing our own FileHandler instance, build_opener won't add the
2882         # default FileHandler and allows us to disable the file protocol, which
2883         # can be used for malicious purposes (see
2884         # https://github.com/ytdl-org/youtube-dl/issues/8227)
2885         file_handler = compat_urllib_request.FileHandler()
2886
2887         def file_open(*args, **kwargs):
2888             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dlc for security reasons')
2889         file_handler.file_open = file_open
2890
2891         opener = compat_urllib_request.build_opener(
2892             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2893
2894         # Delete the default user-agent header, which would otherwise apply in
2895         # cases where our custom HTTP handler doesn't come into play
2896         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
2897         opener.addheaders = []
2898         self._opener = opener
2899
2900     def encode(self, s):
2901         if isinstance(s, bytes):
2902             return s  # Already encoded
2903
2904         try:
2905             return s.encode(self.get_encoding())
2906         except UnicodeEncodeError as err:
2907             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2908             raise
2909
2910     def get_encoding(self):
2911         encoding = self.params.get('encoding')
2912         if encoding is None:
2913             encoding = preferredencoding()
2914         return encoding
2915
2916     def _write_thumbnails(self, info_dict, filename):  # return the extensions
2917         write_all = self.params.get('write_all_thumbnails', False)
2918         thumbnails = []
2919         if write_all or self.params.get('writethumbnail', False):
2920             thumbnails = info_dict.get('thumbnails') or []
2921         multiple = write_all and len(thumbnails) > 1
2922
2923         ret = []
2924         for t in thumbnails[::1 if write_all else -1]:
2925             thumb_ext = determine_ext(t['url'], 'jpg')
2926             suffix = '%s.' % t['id'] if multiple else ''
2927             thumb_display_id = '%s ' % t['id'] if multiple else ''
2928             t['filename'] = thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))
2929
2930             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
2931                 ret.append(suffix + thumb_ext)
2932                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2933                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2934             else:
2935                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2936                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2937                 try:
2938                     uf = self.urlopen(t['url'])
2939                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2940                         shutil.copyfileobj(uf, thumbf)
2941                     ret.append(suffix + thumb_ext)
2942                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2943                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2944                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2945                     self.report_warning('Unable to download thumbnail "%s": %s' %
2946                                         (t['url'], error_to_compat_str(err)))
2947             if ret and not write_all:
2948                 break
2949         return ret