yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import copy
   9 import datetime
  10 import errno
  11 import fileinput
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import socket
  23 import sys
  24 import time
  25 import tokenize
  26 import traceback
  27 import random
  28
  29 from string import ascii_letters
  30 from zipimport import zipimporter
  31
  32 from .compat import (
  33     compat_basestring,
  34     compat_cookiejar,
  35     compat_get_terminal_size,
  36     compat_http_client,
  37     compat_kwargs,
  38     compat_numeric_types,
  39     compat_os_name,
  40     compat_str,
  41     compat_tokenize_tokenize,
  42     compat_urllib_error,
  43     compat_urllib_request,
  44     compat_urllib_request_DataHandler,
  45 )
  46 from .utils import (
  47     age_restricted,
  48     args_to_str,
  49     ContentTooShortError,
  50     date_from_str,
  51     DateRange,
  52     DEFAULT_OUTTMPL,
  53     OUTTMPL_TYPES,
  54     determine_ext,
  55     determine_protocol,
  56     DOT_DESKTOP_LINK_TEMPLATE,
  57     DOT_URL_LINK_TEMPLATE,
  58     DOT_WEBLOC_LINK_TEMPLATE,
  59     DownloadError,
  60     encode_compat_str,
  61     encodeFilename,
  62     error_to_compat_str,
  63     ExistingVideoReached,
  64     expand_path,
  65     ExtractorError,
  66     float_or_none,
  67     format_bytes,
  68     format_field,
  69     formatSeconds,
  70     GeoRestrictedError,
  71     int_or_none,
  72     iri_to_uri,
  73     ISO3166Utils,
  74     locked_file,
  75     make_dir,
  76     make_HTTPS_handler,
  77     MaxDownloadsReached,
  78     orderedSet,
  79     PagedList,
  80     parse_filesize,
  81     PerRequestProxyHandler,
  82     platform_name,
  83     PostProcessingError,
  84     preferredencoding,
  85     prepend_extension,
  86     register_socks_protocols,
  87     render_table,
  88     replace_extension,
  89     RejectedVideoReached,
  90     SameFileError,
  91     sanitize_filename,
  92     sanitize_path,
  93     sanitize_url,
  94     sanitized_Request,
  95     std_headers,
  96     str_or_none,
  97     strftime_or_none,
  98     subtitles_filename,
  99     to_high_limit_path,
 100     UnavailableVideoError,
 101     url_basename,
 102     version_tuple,
 103     write_json_file,
 104     write_string,
 105     YoutubeDLCookieJar,
 106     YoutubeDLCookieProcessor,
 107     YoutubeDLHandler,
 108     YoutubeDLRedirectHandler,
 109     process_communicate_or_kill,
 110 )
 111 from .cache import Cache
 112 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER, _PLUGIN_CLASSES
 113 from .extractor.openload import PhantomJSwrapper
 114 from .downloader import get_suitable_downloader
 115 from .downloader.rtmp import rtmpdump_version
 116 from .postprocessor import (
 117     FFmpegFixupM3u8PP,
 118     FFmpegFixupM4aPP,
 119     FFmpegFixupStretchedPP,
 120     FFmpegMergerPP,
 121     FFmpegPostProcessor,
 122     # FFmpegSubtitlesConvertorPP,
 123     get_postprocessor,
 124     MoveFilesAfterDownloadPP,
 125 )
 126 from .version import __version__
 127
 128 if compat_os_name == 'nt':
 129     import ctypes
 130
 131
 132 class YoutubeDL(object):
 133     """YoutubeDL class.
 134
 135     YoutubeDL objects are the ones responsible of downloading the
 136     actual video file and writing it to disk if the user has requested
 137     it, among some other tasks. In most cases there should be one per
 138     program. As, given a video URL, the downloader doesn't know how to
 139     extract all the needed information, task that InfoExtractors do, it
 140     has to pass the URL to one of them.
 141
 142     For this, YoutubeDL objects have a method that allows
 143     InfoExtractors to be registered in a given order. When it is passed
 144     a URL, the YoutubeDL object handles it to the first InfoExtractor it
 145     finds that reports being able to handle it. The InfoExtractor extracts
 146     all the information about the video or videos the URL refers to, and
 147     YoutubeDL process the extracted information, possibly using a File
 148     Downloader to download the video.
 149
 150     YoutubeDL objects accept a lot of parameters. In order not to saturate
 151     the object constructor with arguments, it receives a dictionary of
 152     options instead. These options are available through the params
 153     attribute for the InfoExtractors to use. The YoutubeDL also
 154     registers itself as the downloader in charge for the InfoExtractors
 155     that are added to it, so this is a "mutual registration".
 156
 157     Available options:
 158
 159     username:          Username for authentication purposes.
 160     password:          Password for authentication purposes.
 161     videopassword:     Password for accessing a video.
 162     ap_mso:            Adobe Pass multiple-system operator identifier.
 163     ap_username:       Multiple-system operator account username.
 164     ap_password:       Multiple-system operator account password.
 165     usenetrc:          Use netrc for authentication instead.
 166     verbose:           Print additional info to stdout.
 167     quiet:             Do not print messages to stdout.
 168     no_warnings:       Do not print out anything for warnings.
 169     forceurl:          Force printing final URL.
 170     forcetitle:        Force printing title.
 171     forceid:           Force printing ID.
 172     forcethumbnail:    Force printing thumbnail URL.
 173     forcedescription:  Force printing description.
 174     forcefilename:     Force printing final filename.
 175     forceduration:     Force printing duration.
 176     forcejson:         Force printing info_dict as JSON.
 177     dump_single_json:  Force printing the info_dict of the whole playlist
 178                        (or video) as a single JSON line.
 179     force_write_download_archive: Force writing download archive regardless
 180                        of 'skip_download' or 'simulate'.
 181     simulate:          Do not download the video files.
 182     format:            Video format code. see "FORMAT SELECTION" for more details.
 183     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 184     format_sort:       How to sort the video formats. see "Sorting Formats"
 185                        for more details.
 186     format_sort_force: Force the given format_sort. see "Sorting Formats"
 187                        for more details.
 188     allow_multiple_video_streams:   Allow multiple video streams to be merged
 189                        into a single file
 190     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 191                        into a single file
 192     paths:             Dictionary of output paths. The allowed keys are 'home'
 193                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 194     outtmpl:           Dictionary of templates for output names. Allowed keys
 195                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 196                        A string a also accepted for backward compatibility
 197     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 198     restrictfilenames: Do not allow "&" and spaces in file names
 199     trim_file_name:    Limit length of filename (extension excluded)
 200     windowsfilenames:  Force the filenames to be windows compatible
 201     ignoreerrors:      Do not stop on download errors
 202                        (Default True when running yt-dlp,
 203                        but False when directly accessing YoutubeDL class)
 204     force_generic_extractor: Force downloader to use the generic extractor
 205     overwrites:        Overwrite all video and metadata files if True,
 206                        overwrite only non-video files if None
 207                        and don't overwrite any file if False
 208     playliststart:     Playlist item to start at.
 209     playlistend:       Playlist item to end at.
 210     playlist_items:    Specific indices of playlist to download.
 211     playlistreverse:   Download playlist items in reverse order.
 212     playlistrandom:    Download playlist items in random order.
 213     matchtitle:        Download only matching titles.
 214     rejecttitle:       Reject downloads for matching titles.
 215     logger:            Log messages to a logging.Logger instance.
 216     logtostderr:       Log messages to stderr instead of stdout.
 217     writedescription:  Write the video description to a .description file
 218     writeinfojson:     Write the video description to a .info.json file
 219     writecomments:     Extract video comments. This will not be written to disk
 220                        unless writeinfojson is also given
 221     writeannotations:  Write the video annotations to a .annotations.xml file
 222     writethumbnail:    Write the thumbnail image to a file
 223     allow_playlist_files: Whether to write playlists' description, infojson etc
 224                        also to disk when using the 'write*' options
 225     write_all_thumbnails:  Write all thumbnail formats to files
 226     writelink:         Write an internet shortcut file, depending on the
 227                        current platform (.url/.webloc/.desktop)
 228     writeurllink:      Write a Windows internet shortcut file (.url)
 229     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 230     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 231     writesubtitles:    Write the video subtitles to a file
 232     writeautomaticsub: Write the automatically generated subtitles to a file
 233     allsubtitles:      Downloads all the subtitles of the video
 234                        (requires writesubtitles or writeautomaticsub)
 235     listsubtitles:     Lists all available subtitles for the video
 236     subtitlesformat:   The format code for subtitles
 237     subtitleslangs:    List of languages of the subtitles to download
 238     keepvideo:         Keep the video file after post-processing
 239     daterange:         A DateRange object, download only if the upload_date is in the range.
 240     skip_download:     Skip the actual download of the video file
 241     cachedir:          Location of the cache files in the filesystem.
 242                        False to disable filesystem cache.
 243     noplaylist:        Download single video instead of a playlist if in doubt.
 244     age_limit:         An integer representing the user's age in years.
 245                        Unsuitable videos for the given age are skipped.
 246     min_views:         An integer representing the minimum view count the video
 247                        must have in order to not be skipped.
 248                        Videos without view count information are always
 249                        downloaded. None for no limit.
 250     max_views:         An integer representing the maximum view count.
 251                        Videos that are more popular than that are not
 252                        downloaded.
 253                        Videos without view count information are always
 254                        downloaded. None for no limit.
 255     download_archive:  File name of a file where all downloads are recorded.
 256                        Videos already present in the file are not downloaded
 257                        again.
 258     break_on_existing: Stop the download process after attempting to download a
 259                        file that is in the archive.
 260     break_on_reject:   Stop the download process when encountering a video that
 261                        has been filtered out.
 262     cookiefile:        File name where cookies should be read from and dumped to
 263     nocheckcertificate:Do not verify SSL certificates
 264     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 265                        At the moment, this is only supported by YouTube.
 266     proxy:             URL of the proxy server to use
 267     geo_verification_proxy:  URL of the proxy to use for IP address verification
 268                        on geo-restricted sites.
 269     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 270     bidi_workaround:   Work around buggy terminals without bidirectional text
 271                        support, using fridibi
 272     debug_printtraffic:Print out sent and received HTTP traffic
 273     include_ads:       Download ads as well
 274     default_search:    Prepend this string if an input url is not valid.
 275                        'auto' for elaborate guessing
 276     encoding:          Use this encoding instead of the system-specified.
 277     extract_flat:      Do not resolve URLs, return the immediate result.
 278                        Pass in 'in_playlist' to only show this behavior for
 279                        playlist items.
 280     postprocessors:    A list of dictionaries, each with an entry
 281                        * key:  The name of the postprocessor. See
 282                                yt_dlp/postprocessor/__init__.py for a list.
 283                        * _after_move: Optional. If True, run this post_processor
 284                                after 'MoveFilesAfterDownload'
 285                        as well as any further keyword arguments for the
 286                        postprocessor.
 287     post_hooks:        A list of functions that get called as the final step
 288                        for each video file, after all postprocessors have been
 289                        called. The filename will be passed as the only argument.
 290     progress_hooks:    A list of functions that get called on download
 291                        progress, with a dictionary with the entries
 292                        * status: One of "downloading", "error", or "finished".
 293                                  Check this first and ignore unknown values.
 294
 295                        If status is one of "downloading", or "finished", the
 296                        following properties may also be present:
 297                        * filename: The final filename (always present)
 298                        * tmpfilename: The filename we're currently writing to
 299                        * downloaded_bytes: Bytes on disk
 300                        * total_bytes: Size of the whole file, None if unknown
 301                        * total_bytes_estimate: Guess of the eventual file size,
 302                                                None if unavailable.
 303                        * elapsed: The number of seconds since download started.
 304                        * eta: The estimated time in seconds, None if unknown
 305                        * speed: The download speed in bytes/second, None if
 306                                 unknown
 307                        * fragment_index: The counter of the currently
 308                                          downloaded video fragment.
 309                        * fragment_count: The number of fragments (= individual
 310                                          files that will be merged)
 311
 312                        Progress hooks are guaranteed to be called at least once
 313                        (with status "finished") if the download is successful.
 314     merge_output_format: Extension to use when merging formats.
 315     final_ext:         Expected final extension; used to detect when the file was
 316                        already downloaded and converted. "merge_output_format" is
 317                        replaced by this extension when given
 318     fixup:             Automatically correct known faults of the file.
 319                        One of:
 320                        - "never": do nothing
 321                        - "warn": only emit a warning
 322                        - "detect_or_warn": check whether we can do anything
 323                                            about it, warn otherwise (default)
 324     source_address:    Client-side IP address to bind to.
 325     call_home:         Boolean, true iff we are allowed to contact the
 326                        yt-dlp servers for debugging. (BROKEN)
 327     sleep_interval:    Number of seconds to sleep before each download when
 328                        used alone or a lower bound of a range for randomized
 329                        sleep before each download (minimum possible number
 330                        of seconds to sleep) when used along with
 331                        max_sleep_interval.
 332     max_sleep_interval:Upper bound of a range for randomized sleep before each
 333                        download (maximum possible number of seconds to sleep).
 334                        Must only be used along with sleep_interval.
 335                        Actual sleep time will be a random float from range
 336                        [sleep_interval; max_sleep_interval].
 337     listformats:       Print an overview of available video formats and exit.
 338     list_thumbnails:   Print a table of all thumbnails and exit.
 339     match_filter:      A function that gets called with the info_dict of
 340                        every video.
 341                        If it returns a message, the video is ignored.
 342                        If it returns None, the video is downloaded.
 343                        match_filter_func in utils.py is one example for this.
 344     no_color:          Do not emit color codes in output.
 345     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 346                        HTTP header
 347     geo_bypass_country:
 348                        Two-letter ISO 3166-2 country code that will be used for
 349                        explicit geographic restriction bypassing via faking
 350                        X-Forwarded-For HTTP header
 351     geo_bypass_ip_block:
 352                        IP range in CIDR notation that will be used similarly to
 353                        geo_bypass_country
 354
 355     The following options determine which downloader is picked:
 356     external_downloader: Executable of the external downloader to call.
 357                        None or unset for standard (built-in) downloader.
 358     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
 359                        if True, otherwise use ffmpeg/avconv if False, otherwise
 360                        use downloader suggested by extractor if None.
 361
 362     The following parameters are not used by YoutubeDL itself, they are used by
 363     the downloader (see yt_dlp/downloader/common.py):
 364     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 365     noresizebuffer, retries, continuedl, noprogress, consoletitle,
 366     xattr_set_filesize, external_downloader_args, hls_use_mpegts,
 367     http_chunk_size.
 368
 369     The following options are used by the post processors:
 370     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 371                        otherwise prefer ffmpeg. (avconv support is deprecated)
 372     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 373                        to the binary or its containing directory.
 374     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 375                         and a list of additional command-line arguments for the
 376                         postprocessor/executable. The dict can also have "PP+EXE" keys
 377                         which are used when the given exe is used by the given PP.
 378                         Use 'default' as the name for arguments to passed to all PP
 379
 380     The following options are used by the extractors:
 381     dynamic_mpd:        Whether to process dynamic DASH manifests (default: True)
 382     hls_split_discontinuity: Split HLS playlists to different formats at
 383                         discontinuities such as ad breaks (default: False)
 384     youtube_include_dash_manifest: If True (default), DASH manifests and related
 385                         data will be downloaded and processed by extractor.
 386                         You can reduce network I/O by disabling it if you don't
 387                         care about DASH. (only for youtube)
 388     youtube_include_hls_manifest: If True (default), HLS manifests and related
 389                         data will be downloaded and processed by extractor.
 390                         You can reduce network I/O by disabling it if you don't
 391                         care about HLS. (only for youtube)
 392     """
 393
 394     _NUMERIC_FIELDS = set((
 395         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 396         'timestamp', 'upload_year', 'upload_month', 'upload_day',
 397         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 398         'average_rating', 'comment_count', 'age_limit',
 399         'start_time', 'end_time',
 400         'chapter_number', 'season_number', 'episode_number',
 401         'track_number', 'disc_number', 'release_year',
 402         'playlist_index',
 403     ))
 404
 405     params = None
 406     _ies = []
 407     _pps = {'beforedl': [], 'aftermove': [], 'normal': []}
 408     __prepare_filename_warned = False
 409     _download_retcode = None
 410     _num_downloads = None
 411     _playlist_level = 0
 412     _playlist_urls = set()
 413     _screen_file = None
 414
 415     def __init__(self, params=None, auto_init=True):
 416         """Create a FileDownloader object with the given options."""
 417         if params is None:
 418             params = {}
 419         self._ies = []
 420         self._ies_instances = {}
 421         self._pps = {'beforedl': [], 'aftermove': [], 'normal': []}
 422         self.__prepare_filename_warned = False
 423         self._post_hooks = []
 424         self._progress_hooks = []
 425         self._download_retcode = 0
 426         self._num_downloads = 0
 427         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 428         self._err_file = sys.stderr
 429         self.params = {
 430             # Default parameters
 431             'nocheckcertificate': False,
 432         }
 433         self.params.update(params)
 434         self.cache = Cache(self)
 435         self.archive = set()
 436
 437         """Preload the archive, if any is specified"""
 438         def preload_download_archive(self):
 439             fn = self.params.get('download_archive')
 440             if fn is None:
 441                 return False
 442             try:
 443                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 444                     for line in archive_file:
 445                         self.archive.add(line.strip())
 446             except IOError as ioe:
 447                 if ioe.errno != errno.ENOENT:
 448                     raise
 449                 return False
 450             return True
 451
 452         def check_deprecated(param, option, suggestion):
 453             if self.params.get(param) is not None:
 454                 self.report_warning(
 455                     '%s is deprecated. Use %s instead.' % (option, suggestion))
 456                 return True
 457             return False
 458
 459         if self.params.get('verbose'):
 460             self.to_stdout('[debug] Loading archive file %r' % self.params.get('download_archive'))
 461
 462         preload_download_archive(self)
 463
 464         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 465             if self.params.get('geo_verification_proxy') is None:
 466                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 467
 468         if self.params.get('final_ext'):
 469             if self.params.get('merge_output_format'):
 470                 self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
 471             self.params['merge_output_format'] = self.params['final_ext']
 472
 473         if 'overwrites' in self.params and self.params['overwrites'] is None:
 474             del self.params['overwrites']
 475
 476         check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
 477         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 478         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 479
 480         if params.get('bidi_workaround', False):
 481             try:
 482                 import pty
 483                 master, slave = pty.openpty()
 484                 width = compat_get_terminal_size().columns
 485                 if width is None:
 486                     width_args = []
 487                 else:
 488                     width_args = ['-w', str(width)]
 489                 sp_kwargs = dict(
 490                     stdin=subprocess.PIPE,
 491                     stdout=slave,
 492                     stderr=self._err_file)
 493                 try:
 494                     self._output_process = subprocess.Popen(
 495                         ['bidiv'] + width_args, **sp_kwargs
 496                     )
 497                 except OSError:
 498                     self._output_process = subprocess.Popen(
 499                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 500                 self._output_channel = os.fdopen(master, 'rb')
 501             except OSError as ose:
 502                 if ose.errno == errno.ENOENT:
 503                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 504                 else:
 505                     raise
 506
 507         if (sys.platform != 'win32'
 508                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 509                 and not params.get('restrictfilenames', False)):
 510             # Unicode filesystem API will throw errors (#1474, #13027)
 511             self.report_warning(
 512                 'Assuming --restrict-filenames since file system encoding '
 513                 'cannot encode all characters. '
 514                 'Set the LC_ALL environment variable to fix this.')
 515             self.params['restrictfilenames'] = True
 516
 517         self.outtmpl_dict = self.parse_outtmpl()
 518
 519         self._setup_opener()
 520
 521         if auto_init:
 522             self.print_debug_header()
 523             self.add_default_info_extractors()
 524
 525         for pp_def_raw in self.params.get('postprocessors', []):
 526             pp_class = get_postprocessor(pp_def_raw['key'])
 527             pp_def = dict(pp_def_raw)
 528             del pp_def['key']
 529             if 'when' in pp_def:
 530                 when = pp_def['when']
 531                 del pp_def['when']
 532             else:
 533                 when = 'normal'
 534             pp = pp_class(self, **compat_kwargs(pp_def))
 535             self.add_post_processor(pp, when=when)
 536
 537         for ph in self.params.get('post_hooks', []):
 538             self.add_post_hook(ph)
 539
 540         for ph in self.params.get('progress_hooks', []):
 541             self.add_progress_hook(ph)
 542
 543         register_socks_protocols()
 544
 545     def warn_if_short_id(self, argv):
 546         # short YouTube ID starting with dash?
 547         idxs = [
 548             i for i, a in enumerate(argv)
 549             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 550         if idxs:
 551             correct_argv = (
 552                 ['yt-dlp']
 553                 + [a for i, a in enumerate(argv) if i not in idxs]
 554                 + ['--'] + [argv[i] for i in idxs]
 555             )
 556             self.report_warning(
 557                 'Long argument string detected. '
 558                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 559                 args_to_str(correct_argv))
 560
 561     def add_info_extractor(self, ie):
 562         """Add an InfoExtractor object to the end of the list."""
 563         self._ies.append(ie)
 564         if not isinstance(ie, type):
 565             self._ies_instances[ie.ie_key()] = ie
 566             ie.set_downloader(self)
 567
 568     def get_info_extractor(self, ie_key):
 569         """
 570         Get an instance of an IE with name ie_key, it will try to get one from
 571         the _ies list, if there's no instance it will create a new one and add
 572         it to the extractor list.
 573         """
 574         ie = self._ies_instances.get(ie_key)
 575         if ie is None:
 576             ie = get_info_extractor(ie_key)()
 577             self.add_info_extractor(ie)
 578         return ie
 579
 580     def add_default_info_extractors(self):
 581         """
 582         Add the InfoExtractors returned by gen_extractors to the end of the list
 583         """
 584         for ie in gen_extractor_classes():
 585             self.add_info_extractor(ie)
 586
 587     def add_post_processor(self, pp, when='normal'):
 588         """Add a PostProcessor object to the end of the chain."""
 589         self._pps[when].append(pp)
 590         pp.set_downloader(self)
 591
 592     def add_post_hook(self, ph):
 593         """Add the post hook"""
 594         self._post_hooks.append(ph)
 595
 596     def add_progress_hook(self, ph):
 597         """Add the progress hook (currently only for the file downloader)"""
 598         self._progress_hooks.append(ph)
 599
 600     def _bidi_workaround(self, message):
 601         if not hasattr(self, '_output_channel'):
 602             return message
 603
 604         assert hasattr(self, '_output_process')
 605         assert isinstance(message, compat_str)
 606         line_count = message.count('\n') + 1
 607         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 608         self._output_process.stdin.flush()
 609         res = ''.join(self._output_channel.readline().decode('utf-8')
 610                       for _ in range(line_count))
 611         return res[:-len('\n')]
 612
 613     def to_screen(self, message, skip_eol=False):
 614         """Print message to stdout if not in quiet mode."""
 615         return self.to_stdout(message, skip_eol, check_quiet=True)
 616
 617     def _write_string(self, s, out=None):
 618         write_string(s, out=out, encoding=self.params.get('encoding'))
 619
 620     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 621         """Print message to stdout if not in quiet mode."""
 622         if self.params.get('logger'):
 623             self.params['logger'].debug(message)
 624         elif not check_quiet or not self.params.get('quiet', False):
 625             message = self._bidi_workaround(message)
 626             terminator = ['\n', ''][skip_eol]
 627             output = message + terminator
 628
 629             self._write_string(output, self._screen_file)
 630
 631     def to_stderr(self, message):
 632         """Print message to stderr."""
 633         assert isinstance(message, compat_str)
 634         if self.params.get('logger'):
 635             self.params['logger'].error(message)
 636         else:
 637             message = self._bidi_workaround(message)
 638             output = message + '\n'
 639             self._write_string(output, self._err_file)
 640
 641     def to_console_title(self, message):
 642         if not self.params.get('consoletitle', False):
 643             return
 644         if compat_os_name == 'nt':
 645             if ctypes.windll.kernel32.GetConsoleWindow():
 646                 # c_wchar_p() might not be necessary if `message` is
 647                 # already of type unicode()
 648                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 649         elif 'TERM' in os.environ:
 650             self._write_string('\033]0;%s\007' % message, self._screen_file)
 651
 652     def save_console_title(self):
 653         if not self.params.get('consoletitle', False):
 654             return
 655         if self.params.get('simulate', False):
 656             return
 657         if compat_os_name != 'nt' and 'TERM' in os.environ:
 658             # Save the title on stack
 659             self._write_string('\033[22;0t', self._screen_file)
 660
 661     def restore_console_title(self):
 662         if not self.params.get('consoletitle', False):
 663             return
 664         if self.params.get('simulate', False):
 665             return
 666         if compat_os_name != 'nt' and 'TERM' in os.environ:
 667             # Restore the title from stack
 668             self._write_string('\033[23;0t', self._screen_file)
 669
 670     def __enter__(self):
 671         self.save_console_title()
 672         return self
 673
 674     def __exit__(self, *args):
 675         self.restore_console_title()
 676
 677         if self.params.get('cookiefile') is not None:
 678             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 679
 680     def trouble(self, message=None, tb=None):
 681         """Determine action to take when a download problem appears.
 682
 683         Depending on if the downloader has been configured to ignore
 684         download errors or not, this method may throw an exception or
 685         not when errors are found, after printing the message.
 686
 687         tb, if given, is additional traceback information.
 688         """
 689         if message is not None:
 690             self.to_stderr(message)
 691         if self.params.get('verbose'):
 692             if tb is None:
 693                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 694                     tb = ''
 695                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 696                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 697                     tb += encode_compat_str(traceback.format_exc())
 698                 else:
 699                     tb_data = traceback.format_list(traceback.extract_stack())
 700                     tb = ''.join(tb_data)
 701             self.to_stderr(tb)
 702         if not self.params.get('ignoreerrors', False):
 703             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 704                 exc_info = sys.exc_info()[1].exc_info
 705             else:
 706                 exc_info = sys.exc_info()
 707             raise DownloadError(message, exc_info)
 708         self._download_retcode = 1
 709
 710     def report_warning(self, message):
 711         '''
 712         Print the message to stderr, it will be prefixed with 'WARNING:'
 713         If stderr is a tty file the 'WARNING:' will be colored
 714         '''
 715         if self.params.get('logger') is not None:
 716             self.params['logger'].warning(message)
 717         else:
 718             if self.params.get('no_warnings'):
 719                 return
 720             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 721                 _msg_header = '\033[0;33mWARNING:\033[0m'
 722             else:
 723                 _msg_header = 'WARNING:'
 724             warning_message = '%s %s' % (_msg_header, message)
 725             self.to_stderr(warning_message)
 726
 727     def report_error(self, message, tb=None):
 728         '''
 729         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 730         in red if stderr is a tty file.
 731         '''
 732         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 733             _msg_header = '\033[0;31mERROR:\033[0m'
 734         else:
 735             _msg_header = 'ERROR:'
 736         error_message = '%s %s' % (_msg_header, message)
 737         self.trouble(error_message, tb)
 738
 739     def report_file_already_downloaded(self, file_name):
 740         """Report file has already been fully downloaded."""
 741         try:
 742             self.to_screen('[download] %s has already been downloaded' % file_name)
 743         except UnicodeEncodeError:
 744             self.to_screen('[download] The file has already been downloaded')
 745
 746     def report_file_delete(self, file_name):
 747         """Report that existing file will be deleted."""
 748         try:
 749             self.to_screen('Deleting existing file %s' % file_name)
 750         except UnicodeEncodeError:
 751             self.to_screen('Deleting existing file')
 752
 753     def parse_outtmpl(self):
 754         outtmpl_dict = self.params.get('outtmpl', {})
 755         if not isinstance(outtmpl_dict, dict):
 756             outtmpl_dict = {'default': outtmpl_dict}
 757         outtmpl_dict.update({
 758             k: v for k, v in DEFAULT_OUTTMPL.items()
 759             if not outtmpl_dict.get(k)})
 760         for key, val in outtmpl_dict.items():
 761             if isinstance(val, bytes):
 762                 self.report_warning(
 763                     'Parameter outtmpl is bytes, but should be a unicode string. '
 764                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 765         return outtmpl_dict
 766
 767     def _prepare_filename(self, info_dict, tmpl_type='default'):
 768         try:
 769             template_dict = dict(info_dict)
 770
 771             template_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
 772                 formatSeconds(info_dict['duration'], '-')
 773                 if info_dict.get('duration', None) is not None
 774                 else None)
 775
 776             template_dict['epoch'] = int(time.time())
 777             autonumber_size = self.params.get('autonumber_size')
 778             if autonumber_size is None:
 779                 autonumber_size = 5
 780             template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
 781             if template_dict.get('resolution') is None:
 782                 if template_dict.get('width') and template_dict.get('height'):
 783                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 784                 elif template_dict.get('height'):
 785                     template_dict['resolution'] = '%sp' % template_dict['height']
 786                 elif template_dict.get('width'):
 787                     template_dict['resolution'] = '%dx?' % template_dict['width']
 788
 789             sanitize = lambda k, v: sanitize_filename(
 790                 compat_str(v),
 791                 restricted=self.params.get('restrictfilenames'),
 792                 is_id=(k == 'id' or k.endswith('_id')))
 793             template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
 794                                  for k, v in template_dict.items()
 795                                  if v is not None and not isinstance(v, (list, tuple, dict)))
 796             na = self.params.get('outtmpl_na_placeholder', 'NA')
 797             template_dict = collections.defaultdict(lambda: na, template_dict)
 798
 799             outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
 800             force_ext = OUTTMPL_TYPES.get(tmpl_type)
 801
 802             # For fields playlist_index and autonumber convert all occurrences
 803             # of %(field)s to %(field)0Nd for backward compatibility
 804             field_size_compat_map = {
 805                 'playlist_index': len(str(template_dict['n_entries'])),
 806                 'autonumber': autonumber_size,
 807             }
 808             FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
 809             mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
 810             if mobj:
 811                 outtmpl = re.sub(
 812                     FIELD_SIZE_COMPAT_RE,
 813                     r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
 814                     outtmpl)
 815
 816             # As of [1] format syntax is:
 817             #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
 818             # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
 819             FORMAT_RE = r'''(?x)
 820                 (?<!%)
 821                 %
 822                 \({0}\)  # mapping key
 823                 (?:[#0\-+ ]+)?  # conversion flags (optional)
 824                 (?:\d+)?  # minimum field width (optional)
 825                 (?:\.\d+)?  # precision (optional)
 826                 [hlL]?  # length modifier (optional)
 827                 (?P<type>[diouxXeEfFgGcrs%])  # conversion type
 828             '''
 829
 830             numeric_fields = list(self._NUMERIC_FIELDS)
 831
 832             # Format date
 833             FORMAT_DATE_RE = FORMAT_RE.format(r'(?P<key>(?P<field>\w+)>(?P<format>.+?))')
 834             for mobj in re.finditer(FORMAT_DATE_RE, outtmpl):
 835                 conv_type, field, frmt, key = mobj.group('type', 'field', 'format', 'key')
 836                 if key in template_dict:
 837                     continue
 838                 value = strftime_or_none(template_dict.get(field), frmt, na)
 839                 if conv_type in 'crs':  # string
 840                     value = sanitize(field, value)
 841                 else:  # number
 842                     numeric_fields.append(key)
 843                     value = float_or_none(value, default=None)
 844                 if value is not None:
 845                     template_dict[key] = value
 846
 847             # Missing numeric fields used together with integer presentation types
 848             # in format specification will break the argument substitution since
 849             # string NA placeholder is returned for missing fields. We will patch
 850             # output template for missing fields to meet string presentation type.
 851             for numeric_field in numeric_fields:
 852                 if numeric_field not in template_dict:
 853                     outtmpl = re.sub(
 854                         FORMAT_RE.format(re.escape(numeric_field)),
 855                         r'%({0})s'.format(numeric_field), outtmpl)
 856
 857             # expand_path translates '%%' into '%' and '$$' into '$'
 858             # correspondingly that is not what we want since we need to keep
 859             # '%%' intact for template dict substitution step. Working around
 860             # with boundary-alike separator hack.
 861             sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 862             outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 863
 864             # outtmpl should be expand_path'ed before template dict substitution
 865             # because meta fields may contain env variables we don't want to
 866             # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
 867             # title "Hello $PATH", we don't want `$PATH` to be expanded.
 868             filename = expand_path(outtmpl).replace(sep, '') % template_dict
 869
 870             if force_ext is not None:
 871                 filename = replace_extension(filename, force_ext, template_dict.get('ext'))
 872
 873             # https://github.com/blackjack4494/youtube-dlc/issues/85
 874             trim_file_name = self.params.get('trim_file_name', False)
 875             if trim_file_name:
 876                 fn_groups = filename.rsplit('.')
 877                 ext = fn_groups[-1]
 878                 sub_ext = ''
 879                 if len(fn_groups) > 2:
 880                     sub_ext = fn_groups[-2]
 881                 filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
 882
 883             return filename
 884         except ValueError as err:
 885             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 886             return None
 887
 888     def prepare_filename(self, info_dict, dir_type='', warn=False):
 889         """Generate the output filename."""
 890         paths = self.params.get('paths', {})
 891         assert isinstance(paths, dict)
 892         filename = self._prepare_filename(info_dict, dir_type or 'default')
 893
 894         if warn and not self.__prepare_filename_warned:
 895             if not paths:
 896                 pass
 897             elif filename == '-':
 898                 self.report_warning('--paths is ignored when an outputting to stdout')
 899             elif os.path.isabs(filename):
 900                 self.report_warning('--paths is ignored since an absolute path is given in output template')
 901             self.__prepare_filename_warned = True
 902         if filename == '-' or not filename:
 903             return filename
 904
 905         homepath = expand_path(paths.get('home', '').strip())
 906         assert isinstance(homepath, compat_str)
 907         subdir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
 908         assert isinstance(subdir, compat_str)
 909         path = os.path.join(homepath, subdir, filename)
 910
 911         # Temporary fix for #4787
 912         # 'Treat' all problem characters by passing filename through preferredencoding
 913         # to workaround encoding issues with subprocess on python2 @ Windows
 914         if sys.version_info < (3, 0) and sys.platform == 'win32':
 915             path = encodeFilename(path, True).decode(preferredencoding())
 916         return sanitize_path(path, force=self.params.get('windowsfilenames'))
 917
 918     def _match_entry(self, info_dict, incomplete):
 919         """ Returns None if the file should be downloaded """
 920
 921         def check_filter():
 922             video_title = info_dict.get('title', info_dict.get('id', 'video'))
 923             if 'title' in info_dict:
 924                 # This can happen when we're just evaluating the playlist
 925                 title = info_dict['title']
 926                 matchtitle = self.params.get('matchtitle', False)
 927                 if matchtitle:
 928                     if not re.search(matchtitle, title, re.IGNORECASE):
 929                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 930                 rejecttitle = self.params.get('rejecttitle', False)
 931                 if rejecttitle:
 932                     if re.search(rejecttitle, title, re.IGNORECASE):
 933                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 934             date = info_dict.get('upload_date')
 935             if date is not None:
 936                 dateRange = self.params.get('daterange', DateRange())
 937                 if date not in dateRange:
 938                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 939             view_count = info_dict.get('view_count')
 940             if view_count is not None:
 941                 min_views = self.params.get('min_views')
 942                 if min_views is not None and view_count < min_views:
 943                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 944                 max_views = self.params.get('max_views')
 945                 if max_views is not None and view_count > max_views:
 946                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 947             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
 948                 return 'Skipping "%s" because it is age restricted' % video_title
 949             if self.in_download_archive(info_dict):
 950                 return '%s has already been recorded in archive' % video_title
 951
 952             if not incomplete:
 953                 match_filter = self.params.get('match_filter')
 954                 if match_filter is not None:
 955                     ret = match_filter(info_dict)
 956                     if ret is not None:
 957                         return ret
 958             return None
 959
 960         reason = check_filter()
 961         if reason is not None:
 962             self.to_screen('[download] ' + reason)
 963             if reason.endswith('has already been recorded in the archive') and self.params.get('break_on_existing', False):
 964                 raise ExistingVideoReached()
 965             elif self.params.get('break_on_reject', False):
 966                 raise RejectedVideoReached()
 967         return reason
 968
 969     @staticmethod
 970     def add_extra_info(info_dict, extra_info):
 971         '''Set the keys from extra_info in info dict if they are missing'''
 972         for key, value in extra_info.items():
 973             info_dict.setdefault(key, value)
 974
 975     def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info={},
 976                      process=True, force_generic_extractor=False):
 977         '''
 978         Returns a list with a dictionary for each video we find.
 979         If 'download', also downloads the videos.
 980         extra_info is a dict containing the extra values to add to each result
 981         '''
 982
 983         if not ie_key and force_generic_extractor:
 984             ie_key = 'Generic'
 985
 986         if ie_key:
 987             ies = [self.get_info_extractor(ie_key)]
 988         else:
 989             ies = self._ies
 990
 991         for ie in ies:
 992             if not ie.suitable(url):
 993                 continue
 994
 995             ie_key = ie.ie_key()
 996             ie = self.get_info_extractor(ie_key)
 997             if not ie.working():
 998                 self.report_warning('The program functionality for this site has been marked as broken, '
 999                                     'and will probably not work.')
1000
1001             try:
1002                 temp_id = str_or_none(
1003                     ie.extract_id(url) if callable(getattr(ie, 'extract_id', None))
1004                     else ie._match_id(url))
1005             except (AssertionError, IndexError, AttributeError):
1006                 temp_id = None
1007             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1008                 self.to_screen("[%s] %s: has already been recorded in archive" % (
1009                                ie_key, temp_id))
1010                 break
1011             return self.__extract_info(url, ie, download, extra_info, process, info_dict)
1012         else:
1013             self.report_error('no suitable InfoExtractor for URL %s' % url)
1014
1015     def __handle_extraction_exceptions(func):
1016         def wrapper(self, *args, **kwargs):
1017             try:
1018                 return func(self, *args, **kwargs)
1019             except GeoRestrictedError as e:
1020                 msg = e.msg
1021                 if e.countries:
1022                     msg += '\nThis video is available in %s.' % ', '.join(
1023                         map(ISO3166Utils.short2full, e.countries))
1024                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1025                 self.report_error(msg)
1026             except ExtractorError as e:  # An error we somewhat expected
1027                 self.report_error(compat_str(e), e.format_traceback())
1028             except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
1029                 raise
1030             except Exception as e:
1031                 if self.params.get('ignoreerrors', False):
1032                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
1033                 else:
1034                     raise
1035         return wrapper
1036
1037     @__handle_extraction_exceptions
1038     def __extract_info(self, url, ie, download, extra_info, process, info_dict):
1039         ie_result = ie.extract(url)
1040         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1041             return
1042         if isinstance(ie_result, list):
1043             # Backwards compatibility: old IE result format
1044             ie_result = {
1045                 '_type': 'compat_list',
1046                 'entries': ie_result,
1047             }
1048         if info_dict:
1049             if info_dict.get('id'):
1050                 ie_result['id'] = info_dict['id']
1051             if info_dict.get('title'):
1052                 ie_result['title'] = info_dict['title']
1053         self.add_default_extra_info(ie_result, ie, url)
1054         if process:
1055             return self.process_ie_result(ie_result, download, extra_info)
1056         else:
1057             return ie_result
1058
1059     def add_default_extra_info(self, ie_result, ie, url):
1060         self.add_extra_info(ie_result, {
1061             'extractor': ie.IE_NAME,
1062             'webpage_url': url,
1063             'webpage_url_basename': url_basename(url),
1064             'extractor_key': ie.ie_key(),
1065         })
1066
1067     def process_ie_result(self, ie_result, download=True, extra_info={}):
1068         """
1069         Take the result of the ie(may be modified) and resolve all unresolved
1070         references (URLs, playlist items).
1071
1072         It will also download the videos if 'download'.
1073         Returns the resolved ie_result.
1074         """
1075         result_type = ie_result.get('_type', 'video')
1076
1077         if result_type in ('url', 'url_transparent'):
1078             ie_result['url'] = sanitize_url(ie_result['url'])
1079             extract_flat = self.params.get('extract_flat', False)
1080             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1081                     or extract_flat is True):
1082                 self.__forced_printings(ie_result, self.prepare_filename(ie_result), incomplete=True)
1083                 return ie_result
1084
1085         if result_type == 'video':
1086             self.add_extra_info(ie_result, extra_info)
1087             return self.process_video_result(ie_result, download=download)
1088         elif result_type == 'url':
1089             # We have to add extra_info to the results because it may be
1090             # contained in a playlist
1091             return self.extract_info(ie_result['url'],
1092                                      download, info_dict=ie_result,
1093                                      ie_key=ie_result.get('ie_key'),
1094                                      extra_info=extra_info)
1095         elif result_type == 'url_transparent':
1096             # Use the information from the embedding page
1097             info = self.extract_info(
1098                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1099                 extra_info=extra_info, download=False, process=False)
1100
1101             # extract_info may return None when ignoreerrors is enabled and
1102             # extraction failed with an error, don't crash and return early
1103             # in this case
1104             if not info:
1105                 return info
1106
1107             force_properties = dict(
1108                 (k, v) for k, v in ie_result.items() if v is not None)
1109             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1110                 if f in force_properties:
1111                     del force_properties[f]
1112             new_result = info.copy()
1113             new_result.update(force_properties)
1114
1115             # Extracted info may not be a video result (i.e.
1116             # info.get('_type', 'video') != video) but rather an url or
1117             # url_transparent. In such cases outer metadata (from ie_result)
1118             # should be propagated to inner one (info). For this to happen
1119             # _type of info should be overridden with url_transparent. This
1120             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1121             if new_result.get('_type') == 'url':
1122                 new_result['_type'] = 'url_transparent'
1123
1124             return self.process_ie_result(
1125                 new_result, download=download, extra_info=extra_info)
1126         elif result_type in ('playlist', 'multi_video'):
1127             # Protect from infinite recursion due to recursively nested playlists
1128             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1129             webpage_url = ie_result['webpage_url']
1130             if webpage_url in self._playlist_urls:
1131                 self.to_screen(
1132                     '[download] Skipping already downloaded playlist: %s'
1133                     % ie_result.get('title') or ie_result.get('id'))
1134                 return
1135
1136             self._playlist_level += 1
1137             self._playlist_urls.add(webpage_url)
1138             try:
1139                 return self.__process_playlist(ie_result, download)
1140             finally:
1141                 self._playlist_level -= 1
1142                 if not self._playlist_level:
1143                     self._playlist_urls.clear()
1144         elif result_type == 'compat_list':
1145             self.report_warning(
1146                 'Extractor %s returned a compat_list result. '
1147                 'It needs to be updated.' % ie_result.get('extractor'))
1148
1149             def _fixup(r):
1150                 self.add_extra_info(
1151                     r,
1152                     {
1153                         'extractor': ie_result['extractor'],
1154                         'webpage_url': ie_result['webpage_url'],
1155                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
1156                         'extractor_key': ie_result['extractor_key'],
1157                     }
1158                 )
1159                 return r
1160             ie_result['entries'] = [
1161                 self.process_ie_result(_fixup(r), download, extra_info)
1162                 for r in ie_result['entries']
1163             ]
1164             return ie_result
1165         else:
1166             raise Exception('Invalid result type: %s' % result_type)
1167
1168     def __process_playlist(self, ie_result, download):
1169         # We process each entry in the playlist
1170         playlist = ie_result.get('title') or ie_result.get('id')
1171         self.to_screen('[download] Downloading playlist: %s' % playlist)
1172
1173         if self.params.get('allow_playlist_files', True):
1174             ie_copy = {
1175                 'playlist': playlist,
1176                 'playlist_id': ie_result.get('id'),
1177                 'playlist_title': ie_result.get('title'),
1178                 'playlist_uploader': ie_result.get('uploader'),
1179                 'playlist_uploader_id': ie_result.get('uploader_id'),
1180                 'playlist_index': 0
1181             }
1182             ie_copy.update(dict(ie_result))
1183
1184             def ensure_dir_exists(path):
1185                 return make_dir(path, self.report_error)
1186
1187             if self.params.get('writeinfojson', False):
1188                 infofn = self.prepare_filename(ie_copy, 'pl_infojson')
1189                 if not ensure_dir_exists(encodeFilename(infofn)):
1190                     return
1191                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
1192                     self.to_screen('[info] Playlist metadata is already present')
1193                 else:
1194                     playlist_info = dict(ie_result)
1195                     # playlist_info['entries'] = list(playlist_info['entries'])  # Entries is a generator which shouldnot be resolved here
1196                     del playlist_info['entries']
1197                     self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
1198                     try:
1199                         write_json_file(self.filter_requested_info(playlist_info), infofn)
1200                     except (OSError, IOError):
1201                         self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
1202
1203             if self.params.get('writedescription', False):
1204                 descfn = self.prepare_filename(ie_copy, 'pl_description')
1205                 if not ensure_dir_exists(encodeFilename(descfn)):
1206                     return
1207                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
1208                     self.to_screen('[info] Playlist description is already present')
1209                 elif ie_result.get('description') is None:
1210                     self.report_warning('There\'s no playlist description to write.')
1211                 else:
1212                     try:
1213                         self.to_screen('[info] Writing playlist description to: ' + descfn)
1214                         with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1215                             descfile.write(ie_result['description'])
1216                     except (OSError, IOError):
1217                         self.report_error('Cannot write playlist description file ' + descfn)
1218                         return
1219
1220         playlist_results = []
1221
1222         playliststart = self.params.get('playliststart', 1) - 1
1223         playlistend = self.params.get('playlistend')
1224         # For backwards compatibility, interpret -1 as whole list
1225         if playlistend == -1:
1226             playlistend = None
1227
1228         playlistitems_str = self.params.get('playlist_items')
1229         playlistitems = None
1230         if playlistitems_str is not None:
1231             def iter_playlistitems(format):
1232                 for string_segment in format.split(','):
1233                     if '-' in string_segment:
1234                         start, end = string_segment.split('-')
1235                         for item in range(int(start), int(end) + 1):
1236                             yield int(item)
1237                     else:
1238                         yield int(string_segment)
1239             playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1240
1241         ie_entries = ie_result['entries']
1242
1243         def make_playlistitems_entries(list_ie_entries):
1244             num_entries = len(list_ie_entries)
1245             return [
1246                 list_ie_entries[i - 1] for i in playlistitems
1247                 if -num_entries <= i - 1 < num_entries]
1248
1249         def report_download(num_entries):
1250             self.to_screen(
1251                 '[%s] playlist %s: Downloading %d videos' %
1252                 (ie_result['extractor'], playlist, num_entries))
1253
1254         if isinstance(ie_entries, list):
1255             n_all_entries = len(ie_entries)
1256             if playlistitems:
1257                 entries = make_playlistitems_entries(ie_entries)
1258             else:
1259                 entries = ie_entries[playliststart:playlistend]
1260             n_entries = len(entries)
1261             self.to_screen(
1262                 '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
1263                 (ie_result['extractor'], playlist, n_all_entries, n_entries))
1264         elif isinstance(ie_entries, PagedList):
1265             if playlistitems:
1266                 entries = []
1267                 for item in playlistitems:
1268                     entries.extend(ie_entries.getslice(
1269                         item - 1, item
1270                     ))
1271             else:
1272                 entries = ie_entries.getslice(
1273                     playliststart, playlistend)
1274             n_entries = len(entries)
1275             report_download(n_entries)
1276         else:  # iterable
1277             if playlistitems:
1278                 entries = make_playlistitems_entries(list(itertools.islice(
1279                     ie_entries, 0, max(playlistitems))))
1280             else:
1281                 entries = list(itertools.islice(
1282                     ie_entries, playliststart, playlistend))
1283             n_entries = len(entries)
1284             report_download(n_entries)
1285
1286         if self.params.get('playlistreverse', False):
1287             entries = entries[::-1]
1288
1289         if self.params.get('playlistrandom', False):
1290             random.shuffle(entries)
1291
1292         x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1293
1294         for i, entry in enumerate(entries, 1):
1295             self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1296             # This __x_forwarded_for_ip thing is a bit ugly but requires
1297             # minimal changes
1298             if x_forwarded_for:
1299                 entry['__x_forwarded_for_ip'] = x_forwarded_for
1300             extra = {
1301                 'n_entries': n_entries,
1302                 'playlist': playlist,
1303                 'playlist_id': ie_result.get('id'),
1304                 'playlist_title': ie_result.get('title'),
1305                 'playlist_uploader': ie_result.get('uploader'),
1306                 'playlist_uploader_id': ie_result.get('uploader_id'),
1307                 'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
1308                 'extractor': ie_result['extractor'],
1309                 'webpage_url': ie_result['webpage_url'],
1310                 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1311                 'extractor_key': ie_result['extractor_key'],
1312             }
1313
1314             if self._match_entry(entry, incomplete=True) is not None:
1315                 continue
1316
1317             entry_result = self.__process_iterable_entry(entry, download, extra)
1318             # TODO: skip failed (empty) entries?
1319             playlist_results.append(entry_result)
1320         ie_result['entries'] = playlist_results
1321         self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1322         return ie_result
1323
1324     @__handle_extraction_exceptions
1325     def __process_iterable_entry(self, entry, download, extra_info):
1326         return self.process_ie_result(
1327             entry, download=download, extra_info=extra_info)
1328
1329     def _build_format_filter(self, filter_spec):
1330         " Returns a function to filter the formats according to the filter_spec "
1331
1332         OPERATORS = {
1333             '<': operator.lt,
1334             '<=': operator.le,
1335             '>': operator.gt,
1336             '>=': operator.ge,
1337             '=': operator.eq,
1338             '!=': operator.ne,
1339         }
1340         operator_rex = re.compile(r'''(?x)\s*
1341             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
1342             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1343             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
1344             $
1345             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1346         m = operator_rex.search(filter_spec)
1347         if m:
1348             try:
1349                 comparison_value = int(m.group('value'))
1350             except ValueError:
1351                 comparison_value = parse_filesize(m.group('value'))
1352                 if comparison_value is None:
1353                     comparison_value = parse_filesize(m.group('value') + 'B')
1354                 if comparison_value is None:
1355                     raise ValueError(
1356                         'Invalid value %r in format specification %r' % (
1357                             m.group('value'), filter_spec))
1358             op = OPERATORS[m.group('op')]
1359
1360         if not m:
1361             STR_OPERATORS = {
1362                 '=': operator.eq,
1363                 '^=': lambda attr, value: attr.startswith(value),
1364                 '$=': lambda attr, value: attr.endswith(value),
1365                 '*=': lambda attr, value: value in attr,
1366             }
1367             str_operator_rex = re.compile(r'''(?x)
1368                 \s*(?P<key>[a-zA-Z0-9._-]+)
1369                 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
1370                 \s*(?P<value>[a-zA-Z0-9._-]+)
1371                 \s*$
1372                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1373             m = str_operator_rex.search(filter_spec)
1374             if m:
1375                 comparison_value = m.group('value')
1376                 str_op = STR_OPERATORS[m.group('op')]
1377                 if m.group('negation'):
1378                     op = lambda attr, value: not str_op(attr, value)
1379                 else:
1380                     op = str_op
1381
1382         if not m:
1383             raise ValueError('Invalid filter specification %r' % filter_spec)
1384
1385         def _filter(f):
1386             actual_value = f.get(m.group('key'))
1387             if actual_value is None:
1388                 return m.group('none_inclusive')
1389             return op(actual_value, comparison_value)
1390         return _filter
1391
1392     def _default_format_spec(self, info_dict, download=True):
1393
1394         def can_merge():
1395             merger = FFmpegMergerPP(self)
1396             return merger.available and merger.can_merge()
1397
1398         prefer_best = (
1399             not self.params.get('simulate', False)
1400             and download
1401             and (
1402                 not can_merge()
1403                 or info_dict.get('is_live', False)
1404                 or self.outtmpl_dict['default'] == '-'))
1405
1406         return (
1407             'best/bestvideo+bestaudio'
1408             if prefer_best
1409             else 'bestvideo*+bestaudio/best'
1410             if not self.params.get('allow_multiple_audio_streams', False)
1411             else 'bestvideo+bestaudio/best')
1412
1413     def build_format_selector(self, format_spec):
1414         def syntax_error(note, start):
1415             message = (
1416                 'Invalid format specification: '
1417                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1418             return SyntaxError(message)
1419
1420         PICKFIRST = 'PICKFIRST'
1421         MERGE = 'MERGE'
1422         SINGLE = 'SINGLE'
1423         GROUP = 'GROUP'
1424         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1425
1426         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1427                                   'video': self.params.get('allow_multiple_video_streams', False)}
1428
1429         def _parse_filter(tokens):
1430             filter_parts = []
1431             for type, string, start, _, _ in tokens:
1432                 if type == tokenize.OP and string == ']':
1433                     return ''.join(filter_parts)
1434                 else:
1435                     filter_parts.append(string)
1436
1437         def _remove_unused_ops(tokens):
1438             # Remove operators that we don't use and join them with the surrounding strings
1439             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1440             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1441             last_string, last_start, last_end, last_line = None, None, None, None
1442             for type, string, start, end, line in tokens:
1443                 if type == tokenize.OP and string == '[':
1444                     if last_string:
1445                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1446                         last_string = None
1447                     yield type, string, start, end, line
1448                     # everything inside brackets will be handled by _parse_filter
1449                     for type, string, start, end, line in tokens:
1450                         yield type, string, start, end, line
1451                         if type == tokenize.OP and string == ']':
1452                             break
1453                 elif type == tokenize.OP and string in ALLOWED_OPS:
1454                     if last_string:
1455                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1456                         last_string = None
1457                     yield type, string, start, end, line
1458                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1459                     if not last_string:
1460                         last_string = string
1461                         last_start = start
1462                         last_end = end
1463                     else:
1464                         last_string += string
1465             if last_string:
1466                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1467
1468         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1469             selectors = []
1470             current_selector = None
1471             for type, string, start, _, _ in tokens:
1472                 # ENCODING is only defined in python 3.x
1473                 if type == getattr(tokenize, 'ENCODING', None):
1474                     continue
1475                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1476                     current_selector = FormatSelector(SINGLE, string, [])
1477                 elif type == tokenize.OP:
1478                     if string == ')':
1479                         if not inside_group:
1480                             # ')' will be handled by the parentheses group
1481                             tokens.restore_last_token()
1482                         break
1483                     elif inside_merge and string in ['/', ',']:
1484                         tokens.restore_last_token()
1485                         break
1486                     elif inside_choice and string == ',':
1487                         tokens.restore_last_token()
1488                         break
1489                     elif string == ',':
1490                         if not current_selector:
1491                             raise syntax_error('"," must follow a format selector', start)
1492                         selectors.append(current_selector)
1493                         current_selector = None
1494                     elif string == '/':
1495                         if not current_selector:
1496                             raise syntax_error('"/" must follow a format selector', start)
1497                         first_choice = current_selector
1498                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1499                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1500                     elif string == '[':
1501                         if not current_selector:
1502                             current_selector = FormatSelector(SINGLE, 'best', [])
1503                         format_filter = _parse_filter(tokens)
1504                         current_selector.filters.append(format_filter)
1505                     elif string == '(':
1506                         if current_selector:
1507                             raise syntax_error('Unexpected "("', start)
1508                         group = _parse_format_selection(tokens, inside_group=True)
1509                         current_selector = FormatSelector(GROUP, group, [])
1510                     elif string == '+':
1511                         if not current_selector:
1512                             raise syntax_error('Unexpected "+"', start)
1513                         selector_1 = current_selector
1514                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
1515                         if not selector_2:
1516                             raise syntax_error('Expected a selector', start)
1517                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1518                     else:
1519                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1520                 elif type == tokenize.ENDMARKER:
1521                     break
1522             if current_selector:
1523                 selectors.append(current_selector)
1524             return selectors
1525
1526         def _build_selector_function(selector):
1527             if isinstance(selector, list):  # ,
1528                 fs = [_build_selector_function(s) for s in selector]
1529
1530                 def selector_function(ctx):
1531                     for f in fs:
1532                         for format in f(ctx):
1533                             yield format
1534                 return selector_function
1535
1536             elif selector.type == GROUP:  # ()
1537                 selector_function = _build_selector_function(selector.selector)
1538
1539             elif selector.type == PICKFIRST:  # /
1540                 fs = [_build_selector_function(s) for s in selector.selector]
1541
1542                 def selector_function(ctx):
1543                     for f in fs:
1544                         picked_formats = list(f(ctx))
1545                         if picked_formats:
1546                             return picked_formats
1547                     return []
1548
1549             elif selector.type == SINGLE:  # atom
1550                 format_spec = selector.selector if selector.selector is not None else 'best'
1551
1552                 if format_spec == 'all':
1553                     def selector_function(ctx):
1554                         formats = list(ctx['formats'])
1555                         if formats:
1556                             for f in formats:
1557                                 yield f
1558
1559                 else:
1560                     format_fallback = False
1561                     format_spec_obj = re.match(r'(best|worst|b|w)(video|audio|v|a)?(\*)?$', format_spec)
1562                     if format_spec_obj is not None:
1563                         format_idx = 0 if format_spec_obj.group(1)[0] == 'w' else -1
1564                         format_type = format_spec_obj.group(2)[0] if format_spec_obj.group(2) else False
1565                         not_format_type = 'v' if format_type == 'a' else 'a'
1566                         format_modified = format_spec_obj.group(3) is not None
1567
1568                         format_fallback = not format_type and not format_modified  # for b, w
1569                         filter_f = ((lambda f: f.get(format_type + 'codec') != 'none')
1570                                     if format_type and format_modified  # bv*, ba*, wv*, wa*
1571                                     else (lambda f: f.get(not_format_type + 'codec') == 'none')
1572                                     if format_type  # bv, ba, wv, wa
1573                                     else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
1574                                     if not format_modified  # b, w
1575                                     else None)  # b*, w*
1576                     else:
1577                         format_idx = -1
1578                         filter_f = ((lambda f: f.get('ext') == format_spec)
1579                                     if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
1580                                     else (lambda f: f.get('format_id') == format_spec))  # id
1581
1582                     def selector_function(ctx):
1583                         formats = list(ctx['formats'])
1584                         if not formats:
1585                             return
1586                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
1587                         if matches:
1588                             yield matches[format_idx]
1589                         elif format_fallback == 'force' or (format_fallback and ctx['incomplete_formats']):
1590                             # for extractors with incomplete formats (audio only (soundcloud)
1591                             # or video only (imgur)) best/worst will fallback to
1592                             # best/worst {video,audio}-only format
1593                             yield formats[format_idx]
1594
1595             elif selector.type == MERGE:        # +
1596                 def _merge(formats_pair):
1597                     format_1, format_2 = formats_pair
1598
1599                     formats_info = []
1600                     formats_info.extend(format_1.get('requested_formats', (format_1,)))
1601                     formats_info.extend(format_2.get('requested_formats', (format_2,)))
1602
1603                     if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1604                         get_no_more = {"video": False, "audio": False}
1605                         for (i, fmt_info) in enumerate(formats_info):
1606                             for aud_vid in ["audio", "video"]:
1607                                 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1608                                     if get_no_more[aud_vid]:
1609                                         formats_info.pop(i)
1610                                     get_no_more[aud_vid] = True
1611
1612                     if len(formats_info) == 1:
1613                         return formats_info[0]
1614
1615                     video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1616                     audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1617
1618                     the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1619                     the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1620
1621                     output_ext = self.params.get('merge_output_format')
1622                     if not output_ext:
1623                         if the_only_video:
1624                             output_ext = the_only_video['ext']
1625                         elif the_only_audio and not video_fmts:
1626                             output_ext = the_only_audio['ext']
1627                         else:
1628                             output_ext = 'mkv'
1629
1630                     new_dict = {
1631                         'requested_formats': formats_info,
1632                         'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
1633                         'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
1634                         'ext': output_ext,
1635                     }
1636
1637                     if the_only_video:
1638                         new_dict.update({
1639                             'width': the_only_video.get('width'),
1640                             'height': the_only_video.get('height'),
1641                             'resolution': the_only_video.get('resolution'),
1642                             'fps': the_only_video.get('fps'),
1643                             'vcodec': the_only_video.get('vcodec'),
1644                             'vbr': the_only_video.get('vbr'),
1645                             'stretched_ratio': the_only_video.get('stretched_ratio'),
1646                         })
1647
1648                     if the_only_audio:
1649                         new_dict.update({
1650                             'acodec': the_only_audio.get('acodec'),
1651                             'abr': the_only_audio.get('abr'),
1652                         })
1653
1654                     return new_dict
1655
1656                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
1657
1658                 def selector_function(ctx):
1659                     for pair in itertools.product(
1660                             selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
1661                         yield _merge(pair)
1662
1663             filters = [self._build_format_filter(f) for f in selector.filters]
1664
1665             def final_selector(ctx):
1666                 ctx_copy = copy.deepcopy(ctx)
1667                 for _filter in filters:
1668                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1669                 return selector_function(ctx_copy)
1670             return final_selector
1671
1672         stream = io.BytesIO(format_spec.encode('utf-8'))
1673         try:
1674             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1675         except tokenize.TokenError:
1676             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1677
1678         class TokenIterator(object):
1679             def __init__(self, tokens):
1680                 self.tokens = tokens
1681                 self.counter = 0
1682
1683             def __iter__(self):
1684                 return self
1685
1686             def __next__(self):
1687                 if self.counter >= len(self.tokens):
1688                     raise StopIteration()
1689                 value = self.tokens[self.counter]
1690                 self.counter += 1
1691                 return value
1692
1693             next = __next__
1694
1695             def restore_last_token(self):
1696                 self.counter -= 1
1697
1698         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1699         return _build_selector_function(parsed_selector)
1700
1701     def _calc_headers(self, info_dict):
1702         res = std_headers.copy()
1703
1704         add_headers = info_dict.get('http_headers')
1705         if add_headers:
1706             res.update(add_headers)
1707
1708         cookies = self._calc_cookies(info_dict)
1709         if cookies:
1710             res['Cookie'] = cookies
1711
1712         if 'X-Forwarded-For' not in res:
1713             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1714             if x_forwarded_for_ip:
1715                 res['X-Forwarded-For'] = x_forwarded_for_ip
1716
1717         return res
1718
1719     def _calc_cookies(self, info_dict):
1720         pr = sanitized_Request(info_dict['url'])
1721         self.cookiejar.add_cookie_header(pr)
1722         return pr.get_header('Cookie')
1723
1724     def process_video_result(self, info_dict, download=True):
1725         assert info_dict.get('_type', 'video') == 'video'
1726
1727         if 'id' not in info_dict:
1728             raise ExtractorError('Missing "id" field in extractor result')
1729         if 'title' not in info_dict:
1730             raise ExtractorError('Missing "title" field in extractor result')
1731
1732         def report_force_conversion(field, field_not, conversion):
1733             self.report_warning(
1734                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
1735                 % (field, field_not, conversion))
1736
1737         def sanitize_string_field(info, string_field):
1738             field = info.get(string_field)
1739             if field is None or isinstance(field, compat_str):
1740                 return
1741             report_force_conversion(string_field, 'a string', 'string')
1742             info[string_field] = compat_str(field)
1743
1744         def sanitize_numeric_fields(info):
1745             for numeric_field in self._NUMERIC_FIELDS:
1746                 field = info.get(numeric_field)
1747                 if field is None or isinstance(field, compat_numeric_types):
1748                     continue
1749                 report_force_conversion(numeric_field, 'numeric', 'int')
1750                 info[numeric_field] = int_or_none(field)
1751
1752         sanitize_string_field(info_dict, 'id')
1753         sanitize_numeric_fields(info_dict)
1754
1755         if 'playlist' not in info_dict:
1756             # It isn't part of a playlist
1757             info_dict['playlist'] = None
1758             info_dict['playlist_index'] = None
1759
1760         thumbnails = info_dict.get('thumbnails')
1761         if thumbnails is None:
1762             thumbnail = info_dict.get('thumbnail')
1763             if thumbnail:
1764                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1765         if thumbnails:
1766             thumbnails.sort(key=lambda t: (
1767                 t.get('preference') if t.get('preference') is not None else -1,
1768                 t.get('width') if t.get('width') is not None else -1,
1769                 t.get('height') if t.get('height') is not None else -1,
1770                 t.get('id') if t.get('id') is not None else '', t.get('url')))
1771             for i, t in enumerate(thumbnails):
1772                 t['url'] = sanitize_url(t['url'])
1773                 if t.get('width') and t.get('height'):
1774                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1775                 if t.get('id') is None:
1776                     t['id'] = '%d' % i
1777
1778         if self.params.get('list_thumbnails'):
1779             self.list_thumbnails(info_dict)
1780             return
1781
1782         thumbnail = info_dict.get('thumbnail')
1783         if thumbnail:
1784             info_dict['thumbnail'] = sanitize_url(thumbnail)
1785         elif thumbnails:
1786             info_dict['thumbnail'] = thumbnails[-1]['url']
1787
1788         if 'display_id' not in info_dict and 'id' in info_dict:
1789             info_dict['display_id'] = info_dict['id']
1790
1791         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1792             # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1793             # see http://bugs.python.org/issue1646728)
1794             try:
1795                 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1796                 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1797             except (ValueError, OverflowError, OSError):
1798                 pass
1799
1800         # Auto generate title fields corresponding to the *_number fields when missing
1801         # in order to always have clean titles. This is very common for TV series.
1802         for field in ('chapter', 'season', 'episode'):
1803             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1804                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
1805
1806         for cc_kind in ('subtitles', 'automatic_captions'):
1807             cc = info_dict.get(cc_kind)
1808             if cc:
1809                 for _, subtitle in cc.items():
1810                     for subtitle_format in subtitle:
1811                         if subtitle_format.get('url'):
1812                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
1813                         if subtitle_format.get('ext') is None:
1814                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1815
1816         automatic_captions = info_dict.get('automatic_captions')
1817         subtitles = info_dict.get('subtitles')
1818
1819         if self.params.get('listsubtitles', False):
1820             if 'automatic_captions' in info_dict:
1821                 self.list_subtitles(
1822                     info_dict['id'], automatic_captions, 'automatic captions')
1823             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1824             return
1825
1826         info_dict['requested_subtitles'] = self.process_subtitles(
1827             info_dict['id'], subtitles, automatic_captions)
1828
1829         # We now pick which formats have to be downloaded
1830         if info_dict.get('formats') is None:
1831             # There's only one format available
1832             formats = [info_dict]
1833         else:
1834             formats = info_dict['formats']
1835
1836         if not formats:
1837             raise ExtractorError('No video formats found!')
1838
1839         def is_wellformed(f):
1840             url = f.get('url')
1841             if not url:
1842                 self.report_warning(
1843                     '"url" field is missing or empty - skipping format, '
1844                     'there is an error in extractor')
1845                 return False
1846             if isinstance(url, bytes):
1847                 sanitize_string_field(f, 'url')
1848             return True
1849
1850         # Filter out malformed formats for better extraction robustness
1851         formats = list(filter(is_wellformed, formats))
1852
1853         formats_dict = {}
1854
1855         # We check that all the formats have the format and format_id fields
1856         for i, format in enumerate(formats):
1857             sanitize_string_field(format, 'format_id')
1858             sanitize_numeric_fields(format)
1859             format['url'] = sanitize_url(format['url'])
1860             if not format.get('format_id'):
1861                 format['format_id'] = compat_str(i)
1862             else:
1863                 # Sanitize format_id from characters used in format selector expression
1864                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
1865             format_id = format['format_id']
1866             if format_id not in formats_dict:
1867                 formats_dict[format_id] = []
1868             formats_dict[format_id].append(format)
1869
1870         # Make sure all formats have unique format_id
1871         for format_id, ambiguous_formats in formats_dict.items():
1872             if len(ambiguous_formats) > 1:
1873                 for i, format in enumerate(ambiguous_formats):
1874                     format['format_id'] = '%s-%d' % (format_id, i)
1875
1876         for i, format in enumerate(formats):
1877             if format.get('format') is None:
1878                 format['format'] = '{id} - {res}{note}'.format(
1879                     id=format['format_id'],
1880                     res=self.format_resolution(format),
1881                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1882                 )
1883             # Automatically determine file extension if missing
1884             if format.get('ext') is None:
1885                 format['ext'] = determine_ext(format['url']).lower()
1886             # Automatically determine protocol if missing (useful for format
1887             # selection purposes)
1888             if format.get('protocol') is None:
1889                 format['protocol'] = determine_protocol(format)
1890             # Add HTTP headers, so that external programs can use them from the
1891             # json output
1892             full_format_info = info_dict.copy()
1893             full_format_info.update(format)
1894             format['http_headers'] = self._calc_headers(full_format_info)
1895         # Remove private housekeeping stuff
1896         if '__x_forwarded_for_ip' in info_dict:
1897             del info_dict['__x_forwarded_for_ip']
1898
1899         # TODO Central sorting goes here
1900
1901         if formats[0] is not info_dict:
1902             # only set the 'formats' fields if the original info_dict list them
1903             # otherwise we end up with a circular reference, the first (and unique)
1904             # element in the 'formats' field in info_dict is info_dict itself,
1905             # which can't be exported to json
1906             info_dict['formats'] = formats
1907         if self.params.get('listformats'):
1908             self.list_formats(info_dict)
1909             return
1910
1911         req_format = self.params.get('format')
1912         if req_format is None:
1913             req_format = self._default_format_spec(info_dict, download=download)
1914             if self.params.get('verbose'):
1915                 self.to_screen('[debug] Default format spec: %s' % req_format)
1916
1917         format_selector = self.build_format_selector(req_format)
1918
1919         # While in format selection we may need to have an access to the original
1920         # format set in order to calculate some metrics or do some processing.
1921         # For now we need to be able to guess whether original formats provided
1922         # by extractor are incomplete or not (i.e. whether extractor provides only
1923         # video-only or audio-only formats) for proper formats selection for
1924         # extractors with such incomplete formats (see
1925         # https://github.com/ytdl-org/youtube-dl/pull/5556).
1926         # Since formats may be filtered during format selection and may not match
1927         # the original formats the results may be incorrect. Thus original formats
1928         # or pre-calculated metrics should be passed to format selection routines
1929         # as well.
1930         # We will pass a context object containing all necessary additional data
1931         # instead of just formats.
1932         # This fixes incorrect format selection issue (see
1933         # https://github.com/ytdl-org/youtube-dl/issues/10083).
1934         incomplete_formats = (
1935             # All formats are video-only or
1936             all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
1937             # all formats are audio-only
1938             or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
1939
1940         ctx = {
1941             'formats': formats,
1942             'incomplete_formats': incomplete_formats,
1943         }
1944
1945         formats_to_download = list(format_selector(ctx))
1946         if not formats_to_download:
1947             raise ExtractorError('requested format not available',
1948                                  expected=True)
1949
1950         if download:
1951             self.to_screen('[info] Downloading format(s) %s' % ", ".join([f['format_id'] for f in formats_to_download]))
1952             if len(formats_to_download) > 1:
1953                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1954             for format in formats_to_download:
1955                 new_info = dict(info_dict)
1956                 new_info.update(format)
1957                 self.process_info(new_info)
1958         # We update the info dict with the best quality format (backwards compatibility)
1959         info_dict.update(formats_to_download[-1])
1960         return info_dict
1961
1962     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1963         """Select the requested subtitles and their format"""
1964         available_subs = {}
1965         if normal_subtitles and self.params.get('writesubtitles'):
1966             available_subs.update(normal_subtitles)
1967         if automatic_captions and self.params.get('writeautomaticsub'):
1968             for lang, cap_info in automatic_captions.items():
1969                 if lang not in available_subs:
1970                     available_subs[lang] = cap_info
1971
1972         if (not self.params.get('writesubtitles') and not
1973                 self.params.get('writeautomaticsub') or not
1974                 available_subs):
1975             return None
1976
1977         if self.params.get('allsubtitles', False):
1978             requested_langs = available_subs.keys()
1979         else:
1980             if self.params.get('subtitleslangs', False):
1981                 requested_langs = self.params.get('subtitleslangs')
1982             elif 'en' in available_subs:
1983                 requested_langs = ['en']
1984             else:
1985                 requested_langs = [list(available_subs.keys())[0]]
1986
1987         formats_query = self.params.get('subtitlesformat', 'best')
1988         formats_preference = formats_query.split('/') if formats_query else []
1989         subs = {}
1990         for lang in requested_langs:
1991             formats = available_subs.get(lang)
1992             if formats is None:
1993                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1994                 continue
1995             for ext in formats_preference:
1996                 if ext == 'best':
1997                     f = formats[-1]
1998                     break
1999                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2000                 if matches:
2001                     f = matches[-1]
2002                     break
2003             else:
2004                 f = formats[-1]
2005                 self.report_warning(
2006                     'No subtitle format found matching "%s" for language %s, '
2007                     'using %s' % (formats_query, lang, f['ext']))
2008             subs[lang] = f
2009         return subs
2010
2011     def __forced_printings(self, info_dict, filename, incomplete):
2012         def print_mandatory(field):
2013             if (self.params.get('force%s' % field, False)
2014                     and (not incomplete or info_dict.get(field) is not None)):
2015                 self.to_stdout(info_dict[field])
2016
2017         def print_optional(field):
2018             if (self.params.get('force%s' % field, False)
2019                     and info_dict.get(field) is not None):
2020                 self.to_stdout(info_dict[field])
2021
2022         print_mandatory('title')
2023         print_mandatory('id')
2024         if self.params.get('forceurl', False) and not incomplete:
2025             if info_dict.get('requested_formats') is not None:
2026                 for f in info_dict['requested_formats']:
2027                     self.to_stdout(f['url'] + f.get('play_path', ''))
2028             else:
2029                 # For RTMP URLs, also include the playpath
2030                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
2031         print_optional('thumbnail')
2032         print_optional('description')
2033         if self.params.get('forcefilename', False) and filename is not None:
2034             self.to_stdout(filename)
2035         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
2036             self.to_stdout(formatSeconds(info_dict['duration']))
2037         print_mandatory('format')
2038         if self.params.get('forcejson', False):
2039             self.to_stdout(json.dumps(info_dict))
2040
2041     def process_info(self, info_dict):
2042         """Process a single resolved IE result."""
2043
2044         assert info_dict.get('_type', 'video') == 'video'
2045
2046         info_dict.setdefault('__postprocessors', [])
2047
2048         max_downloads = self.params.get('max_downloads')
2049         if max_downloads is not None:
2050             if self._num_downloads >= int(max_downloads):
2051                 raise MaxDownloadsReached()
2052
2053         # TODO: backward compatibility, to be removed
2054         info_dict['fulltitle'] = info_dict['title']
2055
2056         if 'format' not in info_dict:
2057             info_dict['format'] = info_dict['ext']
2058
2059         if self._match_entry(info_dict, incomplete=False) is not None:
2060             return
2061
2062         self._num_downloads += 1
2063
2064         info_dict = self.pre_process(info_dict)
2065
2066         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2067         temp_filename = self.prepare_filename(info_dict, 'temp')
2068         files_to_move = {}
2069         skip_dl = self.params.get('skip_download', False)
2070
2071         # Forced printings
2072         self.__forced_printings(info_dict, full_filename, incomplete=False)
2073
2074         if self.params.get('simulate', False):
2075             if self.params.get('force_write_download_archive', False):
2076                 self.record_download_archive(info_dict)
2077
2078             # Do nothing else if in simulate mode
2079             return
2080
2081         if full_filename is None:
2082             return
2083
2084         def ensure_dir_exists(path):
2085             return make_dir(path, self.report_error)
2086
2087         if not ensure_dir_exists(encodeFilename(full_filename)):
2088             return
2089         if not ensure_dir_exists(encodeFilename(temp_filename)):
2090             return
2091
2092         if self.params.get('writedescription', False):
2093             descfn = self.prepare_filename(info_dict, 'description')
2094             if not ensure_dir_exists(encodeFilename(descfn)):
2095                 return
2096             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
2097                 self.to_screen('[info] Video description is already present')
2098             elif info_dict.get('description') is None:
2099                 self.report_warning('There\'s no description to write.')
2100             else:
2101                 try:
2102                     self.to_screen('[info] Writing video description to: ' + descfn)
2103                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
2104                         descfile.write(info_dict['description'])
2105                 except (OSError, IOError):
2106                     self.report_error('Cannot write description file ' + descfn)
2107                     return
2108
2109         if self.params.get('writeannotations', False):
2110             annofn = self.prepare_filename(info_dict, 'annotation')
2111             if not ensure_dir_exists(encodeFilename(annofn)):
2112                 return
2113             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2114                 self.to_screen('[info] Video annotations are already present')
2115             elif not info_dict.get('annotations'):
2116                 self.report_warning('There are no annotations to write.')
2117             else:
2118                 try:
2119                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2120                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2121                         annofile.write(info_dict['annotations'])
2122                 except (KeyError, TypeError):
2123                     self.report_warning('There are no annotations to write.')
2124                 except (OSError, IOError):
2125                     self.report_error('Cannot write annotations file: ' + annofn)
2126                     return
2127
2128         def dl(name, info, subtitle=False):
2129             fd = get_suitable_downloader(info, self.params)(self, self.params)
2130             for ph in self._progress_hooks:
2131                 fd.add_progress_hook(ph)
2132             if self.params.get('verbose'):
2133                 self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
2134             return fd.download(name, info, subtitle)
2135
2136         subtitles_are_requested = any([self.params.get('writesubtitles', False),
2137                                        self.params.get('writeautomaticsub')])
2138
2139         if subtitles_are_requested and info_dict.get('requested_subtitles'):
2140             # subtitles download errors are already managed as troubles in relevant IE
2141             # that way it will silently go on when used with unsupporting IE
2142             subtitles = info_dict['requested_subtitles']
2143             # ie = self.get_info_extractor(info_dict['extractor_key'])
2144             for sub_lang, sub_info in subtitles.items():
2145                 sub_format = sub_info['ext']
2146                 sub_fn = self.prepare_filename(info_dict, 'subtitle')
2147                 sub_filename = subtitles_filename(
2148                     temp_filename if not skip_dl else sub_fn,
2149                     sub_lang, sub_format, info_dict.get('ext'))
2150                 sub_filename_final = subtitles_filename(sub_fn, sub_lang, sub_format, info_dict.get('ext'))
2151                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
2152                     self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
2153                     files_to_move[sub_filename] = sub_filename_final
2154                 else:
2155                     self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
2156                     if sub_info.get('data') is not None:
2157                         try:
2158                             # Use newline='' to prevent conversion of newline characters
2159                             # See https://github.com/ytdl-org/youtube-dl/issues/10268
2160                             with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
2161                                 subfile.write(sub_info['data'])
2162                             files_to_move[sub_filename] = sub_filename_final
2163                         except (OSError, IOError):
2164                             self.report_error('Cannot write subtitles file ' + sub_filename)
2165                             return
2166                     else:
2167                         try:
2168                             dl(sub_filename, sub_info, subtitle=True)
2169                             '''
2170                             if self.params.get('sleep_interval_subtitles', False):
2171                                 dl(sub_filename, sub_info)
2172                             else:
2173                                 sub_data = ie._request_webpage(
2174                                     sub_info['url'], info_dict['id'], note=False).read()
2175                                 with io.open(encodeFilename(sub_filename), 'wb') as subfile:
2176                                     subfile.write(sub_data)
2177                             '''
2178                             files_to_move[sub_filename] = sub_filename_final
2179                         except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2180                             self.report_warning('Unable to download subtitle for "%s": %s' %
2181                                                 (sub_lang, error_to_compat_str(err)))
2182                             continue
2183
2184         if skip_dl:
2185             if self.params.get('convertsubtitles', False):
2186                 # subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
2187                 filename_real_ext = os.path.splitext(full_filename)[1][1:]
2188                 filename_wo_ext = (
2189                     os.path.splitext(full_filename)[0]
2190                     if filename_real_ext == info_dict['ext']
2191                     else full_filename)
2192                 afilename = '%s.%s' % (filename_wo_ext, self.params.get('convertsubtitles'))
2193                 # if subconv.available:
2194                 #     info_dict['__postprocessors'].append(subconv)
2195                 if os.path.exists(encodeFilename(afilename)):
2196                     self.to_screen(
2197                         '[download] %s has already been downloaded and '
2198                         'converted' % afilename)
2199                 else:
2200                     try:
2201                         self.post_process(full_filename, info_dict, files_to_move)
2202                     except PostProcessingError as err:
2203                         self.report_error('Postprocessing: %s' % str(err))
2204                         return
2205
2206         if self.params.get('writeinfojson', False):
2207             infofn = self.prepare_filename(info_dict, 'infojson')
2208             if not ensure_dir_exists(encodeFilename(infofn)):
2209                 return
2210             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
2211                 self.to_screen('[info] Video metadata is already present')
2212             else:
2213                 self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
2214                 try:
2215                     write_json_file(self.filter_requested_info(info_dict), infofn)
2216                 except (OSError, IOError):
2217                     self.report_error('Cannot write video metadata to JSON file ' + infofn)
2218                     return
2219             info_dict['__infojson_filename'] = infofn
2220
2221         thumbfn = self.prepare_filename(info_dict, 'thumbnail')
2222         thumb_fn_temp = temp_filename if not skip_dl else thumbfn
2223         for thumb_ext in self._write_thumbnails(info_dict, thumb_fn_temp):
2224             thumb_filename_temp = replace_extension(thumb_fn_temp, thumb_ext, info_dict.get('ext'))
2225             thumb_filename = replace_extension(thumbfn, thumb_ext, info_dict.get('ext'))
2226             files_to_move[thumb_filename_temp] = info_dict['__thumbnail_filename'] = thumb_filename
2227
2228         # Write internet shortcut files
2229         url_link = webloc_link = desktop_link = False
2230         if self.params.get('writelink', False):
2231             if sys.platform == "darwin":  # macOS.
2232                 webloc_link = True
2233             elif sys.platform.startswith("linux"):
2234                 desktop_link = True
2235             else:  # if sys.platform in ['win32', 'cygwin']:
2236                 url_link = True
2237         if self.params.get('writeurllink', False):
2238             url_link = True
2239         if self.params.get('writewebloclink', False):
2240             webloc_link = True
2241         if self.params.get('writedesktoplink', False):
2242             desktop_link = True
2243
2244         if url_link or webloc_link or desktop_link:
2245             if 'webpage_url' not in info_dict:
2246                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2247                 return
2248             ascii_url = iri_to_uri(info_dict['webpage_url'])
2249
2250         def _write_link_file(extension, template, newline, embed_filename):
2251             linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
2252             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2253                 self.to_screen('[info] Internet shortcut is already present')
2254             else:
2255                 try:
2256                     self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2257                     with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2258                         template_vars = {'url': ascii_url}
2259                         if embed_filename:
2260                             template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2261                         linkfile.write(template % template_vars)
2262                 except (OSError, IOError):
2263                     self.report_error('Cannot write internet shortcut ' + linkfn)
2264                     return False
2265             return True
2266
2267         if url_link:
2268             if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2269                 return
2270         if webloc_link:
2271             if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2272                 return
2273         if desktop_link:
2274             if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2275                 return
2276
2277         # Download
2278         must_record_download_archive = False
2279         if not skip_dl:
2280             try:
2281
2282                 def existing_file(*filepaths):
2283                     ext = info_dict.get('ext')
2284                     final_ext = self.params.get('final_ext', ext)
2285                     existing_files = []
2286                     for file in orderedSet(filepaths):
2287                         if final_ext != ext:
2288                             converted = replace_extension(file, final_ext, ext)
2289                             if os.path.exists(encodeFilename(converted)):
2290                                 existing_files.append(converted)
2291                         if os.path.exists(encodeFilename(file)):
2292                             existing_files.append(file)
2293
2294                     if not existing_files or self.params.get('overwrites', False):
2295                         for file in orderedSet(existing_files):
2296                             self.report_file_delete(file)
2297                             os.remove(encodeFilename(file))
2298                         return None
2299
2300                     self.report_file_already_downloaded(existing_files[0])
2301                     info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2302                     return existing_files[0]
2303
2304                 success = True
2305                 if info_dict.get('requested_formats') is not None:
2306                     downloaded = []
2307                     merger = FFmpegMergerPP(self)
2308                     if self.params.get('allow_unplayable_formats'):
2309                         self.report_warning(
2310                             'You have requested merging of multiple formats '
2311                             'while also allowing unplayable formats to be downloaded. '
2312                             'The formats won\'t be merged to prevent data corruption.')
2313                     elif not merger.available:
2314                         self.report_warning(
2315                             'You have requested merging of multiple formats but ffmpeg is not installed. '
2316                             'The formats won\'t be merged.')
2317
2318                     def compatible_formats(formats):
2319                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2320                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2321                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2322                         if len(video_formats) > 2 or len(audio_formats) > 2:
2323                             return False
2324
2325                         # Check extension
2326                         exts = set(format.get('ext') for format in formats)
2327                         COMPATIBLE_EXTS = (
2328                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2329                             set(('webm',)),
2330                         )
2331                         for ext_sets in COMPATIBLE_EXTS:
2332                             if ext_sets.issuperset(exts):
2333                                 return True
2334                         # TODO: Check acodec/vcodec
2335                         return False
2336
2337                     requested_formats = info_dict['requested_formats']
2338                     old_ext = info_dict['ext']
2339                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
2340                         info_dict['ext'] = 'mkv'
2341                         self.report_warning(
2342                             'Requested formats are incompatible for merge and will be merged into mkv.')
2343
2344                     def correct_ext(filename):
2345                         filename_real_ext = os.path.splitext(filename)[1][1:]
2346                         filename_wo_ext = (
2347                             os.path.splitext(filename)[0]
2348                             if filename_real_ext == old_ext
2349                             else filename)
2350                         return '%s.%s' % (filename_wo_ext, info_dict['ext'])
2351
2352                     # Ensure filename always has a correct extension for successful merge
2353                     full_filename = correct_ext(full_filename)
2354                     temp_filename = correct_ext(temp_filename)
2355                     dl_filename = existing_file(full_filename, temp_filename)
2356                     info_dict['__real_download'] = False
2357                     if dl_filename is None:
2358                         for f in requested_formats:
2359                             new_info = dict(info_dict)
2360                             new_info.update(f)
2361                             fname = prepend_extension(
2362                                 self.prepare_filename(new_info, 'temp'),
2363                                 'f%s' % f['format_id'], new_info['ext'])
2364                             if not ensure_dir_exists(fname):
2365                                 return
2366                             downloaded.append(fname)
2367                             partial_success, real_download = dl(fname, new_info)
2368                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
2369                             success = success and partial_success
2370                         if merger.available and not self.params.get('allow_unplayable_formats'):
2371                             info_dict['__postprocessors'].append(merger)
2372                             info_dict['__files_to_merge'] = downloaded
2373                             # Even if there were no downloads, it is being merged only now
2374                             info_dict['__real_download'] = True
2375                         else:
2376                             for file in downloaded:
2377                                 files_to_move[file] = None
2378                 else:
2379                     # Just a single file
2380                     dl_filename = existing_file(full_filename, temp_filename)
2381                     if dl_filename is None:
2382                         success, real_download = dl(temp_filename, info_dict)
2383                         info_dict['__real_download'] = real_download
2384
2385                 dl_filename = dl_filename or temp_filename
2386                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2387
2388             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2389                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2390                 return
2391             except (OSError, IOError) as err:
2392                 raise UnavailableVideoError(err)
2393             except (ContentTooShortError, ) as err:
2394                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2395                 return
2396
2397             if success and full_filename != '-':
2398                 # Fixup content
2399                 fixup_policy = self.params.get('fixup')
2400                 if fixup_policy is None:
2401                     fixup_policy = 'detect_or_warn'
2402
2403                 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg to fix this automatically.'
2404
2405                 stretched_ratio = info_dict.get('stretched_ratio')
2406                 if stretched_ratio is not None and stretched_ratio != 1:
2407                     if fixup_policy == 'warn':
2408                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2409                             info_dict['id'], stretched_ratio))
2410                     elif fixup_policy == 'detect_or_warn':
2411                         stretched_pp = FFmpegFixupStretchedPP(self)
2412                         if stretched_pp.available:
2413                             info_dict['__postprocessors'].append(stretched_pp)
2414                         else:
2415                             self.report_warning(
2416                                 '%s: Non-uniform pixel ratio (%s). %s'
2417                                 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
2418                     else:
2419                         assert fixup_policy in ('ignore', 'never')
2420
2421                 if (info_dict.get('requested_formats') is None
2422                         and info_dict.get('container') == 'm4a_dash'
2423                         and info_dict.get('ext') == 'm4a'):
2424                     if fixup_policy == 'warn':
2425                         self.report_warning(
2426                             '%s: writing DASH m4a. '
2427                             'Only some players support this container.'
2428                             % info_dict['id'])
2429                     elif fixup_policy == 'detect_or_warn':
2430                         fixup_pp = FFmpegFixupM4aPP(self)
2431                         if fixup_pp.available:
2432                             info_dict['__postprocessors'].append(fixup_pp)
2433                         else:
2434                             self.report_warning(
2435                                 '%s: writing DASH m4a. '
2436                                 'Only some players support this container. %s'
2437                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2438                     else:
2439                         assert fixup_policy in ('ignore', 'never')
2440
2441                 if (info_dict.get('protocol') == 'm3u8_native'
2442                         or info_dict.get('protocol') == 'm3u8'
2443                         and self.params.get('hls_prefer_native')):
2444                     if fixup_policy == 'warn':
2445                         self.report_warning('%s: malformed AAC bitstream detected.' % (
2446                             info_dict['id']))
2447                     elif fixup_policy == 'detect_or_warn':
2448                         fixup_pp = FFmpegFixupM3u8PP(self)
2449                         if fixup_pp.available:
2450                             info_dict['__postprocessors'].append(fixup_pp)
2451                         else:
2452                             self.report_warning(
2453                                 '%s: malformed AAC bitstream detected. %s'
2454                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2455                     else:
2456                         assert fixup_policy in ('ignore', 'never')
2457
2458                 try:
2459                     self.post_process(dl_filename, info_dict, files_to_move)
2460                 except PostProcessingError as err:
2461                     self.report_error('Postprocessing: %s' % str(err))
2462                     return
2463                 try:
2464                     for ph in self._post_hooks:
2465                         ph(full_filename)
2466                 except Exception as err:
2467                     self.report_error('post hooks: %s' % str(err))
2468                     return
2469                 must_record_download_archive = True
2470
2471         if must_record_download_archive or self.params.get('force_write_download_archive', False):
2472             self.record_download_archive(info_dict)
2473         max_downloads = self.params.get('max_downloads')
2474         if max_downloads is not None and self._num_downloads >= int(max_downloads):
2475             raise MaxDownloadsReached()
2476
2477     def download(self, url_list):
2478         """Download a given list of URLs."""
2479         outtmpl = self.outtmpl_dict['default']
2480         if (len(url_list) > 1
2481                 and outtmpl != '-'
2482                 and '%' not in outtmpl
2483                 and self.params.get('max_downloads') != 1):
2484             raise SameFileError(outtmpl)
2485
2486         for url in url_list:
2487             try:
2488                 # It also downloads the videos
2489                 res = self.extract_info(
2490                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2491             except UnavailableVideoError:
2492                 self.report_error('unable to download video')
2493             except MaxDownloadsReached:
2494                 self.to_screen('[info] Maximum number of downloaded files reached')
2495                 raise
2496             except ExistingVideoReached:
2497                 self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
2498                 raise
2499             except RejectedVideoReached:
2500                 self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
2501                 raise
2502             else:
2503                 if self.params.get('dump_single_json', False):
2504                     self.to_stdout(json.dumps(res))
2505
2506         return self._download_retcode
2507
2508     def download_with_info_file(self, info_filename):
2509         with contextlib.closing(fileinput.FileInput(
2510                 [info_filename], mode='r',
2511                 openhook=fileinput.hook_encoded('utf-8'))) as f:
2512             # FileInput doesn't have a read method, we can't call json.load
2513             info = self.filter_requested_info(json.loads('\n'.join(f)))
2514         try:
2515             self.process_ie_result(info, download=True)
2516         except DownloadError:
2517             webpage_url = info.get('webpage_url')
2518             if webpage_url is not None:
2519                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2520                 return self.download([webpage_url])
2521             else:
2522                 raise
2523         return self._download_retcode
2524
2525     @staticmethod
2526     def filter_requested_info(info_dict):
2527         fields_to_remove = ('requested_formats', 'requested_subtitles')
2528         return dict(
2529             (k, v) for k, v in info_dict.items()
2530             if (k[0] != '_' or k == '_type') and k not in fields_to_remove)
2531
2532     def run_pp(self, pp, infodict, files_to_move={}):
2533         files_to_delete = []
2534         files_to_delete, infodict = pp.run(infodict)
2535         if not files_to_delete:
2536             return files_to_move, infodict
2537
2538         if self.params.get('keepvideo', False):
2539             for f in files_to_delete:
2540                 files_to_move.setdefault(f, '')
2541         else:
2542             for old_filename in set(files_to_delete):
2543                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2544                 try:
2545                     os.remove(encodeFilename(old_filename))
2546                 except (IOError, OSError):
2547                     self.report_warning('Unable to remove downloaded original file')
2548                 if old_filename in files_to_move:
2549                     del files_to_move[old_filename]
2550         return files_to_move, infodict
2551
2552     def pre_process(self, ie_info):
2553         info = dict(ie_info)
2554         for pp in self._pps['beforedl']:
2555             info = self.run_pp(pp, info)[1]
2556         return info
2557
2558     def post_process(self, filename, ie_info, files_to_move={}):
2559         """Run all the postprocessors on the given file."""
2560         info = dict(ie_info)
2561         info['filepath'] = filename
2562         info['__files_to_move'] = {}
2563
2564         for pp in ie_info.get('__postprocessors', []) + self._pps['normal']:
2565             files_to_move, info = self.run_pp(pp, info, files_to_move)
2566         info = self.run_pp(MoveFilesAfterDownloadPP(self, files_to_move), info)[1]
2567         for pp in self._pps['aftermove']:
2568             info = self.run_pp(pp, info, {})[1]
2569
2570     def _make_archive_id(self, info_dict):
2571         video_id = info_dict.get('id')
2572         if not video_id:
2573             return
2574         # Future-proof against any change in case
2575         # and backwards compatibility with prior versions
2576         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
2577         if extractor is None:
2578             url = str_or_none(info_dict.get('url'))
2579             if not url:
2580                 return
2581             # Try to find matching extractor for the URL and take its ie_key
2582             for ie in self._ies:
2583                 if ie.suitable(url):
2584                     extractor = ie.ie_key()
2585                     break
2586             else:
2587                 return
2588         return '%s %s' % (extractor.lower(), video_id)
2589
2590     def in_download_archive(self, info_dict):
2591         fn = self.params.get('download_archive')
2592         if fn is None:
2593             return False
2594
2595         vid_id = self._make_archive_id(info_dict)
2596         if not vid_id:
2597             return False  # Incomplete video information
2598
2599         return vid_id in self.archive
2600
2601     def record_download_archive(self, info_dict):
2602         fn = self.params.get('download_archive')
2603         if fn is None:
2604             return
2605         vid_id = self._make_archive_id(info_dict)
2606         assert vid_id
2607         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2608             archive_file.write(vid_id + '\n')
2609         self.archive.add(vid_id)
2610
2611     @staticmethod
2612     def format_resolution(format, default='unknown'):
2613         if format.get('vcodec') == 'none':
2614             return 'audio only'
2615         if format.get('resolution') is not None:
2616             return format['resolution']
2617         if format.get('height') is not None:
2618             if format.get('width') is not None:
2619                 res = '%sx%s' % (format['width'], format['height'])
2620             else:
2621                 res = '%sp' % format['height']
2622         elif format.get('width') is not None:
2623             res = '%dx?' % format['width']
2624         else:
2625             res = default
2626         return res
2627
2628     def _format_note(self, fdict):
2629         res = ''
2630         if fdict.get('ext') in ['f4f', 'f4m']:
2631             res += '(unsupported) '
2632         if fdict.get('language'):
2633             if res:
2634                 res += ' '
2635             res += '[%s] ' % fdict['language']
2636         if fdict.get('format_note') is not None:
2637             res += fdict['format_note'] + ' '
2638         if fdict.get('tbr') is not None:
2639             res += '%4dk ' % fdict['tbr']
2640         if fdict.get('container') is not None:
2641             if res:
2642                 res += ', '
2643             res += '%s container' % fdict['container']
2644         if (fdict.get('vcodec') is not None
2645                 and fdict.get('vcodec') != 'none'):
2646             if res:
2647                 res += ', '
2648             res += fdict['vcodec']
2649             if fdict.get('vbr') is not None:
2650                 res += '@'
2651         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2652             res += 'video@'
2653         if fdict.get('vbr') is not None:
2654             res += '%4dk' % fdict['vbr']
2655         if fdict.get('fps') is not None:
2656             if res:
2657                 res += ', '
2658             res += '%sfps' % fdict['fps']
2659         if fdict.get('acodec') is not None:
2660             if res:
2661                 res += ', '
2662             if fdict['acodec'] == 'none':
2663                 res += 'video only'
2664             else:
2665                 res += '%-5s' % fdict['acodec']
2666         elif fdict.get('abr') is not None:
2667             if res:
2668                 res += ', '
2669             res += 'audio'
2670         if fdict.get('abr') is not None:
2671             res += '@%3dk' % fdict['abr']
2672         if fdict.get('asr') is not None:
2673             res += ' (%5dHz)' % fdict['asr']
2674         if fdict.get('filesize') is not None:
2675             if res:
2676                 res += ', '
2677             res += format_bytes(fdict['filesize'])
2678         elif fdict.get('filesize_approx') is not None:
2679             if res:
2680                 res += ', '
2681             res += '~' + format_bytes(fdict['filesize_approx'])
2682         return res
2683
2684     def _format_note_table(self, f):
2685         def join_fields(*vargs):
2686             return ', '.join((val for val in vargs if val != ''))
2687
2688         return join_fields(
2689             'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
2690             format_field(f, 'language', '[%s]'),
2691             format_field(f, 'format_note'),
2692             format_field(f, 'container', ignore=(None, f.get('ext'))),
2693             format_field(f, 'asr', '%5dHz'))
2694
2695     def list_formats(self, info_dict):
2696         formats = info_dict.get('formats', [info_dict])
2697         new_format = self.params.get('listformats_table', False)
2698         if new_format:
2699             table = [
2700                 [
2701                     format_field(f, 'format_id'),
2702                     format_field(f, 'ext'),
2703                     self.format_resolution(f),
2704                     format_field(f, 'fps', '%d'),
2705                     '|',
2706                     format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
2707                     format_field(f, 'tbr', '%4dk'),
2708                     f.get('protocol').replace('http_dash_segments', 'dash').replace("native", "n").replace('niconico_', ''),
2709                     '|',
2710                     format_field(f, 'vcodec', default='unknown').replace('none', ''),
2711                     format_field(f, 'vbr', '%4dk'),
2712                     format_field(f, 'acodec', default='unknown').replace('none', ''),
2713                     format_field(f, 'abr', '%3dk'),
2714                     format_field(f, 'asr', '%5dHz'),
2715                     self._format_note_table(f)]
2716                 for f in formats
2717                 if f.get('preference') is None or f['preference'] >= -1000]
2718             header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', '  TBR', 'PROTO',
2719                            '|', 'VCODEC', '  VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
2720         else:
2721             table = [
2722                 [
2723                     format_field(f, 'format_id'),
2724                     format_field(f, 'ext'),
2725                     self.format_resolution(f),
2726                     self._format_note(f)]
2727                 for f in formats
2728                 if f.get('preference') is None or f['preference'] >= -1000]
2729             header_line = ['format code', 'extension', 'resolution', 'note']
2730
2731         self.to_screen(
2732             '[info] Available formats for %s:\n%s' % (info_dict['id'], render_table(
2733                 header_line,
2734                 table,
2735                 delim=new_format,
2736                 extraGap=(0 if new_format else 1),
2737                 hideEmpty=new_format)))
2738
2739     def list_thumbnails(self, info_dict):
2740         thumbnails = info_dict.get('thumbnails')
2741         if not thumbnails:
2742             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
2743             return
2744
2745         self.to_screen(
2746             '[info] Thumbnails for %s:' % info_dict['id'])
2747         self.to_screen(render_table(
2748             ['ID', 'width', 'height', 'URL'],
2749             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
2750
2751     def list_subtitles(self, video_id, subtitles, name='subtitles'):
2752         if not subtitles:
2753             self.to_screen('%s has no %s' % (video_id, name))
2754             return
2755         self.to_screen(
2756             'Available %s for %s:' % (name, video_id))
2757         self.to_screen(render_table(
2758             ['Language', 'formats'],
2759             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
2760                 for lang, formats in subtitles.items()]))
2761
2762     def urlopen(self, req):
2763         """ Start an HTTP download """
2764         if isinstance(req, compat_basestring):
2765             req = sanitized_Request(req)
2766         return self._opener.open(req, timeout=self._socket_timeout)
2767
2768     def print_debug_header(self):
2769         if not self.params.get('verbose'):
2770             return
2771
2772         if type('') is not compat_str:
2773             # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
2774             self.report_warning(
2775                 'Your Python is broken! Update to a newer and supported version')
2776
2777         stdout_encoding = getattr(
2778             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
2779         encoding_str = (
2780             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
2781                 locale.getpreferredencoding(),
2782                 sys.getfilesystemencoding(),
2783                 stdout_encoding,
2784                 self.get_encoding()))
2785         write_string(encoding_str, encoding=None)
2786
2787         source = (
2788             '(exe)' if hasattr(sys, 'frozen')
2789             else '(zip)' if isinstance(globals().get('__loader__'), zipimporter)
2790             else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py'
2791             else '')
2792         self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))
2793         if _LAZY_LOADER:
2794             self._write_string('[debug] Lazy loading extractors enabled\n')
2795         if _PLUGIN_CLASSES:
2796             self._write_string(
2797                 '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
2798         try:
2799             sp = subprocess.Popen(
2800                 ['git', 'rev-parse', '--short', 'HEAD'],
2801                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
2802                 cwd=os.path.dirname(os.path.abspath(__file__)))
2803             out, err = process_communicate_or_kill(sp)
2804             out = out.decode().strip()
2805             if re.match('[0-9a-f]+', out):
2806                 self._write_string('[debug] Git HEAD: %s\n' % out)
2807         except Exception:
2808             try:
2809                 sys.exc_clear()
2810             except Exception:
2811                 pass
2812
2813         def python_implementation():
2814             impl_name = platform.python_implementation()
2815             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
2816                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
2817             return impl_name
2818
2819         self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
2820             platform.python_version(),
2821             python_implementation(),
2822             platform.architecture()[0],
2823             platform_name()))
2824
2825         exe_versions = FFmpegPostProcessor.get_versions(self)
2826         exe_versions['rtmpdump'] = rtmpdump_version()
2827         exe_versions['phantomjs'] = PhantomJSwrapper._version()
2828         exe_str = ', '.join(
2829             '%s %s' % (exe, v)
2830             for exe, v in sorted(exe_versions.items())
2831             if v
2832         )
2833         if not exe_str:
2834             exe_str = 'none'
2835         self._write_string('[debug] exe versions: %s\n' % exe_str)
2836
2837         proxy_map = {}
2838         for handler in self._opener.handlers:
2839             if hasattr(handler, 'proxies'):
2840                 proxy_map.update(handler.proxies)
2841         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
2842
2843         if self.params.get('call_home', False):
2844             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
2845             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
2846             return
2847             latest_version = self.urlopen(
2848                 'https://yt-dl.org/latest/version').read().decode('utf-8')
2849             if version_tuple(latest_version) > version_tuple(__version__):
2850                 self.report_warning(
2851                     'You are using an outdated version (newest version: %s)! '
2852                     'See https://yt-dl.org/update if you need help updating.' %
2853                     latest_version)
2854
2855     def _setup_opener(self):
2856         timeout_val = self.params.get('socket_timeout')
2857         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
2858
2859         opts_cookiefile = self.params.get('cookiefile')
2860         opts_proxy = self.params.get('proxy')
2861
2862         if opts_cookiefile is None:
2863             self.cookiejar = compat_cookiejar.CookieJar()
2864         else:
2865             opts_cookiefile = expand_path(opts_cookiefile)
2866             self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
2867             if os.access(opts_cookiefile, os.R_OK):
2868                 self.cookiejar.load(ignore_discard=True, ignore_expires=True)
2869
2870         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
2871         if opts_proxy is not None:
2872             if opts_proxy == '':
2873                 proxies = {}
2874             else:
2875                 proxies = {'http': opts_proxy, 'https': opts_proxy}
2876         else:
2877             proxies = compat_urllib_request.getproxies()
2878             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
2879             if 'http' in proxies and 'https' not in proxies:
2880                 proxies['https'] = proxies['http']
2881         proxy_handler = PerRequestProxyHandler(proxies)
2882
2883         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
2884         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
2885         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
2886         redirect_handler = YoutubeDLRedirectHandler()
2887         data_handler = compat_urllib_request_DataHandler()
2888
2889         # When passing our own FileHandler instance, build_opener won't add the
2890         # default FileHandler and allows us to disable the file protocol, which
2891         # can be used for malicious purposes (see
2892         # https://github.com/ytdl-org/youtube-dl/issues/8227)
2893         file_handler = compat_urllib_request.FileHandler()
2894
2895         def file_open(*args, **kwargs):
2896             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
2897         file_handler.file_open = file_open
2898
2899         opener = compat_urllib_request.build_opener(
2900             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2901
2902         # Delete the default user-agent header, which would otherwise apply in
2903         # cases where our custom HTTP handler doesn't come into play
2904         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
2905         opener.addheaders = []
2906         self._opener = opener
2907
2908     def encode(self, s):
2909         if isinstance(s, bytes):
2910             return s  # Already encoded
2911
2912         try:
2913             return s.encode(self.get_encoding())
2914         except UnicodeEncodeError as err:
2915             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2916             raise
2917
2918     def get_encoding(self):
2919         encoding = self.params.get('encoding')
2920         if encoding is None:
2921             encoding = preferredencoding()
2922         return encoding
2923
2924     def _write_thumbnails(self, info_dict, filename):  # return the extensions
2925         write_all = self.params.get('write_all_thumbnails', False)
2926         thumbnails = []
2927         if write_all or self.params.get('writethumbnail', False):
2928             thumbnails = info_dict.get('thumbnails') or []
2929         multiple = write_all and len(thumbnails) > 1
2930
2931         ret = []
2932         for t in thumbnails[::1 if write_all else -1]:
2933             thumb_ext = determine_ext(t['url'], 'jpg')
2934             suffix = '%s.' % t['id'] if multiple else ''
2935             thumb_display_id = '%s ' % t['id'] if multiple else ''
2936             t['filename'] = thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))
2937
2938             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
2939                 ret.append(suffix + thumb_ext)
2940                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2941                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2942             else:
2943                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2944                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2945                 try:
2946                     uf = self.urlopen(t['url'])
2947                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2948                         shutil.copyfileobj(uf, thumbf)
2949                     ret.append(suffix + thumb_ext)
2950                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2951                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2952                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2953                     self.report_warning('Unable to download thumbnail "%s": %s' %
2954                                         (t['url'], error_to_compat_str(err)))
2955             if ret and not write_all:
2956                 break
2957         return ret