1 #!/usr/bin/env python
2 # coding: utf-8
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import copy
9 import datetime
10 import errno
11 import fileinput
12 import io
13 import itertools
14 import json
15 import locale
16 import operator
17 import os
18 import platform
19 import re
20 import shutil
21 import subprocess
22 import socket
23 import sys
24 import time
25 import tokenize
26 import traceback
27 import random
28
29 from string import ascii_letters
30 from zipimport import zipimporter
31
32 from .compat import (
33 compat_basestring,
34 compat_cookiejar,
35 compat_get_terminal_size,
36 compat_http_client,
37 compat_kwargs,
38 compat_numeric_types,
39 compat_os_name,
40 compat_str,
41 compat_tokenize_tokenize,
42 compat_urllib_error,
43 compat_urllib_request,
44 compat_urllib_request_DataHandler,
45 )
46 from .utils import (
47 age_restricted,
48 args_to_str,
49 ContentTooShortError,
50 date_from_str,
51 DateRange,
52 DEFAULT_OUTTMPL,
53 OUTTMPL_TYPES,
54 determine_ext,
55 determine_protocol,
56 DOT_DESKTOP_LINK_TEMPLATE,
57 DOT_URL_LINK_TEMPLATE,
58 DOT_WEBLOC_LINK_TEMPLATE,
59 DownloadError,
60 encode_compat_str,
61 encodeFilename,
62 error_to_compat_str,
63 ExistingVideoReached,
64 expand_path,
65 ExtractorError,
66 float_or_none,
67 format_bytes,
68 format_field,
69 formatSeconds,
70 GeoRestrictedError,
71 int_or_none,
72 iri_to_uri,
73 ISO3166Utils,
74 locked_file,
75 make_dir,
76 make_HTTPS_handler,
77 MaxDownloadsReached,
78 orderedSet,
79 PagedList,
80 parse_filesize,
81 PerRequestProxyHandler,
82 platform_name,
83 PostProcessingError,
84 preferredencoding,
85 prepend_extension,
86 register_socks_protocols,
87 render_table,
88 replace_extension,
89 RejectedVideoReached,
90 SameFileError,
91 sanitize_filename,
92 sanitize_path,
93 sanitize_url,
94 sanitized_Request,
95 std_headers,
96 str_or_none,
97 strftime_or_none,
98 subtitles_filename,
99 to_high_limit_path,
100 UnavailableVideoError,
101 url_basename,
102 version_tuple,
103 write_json_file,
104 write_string,
105 YoutubeDLCookieJar,
106 YoutubeDLCookieProcessor,
107 YoutubeDLHandler,
108 YoutubeDLRedirectHandler,
109 process_communicate_or_kill,
110 )
111 from .cache import Cache
112 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER, _PLUGIN_CLASSES
113 from .extractor.openload import PhantomJSwrapper
114 from .downloader import get_suitable_downloader
115 from .downloader.rtmp import rtmpdump_version
116 from .postprocessor import (
117 FFmpegFixupM3u8PP,
118 FFmpegFixupM4aPP,
119 FFmpegFixupStretchedPP,
120 FFmpegMergerPP,
121 FFmpegPostProcessor,
122 # FFmpegSubtitlesConvertorPP,
123 get_postprocessor,
124 MoveFilesAfterDownloadPP,
125 )
126 from .version import __version__
127
128 if compat_os_name == 'nt':
129 import ctypes
130
131
132 class YoutubeDL(object):
133 """YoutubeDL class.
134
135 YoutubeDL objects are the ones responsible for downloading the
136 actual video file and writing it to disk if the user has requested
137 it, among some other tasks. In most cases there should be one per
138 program. Since, given a video URL, the downloader doesn't know how to
139 extract all the needed information (a task the InfoExtractors do), it
140 has to pass the URL to one of them.
141
142 For this, YoutubeDL objects have a method that allows
143 InfoExtractors to be registered in a given order. When it is passed
144 a URL, the YoutubeDL object hands it to the first InfoExtractor it
145 finds that reports being able to handle it. The InfoExtractor extracts
146 all the information about the video or videos the URL refers to, and
147 YoutubeDL processes the extracted information, possibly using a File
148 Downloader to download the video.
149
150 YoutubeDL objects accept a lot of parameters. In order not to saturate
151 the object constructor with arguments, it receives a dictionary of
152 options instead. These options are available through the params
153 attribute for the InfoExtractors to use. The YoutubeDL also
154 registers itself as the downloader in charge of the InfoExtractors
155 that are added to it, so this is a "mutual registration".
156
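A minimal usage sketch (the URL is a placeholder, and this assumes
YoutubeDL is importable from the yt_dlp package):

    from yt_dlp import YoutubeDL

    params = {'format': 'bestvideo+bestaudio/best',
              'outtmpl': '%(title)s.%(ext)s'}
    with YoutubeDL(params) as ydl:
        ydl.extract_info('https://example.com/some-video', download=True)
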
157 Available options:
158
159 username: Username for authentication purposes.
160 password: Password for authentication purposes.
161 videopassword: Password for accessing a video.
162 ap_mso: Adobe Pass multiple-system operator identifier.
163 ap_username: Multiple-system operator account username.
164 ap_password: Multiple-system operator account password.
165 usenetrc: Use netrc for authentication instead of username/password.
166 verbose: Print additional info to stdout.
167 quiet: Do not print messages to stdout.
168 no_warnings: Do not print out anything for warnings.
169 forceurl: Force printing final URL.
170 forcetitle: Force printing title.
171 forceid: Force printing ID.
172 forcethumbnail: Force printing thumbnail URL.
173 forcedescription: Force printing description.
174 forcefilename: Force printing final filename.
175 forceduration: Force printing duration.
176 forcejson: Force printing info_dict as JSON.
177 dump_single_json: Force printing the info_dict of the whole playlist
178 (or video) as a single JSON line.
179 force_write_download_archive: Force writing download archive regardless
180 of 'skip_download' or 'simulate'.
181 simulate: Do not download the video files.
182 format: Video format code. see "FORMAT SELECTION" for more details.
183 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
184 format_sort: How to sort the video formats. see "Sorting Formats"
185 for more details.
186 format_sort_force: Force the given format_sort. see "Sorting Formats"
187 for more details.
188 allow_multiple_video_streams: Allow multiple video streams to be merged
189 into a single file
190 allow_multiple_audio_streams: Allow multiple audio streams to be merged
191 into a single file
192 paths: Dictionary of output paths. The allowed keys are 'home',
193 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
194 outtmpl: Dictionary of templates for output names. Allowed keys
195 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
196 A string is also accepted for backward compatibility
197 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
198 restrictfilenames: Do not allow "&" and spaces in file names
199 trim_file_name: Limit length of filename (extension excluded)
200 windowsfilenames: Force the filenames to be windows compatible
201 ignoreerrors: Do not stop on download errors
202 (Default True when running yt-dlp,
203 but False when directly accessing YoutubeDL class)
204 force_generic_extractor: Force downloader to use the generic extractor
205 overwrites: Overwrite all video and metadata files if True,
206 overwrite only non-video files if None
207 and don't overwrite any file if False
208 playliststart: Playlist item to start at.
209 playlistend: Playlist item to end at.
210 playlist_items: Specific indices of playlist to download.
211 playlistreverse: Download playlist items in reverse order.
212 playlistrandom: Download playlist items in random order.
213 matchtitle: Download only matching titles.
214 rejecttitle: Reject downloads for matching titles.
215 logger: Log messages to a logging.Logger instance.
216 logtostderr: Log messages to stderr instead of stdout.
217 writedescription: Write the video description to a .description file
218 writeinfojson: Write the video metadata to a .info.json file
219 writecomments: Extract video comments. This will not be written to disk
220 unless writeinfojson is also given
221 writeannotations: Write the video annotations to a .annotations.xml file
222 writethumbnail: Write the thumbnail image to a file
223 allow_playlist_files: Whether to write playlists' description, infojson, etc.
224 to disk as well when using the 'write*' options
225 write_all_thumbnails: Write all thumbnail formats to files
226 writelink: Write an internet shortcut file, depending on the
227 current platform (.url/.webloc/.desktop)
228 writeurllink: Write a Windows internet shortcut file (.url)
229 writewebloclink: Write a macOS internet shortcut file (.webloc)
230 writedesktoplink: Write a Linux internet shortcut file (.desktop)
231 writesubtitles: Write the video subtitles to a file
232 writeautomaticsub: Write the automatically generated subtitles to a file
233 allsubtitles: Downloads all the subtitles of the video
234 (requires writesubtitles or writeautomaticsub)
235 listsubtitles: Lists all available subtitles for the video
236 subtitlesformat: The format code for subtitles
237 subtitleslangs: List of languages of the subtitles to download
238 keepvideo: Keep the video file after post-processing
239 daterange: A DateRange object, download only if the upload_date is in the range.
240 skip_download: Skip the actual download of the video file
241 cachedir: Location of the cache files in the filesystem.
242 False to disable filesystem cache.
243 noplaylist: Download single video instead of a playlist if in doubt.
244 age_limit: An integer representing the user's age in years.
245 Videos unsuitable for the given age are skipped.
246 min_views: An integer representing the minimum view count the video
247 must have in order to not be skipped.
248 Videos without view count information are always
249 downloaded. None for no limit.
250 max_views: An integer representing the maximum view count.
251 Videos that are more popular than that are not
252 downloaded.
253 Videos without view count information are always
254 downloaded. None for no limit.
255 download_archive: File name of a file where all downloads are recorded.
256 Videos already present in the file are not downloaded
257 again.
258 break_on_existing: Stop the download process after attempting to download a
259 file that is in the archive.
260 break_on_reject: Stop the download process when encountering a video that
261 has been filtered out.
262 cookiefile: File name where cookies should be read from and dumped to
263 nocheckcertificate:Do not verify SSL certificates
264 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
265 At the moment, this is only supported by YouTube.
266 proxy: URL of the proxy server to use
267 geo_verification_proxy: URL of the proxy to use for IP address verification
268 on geo-restricted sites.
269 socket_timeout: Time to wait for unresponsive hosts, in seconds
270 bidi_workaround: Work around buggy terminals without bidirectional text
271 support, using fribidi
272 debug_printtraffic:Print out sent and received HTTP traffic
273 include_ads: Download ads as well
274 default_search: Prepend this string if an input url is not valid.
275 'auto' for elaborate guessing
276 encoding: Use this encoding instead of the system-specified.
277 extract_flat: Do not resolve URLs, return the immediate result.
278 Pass in 'in_playlist' to only show this behavior for
279 playlist items.
280 postprocessors: A list of dictionaries, each with an entry
281 * key: The name of the postprocessor. See
282 yt_dlp/postprocessor/__init__.py for a list.
283 * _after_move: Optional. If True, run this post_processor
284 after 'MoveFilesAfterDownload'
285 as well as any further keyword arguments for the
286 postprocessor.
287 post_hooks: A list of functions that get called as the final step
288 for each video file, after all postprocessors have been
289 called. The filename will be passed as the only argument.
290 progress_hooks: A list of functions that get called on download
291 progress, with a dictionary with the entries
292 * status: One of "downloading", "error", or "finished".
293 Check this first and ignore unknown values.
294
295 If status is one of "downloading" or "finished", the
296 following properties may also be present:
297 * filename: The final filename (always present)
298 * tmpfilename: The filename we're currently writing to
299 * downloaded_bytes: Bytes on disk
300 * total_bytes: Size of the whole file, None if unknown
301 * total_bytes_estimate: Guess of the eventual file size,
302 None if unavailable.
303 * elapsed: The number of seconds since download started.
304 * eta: The estimated time in seconds, None if unknown
305 * speed: The download speed in bytes/second, None if
306 unknown
307 * fragment_index: The counter of the currently
308 downloaded video fragment.
309 * fragment_count: The number of fragments (= individual
310 files that will be merged)
311
312 Progress hooks are guaranteed to be called at least once (with
313 status "finished") if the download is successful; see the example at the end of this docstring.
314 merge_output_format: Extension to use when merging formats.
315 final_ext: Expected final extension; used to detect when the file was
316 already downloaded and converted. "merge_output_format" is
317 replaced by this extension when given
318 fixup: Automatically correct known faults of the file.
319 One of:
320 - "never": do nothing
321 - "warn": only emit a warning
322 - "detect_or_warn": check whether we can do anything
323 about it, warn otherwise (default)
324 source_address: Client-side IP address to bind to.
325 call_home: Boolean, true iff we are allowed to contact the
326 yt-dlp servers for debugging. (BROKEN)
327 sleep_interval_requests: Number of seconds to sleep between requests
328 during extraction
329 sleep_interval: Number of seconds to sleep before each download when
330 used alone or a lower bound of a range for randomized
331 sleep before each download (minimum possible number
332 of seconds to sleep) when used along with
333 max_sleep_interval.
334 max_sleep_interval:Upper bound of a range for randomized sleep before each
335 download (maximum possible number of seconds to sleep).
336 Must only be used along with sleep_interval.
337 Actual sleep time will be a random float from range
338 [sleep_interval; max_sleep_interval].
339 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
340 listformats: Print an overview of available video formats and exit.
341 list_thumbnails: Print a table of all thumbnails and exit.
342 match_filter: A function that gets called with the info_dict of
343 every video.
344 If it returns a message, the video is ignored.
345 If it returns None, the video is downloaded.
346 match_filter_func in utils.py is one example of this.
347 no_color: Do not emit color codes in output.
348 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
349 HTTP header
350 geo_bypass_country:
351 Two-letter ISO 3166-1 alpha-2 country code that will be used for
352 explicit geographic restriction bypassing via faking
353 X-Forwarded-For HTTP header
354 geo_bypass_ip_block:
355 IP range in CIDR notation that will be used similarly to
356 geo_bypass_country
357
358 The following options determine which downloader is picked:
359 external_downloader: Executable of the external downloader to call.
360 None or unset for standard (built-in) downloader.
361 hls_prefer_native: Use the native HLS downloader if True, use
362 ffmpeg/avconv if False, or use the downloader
363 suggested by the extractor if None.
364
365 The following parameters are not used by YoutubeDL itself, they are used by
366 the downloader (see yt_dlp/downloader/common.py):
367 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
368 noresizebuffer, retries, continuedl, noprogress, consoletitle,
369 xattr_set_filesize, external_downloader_args, hls_use_mpegts,
370 http_chunk_size.
371
372 The following options are used by the post processors:
373 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
374 otherwise prefer ffmpeg. (avconv support is deprecated)
375 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
376 to the binary or its containing directory.
377 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
378 and a list of additional command-line arguments for the
379 postprocessor/executable. The dict can also have "PP+EXE" keys
380 which are used when the given exe is used by the given PP.
381 Use 'default' as the name for arguments to be passed to all PP
382
383 The following options are used by the extractors:
384 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
385 hls_split_discontinuity: Split HLS playlists to different formats at
386 discontinuities such as ad breaks (default: False)
387 youtube_include_dash_manifest: If True (default), DASH manifests and related
388 data will be downloaded and processed by extractor.
389 You can reduce network I/O by disabling it if you don't
390 care about DASH. (only for youtube)
391 youtube_include_hls_manifest: If True (default), HLS manifests and related
392 data will be downloaded and processed by extractor.
393 You can reduce network I/O by disabling it if you don't
394 care about HLS. (only for youtube)
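
Example: a minimal progress hook, wired in through the
'progress_hooks' option above (only documented fields are used):

    def hook(d):
        if d['status'] == 'finished':
            print('Downloaded %s' % d['filename'])

    ydl = YoutubeDL({'progress_hooks': [hook]})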
395 """
396
397 _NUMERIC_FIELDS = set((
398 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
399 'timestamp', 'upload_year', 'upload_month', 'upload_day',
400 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
401 'average_rating', 'comment_count', 'age_limit',
402 'start_time', 'end_time',
403 'chapter_number', 'season_number', 'episode_number',
404 'track_number', 'disc_number', 'release_year',
405 'playlist_index',
406 ))
407
408 params = None
409 _ies = []
410 _pps = {'beforedl': [], 'aftermove': [], 'normal': []}
411 __prepare_filename_warned = False
412 _first_webpage_request = True
413 _download_retcode = None
414 _num_downloads = None
415 _playlist_level = 0
416 _playlist_urls = set()
417 _screen_file = None
418
419 def __init__(self, params=None, auto_init=True):
420 """Create a FileDownloader object with the given options."""
421 if params is None:
422 params = {}
423 self._ies = []
424 self._ies_instances = {}
425 self._pps = {'beforedl': [], 'aftermove': [], 'normal': []}
426 self.__prepare_filename_warned = False
427 self._first_webpage_request = True
428 self._post_hooks = []
429 self._progress_hooks = []
430 self._download_retcode = 0
431 self._num_downloads = 0
432 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
433 self._err_file = sys.stderr
434 self.params = {
435 # Default parameters
436 'nocheckcertificate': False,
437 }
438 self.params.update(params)
439 self.cache = Cache(self)
440 self.archive = set()
441
442 """Preload the archive, if any is specified"""
443 def preload_download_archive(self):
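# Each line in the archive file is one archive id; typically this is
# the extractor name followed by the video id, e.g.
# 'youtube dQw4w9WgXcQ' (compare in_download_archive below)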
444 fn = self.params.get('download_archive')
445 if fn is None:
446 return False
447 try:
448 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
449 for line in archive_file:
450 self.archive.add(line.strip())
451 except IOError as ioe:
452 if ioe.errno != errno.ENOENT:
453 raise
454 return False
455 return True
456
457 def check_deprecated(param, option, suggestion):
458 if self.params.get(param) is not None:
459 self.report_warning(
460 '%s is deprecated. Use %s instead.' % (option, suggestion))
461 return True
462 return False
463
464 if self.params.get('verbose'):
465 self.to_stdout('[debug] Loading archive file %r' % self.params.get('download_archive'))
466
467 preload_download_archive(self)
468
469 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
470 if self.params.get('geo_verification_proxy') is None:
471 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
472
473 if self.params.get('final_ext'):
474 if self.params.get('merge_output_format'):
475 self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
476 self.params['merge_output_format'] = self.params['final_ext']
477
478 if 'overwrites' in self.params and self.params['overwrites'] is None:
479 del self.params['overwrites']
480
481 check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N is the number of digits')
482 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
483 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
484
485 if params.get('bidi_workaround', False):
486 try:
487 import pty
488 master, slave = pty.openpty()
489 width = compat_get_terminal_size().columns
490 if width is None:
491 width_args = []
492 else:
493 width_args = ['-w', str(width)]
494 sp_kwargs = dict(
495 stdin=subprocess.PIPE,
496 stdout=slave,
497 stderr=self._err_file)
498 try:
499 self._output_process = subprocess.Popen(
500 ['bidiv'] + width_args, **sp_kwargs
501 )
502 except OSError:
503 self._output_process = subprocess.Popen(
504 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
505 self._output_channel = os.fdopen(master, 'rb')
506 except OSError as ose:
507 if ose.errno == errno.ENOENT:
508 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
509 else:
510 raise
511
512 if (sys.platform != 'win32'
513 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
514 and not params.get('restrictfilenames', False)):
515 # Unicode filesystem API will throw errors (#1474, #13027)
516 self.report_warning(
517 'Assuming --restrict-filenames since file system encoding '
518 'cannot encode all characters. '
519 'Set the LC_ALL environment variable to fix this.')
520 self.params['restrictfilenames'] = True
521
522 self.outtmpl_dict = self.parse_outtmpl()
523
524 self._setup_opener()
525
526 if auto_init:
527 self.print_debug_header()
528 self.add_default_info_extractors()
529
530 for pp_def_raw in self.params.get('postprocessors', []):
531 pp_class = get_postprocessor(pp_def_raw['key'])
532 pp_def = dict(pp_def_raw)
533 del pp_def['key']
534 if 'when' in pp_def:
535 when = pp_def['when']
536 del pp_def['when']
537 else:
538 when = 'normal'
539 pp = pp_class(self, **compat_kwargs(pp_def))
540 self.add_post_processor(pp, when=when)
541
542 for ph in self.params.get('post_hooks', []):
543 self.add_post_hook(ph)
544
545 for ph in self.params.get('progress_hooks', []):
546 self.add_progress_hook(ph)
547
548 register_socks_protocols()
549
550 def warn_if_short_id(self, argv):
551 # short YouTube ID starting with dash?
552 idxs = [
553 i for i, a in enumerate(argv)
554 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
555 if idxs:
556 correct_argv = (
557 ['yt-dlp']
558 + [a for i, a in enumerate(argv) if i not in idxs]
559 + ['--'] + [argv[i] for i in idxs]
560 )
561 self.report_warning(
562 'Long argument string detected. '
563 'Use -- to separate parameters and URLs, like this:\n%s\n' %
564 args_to_str(correct_argv))
565
566 def add_info_extractor(self, ie):
567 """Add an InfoExtractor object to the end of the list."""
568 self._ies.append(ie)
569 if not isinstance(ie, type):
570 self._ies_instances[ie.ie_key()] = ie
571 ie.set_downloader(self)
572
573 def get_info_extractor(self, ie_key):
574 """
575 Get an instance of an IE with name ie_key. It will try to get one from
576 the _ies list; if there's no instance, it will create a new one and add
577 it to the extractor list.
578 """
579 ie = self._ies_instances.get(ie_key)
580 if ie is None:
581 ie = get_info_extractor(ie_key)()
582 self.add_info_extractor(ie)
583 return ie
584
585 def add_default_info_extractors(self):
586 """
587 Add the InfoExtractors returned by gen_extractors to the end of the list
588 """
589 for ie in gen_extractor_classes():
590 self.add_info_extractor(ie)
591
592 def add_post_processor(self, pp, when='normal'):
593 """Add a PostProcessor object to the end of the chain."""
594 self._pps[when].append(pp)
595 pp.set_downloader(self)
596
597 def add_post_hook(self, ph):
598 """Add the post hook"""
599 self._post_hooks.append(ph)
600
601 def add_progress_hook(self, ph):
602 """Add the progress hook (currently only for the file downloader)"""
603 self._progress_hooks.append(ph)
604
605 def _bidi_workaround(self, message):
606 if not hasattr(self, '_output_channel'):
607 return message
608
609 assert hasattr(self, '_output_process')
610 assert isinstance(message, compat_str)
611 line_count = message.count('\n') + 1
612 self._output_process.stdin.write((message + '\n').encode('utf-8'))
613 self._output_process.stdin.flush()
614 res = ''.join(self._output_channel.readline().decode('utf-8')
615 for _ in range(line_count))
616 return res[:-len('\n')]
617
618 def to_screen(self, message, skip_eol=False):
619 """Print message to stdout if not in quiet mode."""
620 return self.to_stdout(message, skip_eol, check_quiet=True)
621
622 def _write_string(self, s, out=None):
623 write_string(s, out=out, encoding=self.params.get('encoding'))
624
625 def to_stdout(self, message, skip_eol=False, check_quiet=False):
626 """Print message to stdout if not in quiet mode."""
627 if self.params.get('logger'):
628 self.params['logger'].debug(message)
629 elif not check_quiet or not self.params.get('quiet', False):
630 message = self._bidi_workaround(message)
631 terminator = ['\n', ''][skip_eol]
632 output = message + terminator
633
634 self._write_string(output, self._screen_file)
635
636 def to_stderr(self, message):
637 """Print message to stderr."""
638 assert isinstance(message, compat_str)
639 if self.params.get('logger'):
640 self.params['logger'].error(message)
641 else:
642 message = self._bidi_workaround(message)
643 output = message + '\n'
644 self._write_string(output, self._err_file)
645
646 def to_console_title(self, message):
647 if not self.params.get('consoletitle', False):
648 return
649 if compat_os_name == 'nt':
650 if ctypes.windll.kernel32.GetConsoleWindow():
651 # c_wchar_p() might not be necessary if `message` is
652 # already of type unicode()
653 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
654 elif 'TERM' in os.environ:
655 self._write_string('\033]0;%s\007' % message, self._screen_file)
656
657 def save_console_title(self):
658 if not self.params.get('consoletitle', False):
659 return
660 if self.params.get('simulate', False):
661 return
662 if compat_os_name != 'nt' and 'TERM' in os.environ:
663 # Save the title on stack
664 self._write_string('\033[22;0t', self._screen_file)
665
666 def restore_console_title(self):
667 if not self.params.get('consoletitle', False):
668 return
669 if self.params.get('simulate', False):
670 return
671 if compat_os_name != 'nt' and 'TERM' in os.environ:
672 # Restore the title from stack
673 self._write_string('\033[23;0t', self._screen_file)
674
675 def __enter__(self):
676 self.save_console_title()
677 return self
678
679 def __exit__(self, *args):
680 self.restore_console_title()
681
682 if self.params.get('cookiefile') is not None:
683 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
684
685 def trouble(self, message=None, tb=None):
686 """Determine action to take when a download problem appears.
687
688 Depending on whether the downloader has been configured to ignore
689 download errors, this method may throw an exception when errors
690 are found, after printing the message.
691
692 tb, if given, is additional traceback information.
693 """
694 if message is not None:
695 self.to_stderr(message)
696 if self.params.get('verbose'):
697 if tb is None:
698 if sys.exc_info()[0]: # if .trouble has been called from an except block
699 tb = ''
700 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
701 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
702 tb += encode_compat_str(traceback.format_exc())
703 else:
704 tb_data = traceback.format_list(traceback.extract_stack())
705 tb = ''.join(tb_data)
706 self.to_stderr(tb)
707 if not self.params.get('ignoreerrors', False):
708 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
709 exc_info = sys.exc_info()[1].exc_info
710 else:
711 exc_info = sys.exc_info()
712 raise DownloadError(message, exc_info)
713 self._download_retcode = 1
714
715 def report_warning(self, message):
716 '''
717 Print the message to stderr; it will be prefixed with 'WARNING:'.
718 If stderr is a tty, the 'WARNING:' will be colored.
719 '''
720 if self.params.get('logger') is not None:
721 self.params['logger'].warning(message)
722 else:
723 if self.params.get('no_warnings'):
724 return
725 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
726 _msg_header = '\033[0;33mWARNING:\033[0m'
727 else:
728 _msg_header = 'WARNING:'
729 warning_message = '%s %s' % (_msg_header, message)
730 self.to_stderr(warning_message)
731
732 def report_error(self, message, tb=None):
733 '''
734 Do the same as trouble, but prefix the message with 'ERROR:', colored
735 in red if stderr is a tty file.
736 '''
737 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
738 _msg_header = '\033[0;31mERROR:\033[0m'
739 else:
740 _msg_header = 'ERROR:'
741 error_message = '%s %s' % (_msg_header, message)
742 self.trouble(error_message, tb)
743
744 def report_file_already_downloaded(self, file_name):
745 """Report file has already been fully downloaded."""
746 try:
747 self.to_screen('[download] %s has already been downloaded' % file_name)
748 except UnicodeEncodeError:
749 self.to_screen('[download] The file has already been downloaded')
750
751 def report_file_delete(self, file_name):
752 """Report that existing file will be deleted."""
753 try:
754 self.to_screen('Deleting existing file %s' % file_name)
755 except UnicodeEncodeError:
756 self.to_screen('Deleting existing file')
757
758 def parse_outtmpl(self):
759 outtmpl_dict = self.params.get('outtmpl', {})
760 if not isinstance(outtmpl_dict, dict):
761 outtmpl_dict = {'default': outtmpl_dict}
762 outtmpl_dict.update({
763 k: v for k, v in DEFAULT_OUTTMPL.items()
764 if not outtmpl_dict.get(k)})
765 for key, val in outtmpl_dict.items():
766 if isinstance(val, bytes):
767 self.report_warning(
768 'Parameter outtmpl is bytes, but should be a unicode string. '
769 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
770 return outtmpl_dict
771
772 def _prepare_filename(self, info_dict, tmpl_type='default'):
773 try:
774 template_dict = dict(info_dict)
775
776 template_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
777 formatSeconds(info_dict['duration'], '-')
778 if info_dict.get('duration', None) is not None
779 else None)
780
781 template_dict['epoch'] = int(time.time())
782 autonumber_size = self.params.get('autonumber_size')
783 if autonumber_size is None:
784 autonumber_size = 5
785 template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
786 if template_dict.get('resolution') is None:
787 if template_dict.get('width') and template_dict.get('height'):
788 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
789 elif template_dict.get('height'):
790 template_dict['resolution'] = '%sp' % template_dict['height']
791 elif template_dict.get('width'):
792 template_dict['resolution'] = '%dx?' % template_dict['width']
793
794 sanitize = lambda k, v: sanitize_filename(
795 compat_str(v),
796 restricted=self.params.get('restrictfilenames'),
797 is_id=(k == 'id' or k.endswith('_id')))
798 template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
799 for k, v in template_dict.items()
800 if v is not None and not isinstance(v, (list, tuple, dict)))
801 na = self.params.get('outtmpl_na_placeholder', 'NA')
802 template_dict = collections.defaultdict(lambda: na, template_dict)
803
804 outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
805 force_ext = OUTTMPL_TYPES.get(tmpl_type)
806
807 # For fields playlist_index and autonumber convert all occurrences
808 # of %(field)s to %(field)0Nd for backward compatibility
809 field_size_compat_map = {
810 'playlist_index': len(str(template_dict['n_entries'])),
811 'autonumber': autonumber_size,
812 }
813 FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
814 mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
815 if mobj:
816 outtmpl = re.sub(
817 FIELD_SIZE_COMPAT_RE,
818 r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
819 outtmpl)
820
821 # As of [1] format syntax is:
822 # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
823 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
824 FORMAT_RE = r'''(?x)
825 (?<!%)
826 %
827 \({0}\) # mapping key
828 (?:[#0\-+ ]+)? # conversion flags (optional)
829 (?:\d+)? # minimum field width (optional)
830 (?:\.\d+)? # precision (optional)
831 [hlL]? # length modifier (optional)
832 (?P<type>[diouxXeEfFgGcrs%]) # conversion type
833 '''
834
835 numeric_fields = list(self._NUMERIC_FIELDS)
836
837 # Format date
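# e.g. '%(upload_date>%Y-%m-%d)s' renders an upload_date of '20210101'
# as '2021-01-01'; strftime_or_none also accepts unix timestamps,
# as used by the 'timestamp' field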
838 FORMAT_DATE_RE = FORMAT_RE.format(r'(?P<key>(?P<field>\w+)>(?P<format>.+?))')
839 for mobj in re.finditer(FORMAT_DATE_RE, outtmpl):
840 conv_type, field, frmt, key = mobj.group('type', 'field', 'format', 'key')
841 if key in template_dict:
842 continue
843 value = strftime_or_none(template_dict.get(field), frmt, na)
844 if conv_type in 'crs': # string
845 value = sanitize(field, value)
846 else: # number
847 numeric_fields.append(key)
848 value = float_or_none(value, default=None)
849 if value is not None:
850 template_dict[key] = value
851
852 # Missing numeric fields used together with integer presentation types
853 # in format specification will break the argument substitution since
854 # string NA placeholder is returned for missing fields. We will patch
855 # output template for missing fields to meet string presentation type.
856 for numeric_field in numeric_fields:
857 if numeric_field not in template_dict:
858 outtmpl = re.sub(
859 FORMAT_RE.format(re.escape(numeric_field)),
860 r'%({0})s'.format(numeric_field), outtmpl)
861
862 # expand_path translates '%%' into '%' and '$$' into '$'
863 # correspondingly that is not what we want since we need to keep
864 # '%%' intact for template dict substitution step. Working around
865 # with boundary-alike separator hack.
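# e.g. a literal '%%(ext)s' first becomes '%<sep>%(ext)s', passes
# through expand_path untouched, and the sep is stripped again
# below, restoring '%%' in time for the substitution step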
866 sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
867 outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
868
869 # outtmpl should be expand_path'ed before template dict substitution
870 # because meta fields may contain env variables we don't want to
871 # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
872 # title "Hello $PATH", we don't want `$PATH` to be expanded.
873 filename = expand_path(outtmpl).replace(sep, '') % template_dict
874
875 if force_ext is not None:
876 filename = replace_extension(filename, force_ext, template_dict.get('ext'))
877
878 # https://github.com/blackjack4494/youtube-dlc/issues/85
879 trim_file_name = self.params.get('trim_file_name', False)
880 if trim_file_name:
881 fn_groups = filename.rsplit('.')
882 ext = fn_groups[-1]
883 sub_ext = ''
884 if len(fn_groups) > 2:
885 sub_ext = fn_groups[-2]
886 filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
887
888 return filename
889 except ValueError as err:
890 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
891 return None
892
893 def prepare_filename(self, info_dict, dir_type='', warn=False):
894 """Generate the output filename."""
895 paths = self.params.get('paths', {})
896 assert isinstance(paths, dict)
897 filename = self._prepare_filename(info_dict, dir_type or 'default')
898
899 if warn and not self.__prepare_filename_warned:
900 if not paths:
901 pass
902 elif filename == '-':
903 self.report_warning('--paths is ignored when outputting to stdout')
904 elif os.path.isabs(filename):
905 self.report_warning('--paths is ignored since an absolute path is given in output template')
906 self.__prepare_filename_warned = True
907 if filename == '-' or not filename:
908 return filename
909
910 homepath = expand_path(paths.get('home', '').strip())
911 assert isinstance(homepath, compat_str)
912 subdir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
913 assert isinstance(subdir, compat_str)
914 path = os.path.join(homepath, subdir, filename)
915
916 # Temporary fix for #4787
917 # 'Treat' all problem characters by passing filename through preferredencoding
918 # to work around encoding issues with subprocess on python2 @ Windows
919 if sys.version_info < (3, 0) and sys.platform == 'win32':
920 path = encodeFilename(path, True).decode(preferredencoding())
921 return sanitize_path(path, force=self.params.get('windowsfilenames'))
922
923 def _match_entry(self, info_dict, incomplete):
924 """ Returns None if the file should be downloaded """
925
926 def check_filter():
927 video_title = info_dict.get('title', info_dict.get('id', 'video'))
928 if 'title' in info_dict:
929 # This can happen when we're just evaluating the playlist
930 title = info_dict['title']
931 matchtitle = self.params.get('matchtitle', False)
932 if matchtitle:
933 if not re.search(matchtitle, title, re.IGNORECASE):
934 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
935 rejecttitle = self.params.get('rejecttitle', False)
936 if rejecttitle:
937 if re.search(rejecttitle, title, re.IGNORECASE):
938 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
939 date = info_dict.get('upload_date')
940 if date is not None:
941 dateRange = self.params.get('daterange', DateRange())
942 if date not in dateRange:
943 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
944 view_count = info_dict.get('view_count')
945 if view_count is not None:
946 min_views = self.params.get('min_views')
947 if min_views is not None and view_count < min_views:
948 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
949 max_views = self.params.get('max_views')
950 if max_views is not None and view_count > max_views:
951 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
952 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
953 return 'Skipping "%s" because it is age restricted' % video_title
954 if self.in_download_archive(info_dict):
955 return '%s has already been recorded in archive' % video_title
956
957 if not incomplete:
958 match_filter = self.params.get('match_filter')
959 if match_filter is not None:
960 ret = match_filter(info_dict)
961 if ret is not None:
962 return ret
963 return None
964
965 reason = check_filter()
966 if reason is not None:
967 self.to_screen('[download] ' + reason)
968 if reason.endswith('has already been recorded in archive') and self.params.get('break_on_existing', False):
969 raise ExistingVideoReached()
970 elif self.params.get('break_on_reject', False):
971 raise RejectedVideoReached()
972 return reason
973
974 @staticmethod
975 def add_extra_info(info_dict, extra_info):
976 '''Set the keys from extra_info in info dict if they are missing'''
977 for key, value in extra_info.items():
978 info_dict.setdefault(key, value)
979
980 def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info={},
981 process=True, force_generic_extractor=False):
982 '''
983 Returns a list with a dictionary for each video we find.
984 If 'download', also downloads the videos.
985 extra_info is a dict containing the extra values to add to each result
986 '''
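# Typical call (sketch; the URL is a placeholder):
# info = ydl.extract_info('https://example.com/watch?v=xyz', download=False)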
987
988 if not ie_key and force_generic_extractor:
989 ie_key = 'Generic'
990
991 if ie_key:
992 ies = [self.get_info_extractor(ie_key)]
993 else:
994 ies = self._ies
995
996 for ie in ies:
997 if not ie.suitable(url):
998 continue
999
1000 ie_key = ie.ie_key()
1001 ie = self.get_info_extractor(ie_key)
1002 if not ie.working():
1003 self.report_warning('The program functionality for this site has been marked as broken, '
1004 'and will probably not work.')
1005
1006 try:
1007 temp_id = str_or_none(
1008 ie.extract_id(url) if callable(getattr(ie, 'extract_id', None))
1009 else ie._match_id(url))
1010 except (AssertionError, IndexError, AttributeError):
1011 temp_id = None
1012 if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1013 self.to_screen("[%s] %s: has already been recorded in archive" % (
1014 ie_key, temp_id))
1015 break
1016 return self.__extract_info(url, ie, download, extra_info, process, info_dict)
1017 else:
1018 self.report_error('no suitable InfoExtractor for URL %s' % url)
1019
1020 def __handle_extraction_exceptions(func):
1021 def wrapper(self, *args, **kwargs):
1022 try:
1023 return func(self, *args, **kwargs)
1024 except GeoRestrictedError as e:
1025 msg = e.msg
1026 if e.countries:
1027 msg += '\nThis video is available in %s.' % ', '.join(
1028 map(ISO3166Utils.short2full, e.countries))
1029 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
1030 self.report_error(msg)
1031 except ExtractorError as e: # An error we somewhat expected
1032 self.report_error(compat_str(e), e.format_traceback())
1033 except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
1034 raise
1035 except Exception as e:
1036 if self.params.get('ignoreerrors', False):
1037 self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
1038 else:
1039 raise
1040 return wrapper
1041
1042 @__handle_extraction_exceptions
1043 def __extract_info(self, url, ie, download, extra_info, process, info_dict):
1044 ie_result = ie.extract(url)
1045 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1046 return
1047 if isinstance(ie_result, list):
1048 # Backwards compatibility: old IE result format
1049 ie_result = {
1050 '_type': 'compat_list',
1051 'entries': ie_result,
1052 }
1053 if info_dict:
1054 if info_dict.get('id'):
1055 ie_result['id'] = info_dict['id']
1056 if info_dict.get('title'):
1057 ie_result['title'] = info_dict['title']
1058 self.add_default_extra_info(ie_result, ie, url)
1059 if process:
1060 return self.process_ie_result(ie_result, download, extra_info)
1061 else:
1062 return ie_result
1063
1064 def add_default_extra_info(self, ie_result, ie, url):
1065 self.add_extra_info(ie_result, {
1066 'extractor': ie.IE_NAME,
1067 'webpage_url': url,
1068 'webpage_url_basename': url_basename(url),
1069 'extractor_key': ie.ie_key(),
1070 })
1071
1072 def process_ie_result(self, ie_result, download=True, extra_info={}):
1073 """
1074 Take the result of the ie (may be modified) and resolve all unresolved
1075 references (URLs, playlist items).
1076
1077 It will also download the videos if 'download'.
1078 Returns the resolved ie_result.
1079 """
1080 result_type = ie_result.get('_type', 'video')
1081
1082 if result_type in ('url', 'url_transparent'):
1083 ie_result['url'] = sanitize_url(ie_result['url'])
1084 extract_flat = self.params.get('extract_flat', False)
1085 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1086 or extract_flat is True):
1087 self.__forced_printings(ie_result, self.prepare_filename(ie_result), incomplete=True)
1088 return ie_result
1089
1090 if result_type == 'video':
1091 self.add_extra_info(ie_result, extra_info)
1092 return self.process_video_result(ie_result, download=download)
1093 elif result_type == 'url':
1094 # We have to add extra_info to the results because it may be
1095 # contained in a playlist
1096 return self.extract_info(ie_result['url'],
1097 download, info_dict=ie_result,
1098 ie_key=ie_result.get('ie_key'),
1099 extra_info=extra_info)
1100 elif result_type == 'url_transparent':
1101 # Use the information from the embedding page
1102 info = self.extract_info(
1103 ie_result['url'], ie_key=ie_result.get('ie_key'),
1104 extra_info=extra_info, download=False, process=False)
1105
1106 # extract_info may return None when ignoreerrors is enabled and
1107 # extraction failed with an error, don't crash and return early
1108 # in this case
1109 if not info:
1110 return info
1111
1112 force_properties = dict(
1113 (k, v) for k, v in ie_result.items() if v is not None)
1114 for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1115 if f in force_properties:
1116 del force_properties[f]
1117 new_result = info.copy()
1118 new_result.update(force_properties)
1119
1120 # Extracted info may not be a video result (i.e.
1121 # info.get('_type', 'video') != video) but rather an url or
1122 # url_transparent. In such cases outer metadata (from ie_result)
1123 # should be propagated to inner one (info). For this to happen
1124 # _type of info should be overridden with url_transparent. This
1125 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1126 if new_result.get('_type') == 'url':
1127 new_result['_type'] = 'url_transparent'
1128
1129 return self.process_ie_result(
1130 new_result, download=download, extra_info=extra_info)
1131 elif result_type in ('playlist', 'multi_video'):
1132 # Protect from infinite recursion due to recursively nested playlists
1133 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1134 webpage_url = ie_result['webpage_url']
1135 if webpage_url in self._playlist_urls:
1136 self.to_screen(
1137 '[download] Skipping already downloaded playlist: %s'
1138 % (ie_result.get('title') or ie_result.get('id')))
1139 return
1140
1141 self._playlist_level += 1
1142 self._playlist_urls.add(webpage_url)
1143 try:
1144 return self.__process_playlist(ie_result, download)
1145 finally:
1146 self._playlist_level -= 1
1147 if not self._playlist_level:
1148 self._playlist_urls.clear()
1149 elif result_type == 'compat_list':
1150 self.report_warning(
1151 'Extractor %s returned a compat_list result. '
1152 'It needs to be updated.' % ie_result.get('extractor'))
1153
1154 def _fixup(r):
1155 self.add_extra_info(
1156 r,
1157 {
1158 'extractor': ie_result['extractor'],
1159 'webpage_url': ie_result['webpage_url'],
1160 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1161 'extractor_key': ie_result['extractor_key'],
1162 }
1163 )
1164 return r
1165 ie_result['entries'] = [
1166 self.process_ie_result(_fixup(r), download, extra_info)
1167 for r in ie_result['entries']
1168 ]
1169 return ie_result
1170 else:
1171 raise Exception('Invalid result type: %s' % result_type)
1172
1173 def __process_playlist(self, ie_result, download):
1174 # We process each entry in the playlist
1175 playlist = ie_result.get('title') or ie_result.get('id')
1176 self.to_screen('[download] Downloading playlist: %s' % playlist)
1177
1178 if self.params.get('allow_playlist_files', True):
1179 ie_copy = {
1180 'playlist': playlist,
1181 'playlist_id': ie_result.get('id'),
1182 'playlist_title': ie_result.get('title'),
1183 'playlist_uploader': ie_result.get('uploader'),
1184 'playlist_uploader_id': ie_result.get('uploader_id'),
1185 'playlist_index': 0
1186 }
1187 ie_copy.update(dict(ie_result))
1188
1189 def ensure_dir_exists(path):
1190 return make_dir(path, self.report_error)
1191
1192 if self.params.get('writeinfojson', False):
1193 infofn = self.prepare_filename(ie_copy, 'pl_infojson')
1194 if not ensure_dir_exists(encodeFilename(infofn)):
1195 return
1196 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
1197 self.to_screen('[info] Playlist metadata is already present')
1198 else:
1199 playlist_info = dict(ie_result)
1200 # playlist_info['entries'] = list(playlist_info['entries']) # Entries is a generator which should not be resolved here
1201 del playlist_info['entries']
1202 self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
1203 try:
1204 write_json_file(self.filter_requested_info(playlist_info), infofn)
1205 except (OSError, IOError):
1206 self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
1207
1208 if self.params.get('writedescription', False):
1209 descfn = self.prepare_filename(ie_copy, 'pl_description')
1210 if not ensure_dir_exists(encodeFilename(descfn)):
1211 return
1212 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
1213 self.to_screen('[info] Playlist description is already present')
1214 elif ie_result.get('description') is None:
1215 self.report_warning('There\'s no playlist description to write.')
1216 else:
1217 try:
1218 self.to_screen('[info] Writing playlist description to: ' + descfn)
1219 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1220 descfile.write(ie_result['description'])
1221 except (OSError, IOError):
1222 self.report_error('Cannot write playlist description file ' + descfn)
1223 return
1224
1225 playlist_results = []
1226
1227 playliststart = self.params.get('playliststart', 1) - 1
1228 playlistend = self.params.get('playlistend')
1229 # For backwards compatibility, interpret -1 as whole list
1230 if playlistend == -1:
1231 playlistend = None
1232
1233 playlistitems_str = self.params.get('playlist_items')
1234 playlistitems = None
1235 if playlistitems_str is not None:
1236 def iter_playlistitems(format):
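# e.g. '1-3,7' yields 1, 2, 3, 7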
1237 for string_segment in format.split(','):
1238 if '-' in string_segment:
1239 start, end = string_segment.split('-')
1240 for item in range(int(start), int(end) + 1):
1241 yield int(item)
1242 else:
1243 yield int(string_segment)
1244 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1245
1246 ie_entries = ie_result['entries']
1247
1248 def make_playlistitems_entries(list_ie_entries):
1249 num_entries = len(list_ie_entries)
1250 return [
1251 list_ie_entries[i - 1] for i in playlistitems
1252 if -num_entries <= i - 1 < num_entries]
1253
1254 def report_download(num_entries):
1255 self.to_screen(
1256 '[%s] playlist %s: Downloading %d videos' %
1257 (ie_result['extractor'], playlist, num_entries))
1258
1259 if isinstance(ie_entries, list):
1260 n_all_entries = len(ie_entries)
1261 if playlistitems:
1262 entries = make_playlistitems_entries(ie_entries)
1263 else:
1264 entries = ie_entries[playliststart:playlistend]
1265 n_entries = len(entries)
1266 self.to_screen(
1267 '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
1268 (ie_result['extractor'], playlist, n_all_entries, n_entries))
1269 elif isinstance(ie_entries, PagedList):
1270 if playlistitems:
1271 entries = []
1272 for item in playlistitems:
1273 entries.extend(ie_entries.getslice(
1274 item - 1, item
1275 ))
1276 else:
1277 entries = ie_entries.getslice(
1278 playliststart, playlistend)
1279 n_entries = len(entries)
1280 report_download(n_entries)
1281 else: # iterable
1282 if playlistitems:
1283 entries = make_playlistitems_entries(list(itertools.islice(
1284 ie_entries, 0, max(playlistitems))))
1285 else:
1286 entries = list(itertools.islice(
1287 ie_entries, playliststart, playlistend))
1288 n_entries = len(entries)
1289 report_download(n_entries)
1290
1291 if self.params.get('playlistreverse', False):
1292 entries = entries[::-1]
1293
1294 if self.params.get('playlistrandom', False):
1295 random.shuffle(entries)
1296
1297 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1298
1299 for i, entry in enumerate(entries, 1):
1300 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1301 # This __x_forwarded_for_ip thing is a bit ugly but requires
1302 # minimal changes
1303 if x_forwarded_for:
1304 entry['__x_forwarded_for_ip'] = x_forwarded_for
1305 extra = {
1306 'n_entries': n_entries,
1307 'playlist': playlist,
1308 'playlist_id': ie_result.get('id'),
1309 'playlist_title': ie_result.get('title'),
1310 'playlist_uploader': ie_result.get('uploader'),
1311 'playlist_uploader_id': ie_result.get('uploader_id'),
1312 'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
1313 'extractor': ie_result['extractor'],
1314 'webpage_url': ie_result['webpage_url'],
1315 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1316 'extractor_key': ie_result['extractor_key'],
1317 }
1318
1319 if self._match_entry(entry, incomplete=True) is not None:
1320 continue
1321
1322 entry_result = self.__process_iterable_entry(entry, download, extra)
1323 # TODO: skip failed (empty) entries?
1324 playlist_results.append(entry_result)
1325 ie_result['entries'] = playlist_results
1326 self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1327 return ie_result
1328
1329 @__handle_extraction_exceptions
1330 def __process_iterable_entry(self, entry, download, extra_info):
1331 return self.process_ie_result(
1332 entry, download=download, extra_info=extra_info)
1333
1334 def _build_format_filter(self, filter_spec):
1335 " Returns a function to filter the formats according to the filter_spec "
1336
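# Example filter_specs accepted by the regexes below:
# 'filesize>100M' - numeric comparison, value parsed with parse_filesize
# 'height<=?480' - '?' after the operator also keeps formats whose
# height is unknown
# 'format_id^=hls' - string comparison: format_id starts with 'hls'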
1337 OPERATORS = {
1338 '<': operator.lt,
1339 '<=': operator.le,
1340 '>': operator.gt,
1341 '>=': operator.ge,
1342 '=': operator.eq,
1343 '!=': operator.ne,
1344 }
1345 operator_rex = re.compile(r'''(?x)\s*
1346 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
1347 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1348 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
1349 $
1350 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1351 m = operator_rex.search(filter_spec)
1352 if m:
1353 try:
1354 comparison_value = int(m.group('value'))
1355 except ValueError:
1356 comparison_value = parse_filesize(m.group('value'))
1357 if comparison_value is None:
1358 comparison_value = parse_filesize(m.group('value') + 'B')
1359 if comparison_value is None:
1360 raise ValueError(
1361 'Invalid value %r in format specification %r' % (
1362 m.group('value'), filter_spec))
1363 op = OPERATORS[m.group('op')]
1364
1365 if not m:
1366 STR_OPERATORS = {
1367 '=': operator.eq,
1368 '^=': lambda attr, value: attr.startswith(value),
1369 '$=': lambda attr, value: attr.endswith(value),
1370 '*=': lambda attr, value: value in attr,
1371 }
1372 str_operator_rex = re.compile(r'''(?x)
1373 \s*(?P<key>[a-zA-Z0-9._-]+)
1374 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
1375 \s*(?P<value>[a-zA-Z0-9._-]+)
1376 \s*$
1377 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1378 m = str_operator_rex.search(filter_spec)
1379 if m:
1380 comparison_value = m.group('value')
1381 str_op = STR_OPERATORS[m.group('op')]
1382 if m.group('negation'):
1383 op = lambda attr, value: not str_op(attr, value)
1384 else:
1385 op = str_op
1386
1387 if not m:
1388 raise ValueError('Invalid filter specification %r' % filter_spec)
1389
1390 def _filter(f):
1391 actual_value = f.get(m.group('key'))
1392 if actual_value is None:
1393 return m.group('none_inclusive')
1394 return op(actual_value, comparison_value)
1395 return _filter
1396
1397 def _default_format_spec(self, info_dict, download=True):
1398
1399 def can_merge():
1400 merger = FFmpegMergerPP(self)
1401 return merger.available and merger.can_merge()
1402
1403 prefer_best = (
1404 not self.params.get('simulate', False)
1405 and download
1406 and (
1407 not can_merge()
1408 or info_dict.get('is_live', False)
1409 or self.outtmpl_dict['default'] == '-'))
1410
1411 return (
1412 'best/bestvideo+bestaudio'
1413 if prefer_best
1414 else 'bestvideo*+bestaudio/best'
1415 if not self.params.get('allow_multiple_audio_streams', False)
1416 else 'bestvideo+bestaudio/best')
1417
1418 def build_format_selector(self, format_spec):
1419 def syntax_error(note, start):
1420 message = (
1421 'Invalid format specification: '
1422 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1423 return SyntaxError(message)
1424
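# Example format_specs this parser accepts:
# 'best/bestvideo+bestaudio' - PICKFIRST ('/') over SINGLE and MERGE ('+')
# '(mp4,webm)[height<480]' - GROUP of two choices plus a filter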
1425 PICKFIRST = 'PICKFIRST'
1426 MERGE = 'MERGE'
1427 SINGLE = 'SINGLE'
1428 GROUP = 'GROUP'
1429 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1430
1431 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1432 'video': self.params.get('allow_multiple_video_streams', False)}
1433
1434 def _parse_filter(tokens):
1435 filter_parts = []
1436 for type, string, start, _, _ in tokens:
1437 if type == tokenize.OP and string == ']':
1438 return ''.join(filter_parts)
1439 else:
1440 filter_parts.append(string)
1441
1442 def _remove_unused_ops(tokens):
1443 # Remove operators that we don't use and join them with the surrounding strings
1444 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1445 ALLOWED_OPS = ('/', '+', ',', '(', ')')
1446 last_string, last_start, last_end, last_line = None, None, None, None
1447 for type, string, start, end, line in tokens:
1448 if type == tokenize.OP and string == '[':
1449 if last_string:
1450 yield tokenize.NAME, last_string, last_start, last_end, last_line
1451 last_string = None
1452 yield type, string, start, end, line
1453 # everything inside brackets will be handled by _parse_filter
1454 for type, string, start, end, line in tokens:
1455 yield type, string, start, end, line
1456 if type == tokenize.OP and string == ']':
1457 break
1458 elif type == tokenize.OP and string in ALLOWED_OPS:
1459 if last_string:
1460 yield tokenize.NAME, last_string, last_start, last_end, last_line
1461 last_string = None
1462 yield type, string, start, end, line
1463 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1464 if not last_string:
1465 last_string = string
1466 last_start = start
1467 last_end = end
1468 else:
1469 last_string += string
1470 if last_string:
1471 yield tokenize.NAME, last_string, last_start, last_end, last_line
1472
1473 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1474 selectors = []
1475 current_selector = None
1476 for type, string, start, _, _ in tokens:
1477 # ENCODING is only defined in python 3.x
1478 if type == getattr(tokenize, 'ENCODING', None):
1479 continue
1480 elif type in [tokenize.NAME, tokenize.NUMBER]:
1481 current_selector = FormatSelector(SINGLE, string, [])
1482 elif type == tokenize.OP:
1483 if string == ')':
1484 if not inside_group:
1485 # ')' will be handled by the parentheses group
1486 tokens.restore_last_token()
1487 break
1488 elif inside_merge and string in ['/', ',']:
1489 tokens.restore_last_token()
1490 break
1491 elif inside_choice and string == ',':
1492 tokens.restore_last_token()
1493 break
1494 elif string == ',':
1495 if not current_selector:
1496 raise syntax_error('"," must follow a format selector', start)
1497 selectors.append(current_selector)
1498 current_selector = None
1499 elif string == '/':
1500 if not current_selector:
1501 raise syntax_error('"/" must follow a format selector', start)
1502 first_choice = current_selector
1503 second_choice = _parse_format_selection(tokens, inside_choice=True)
1504 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1505 elif string == '[':
1506 if not current_selector:
1507 current_selector = FormatSelector(SINGLE, 'best', [])
1508 format_filter = _parse_filter(tokens)
1509 current_selector.filters.append(format_filter)
1510 elif string == '(':
1511 if current_selector:
1512 raise syntax_error('Unexpected "("', start)
1513 group = _parse_format_selection(tokens, inside_group=True)
1514 current_selector = FormatSelector(GROUP, group, [])
1515 elif string == '+':
1516 if not current_selector:
1517 raise syntax_error('Unexpected "+"', start)
1518 selector_1 = current_selector
1519 selector_2 = _parse_format_selection(tokens, inside_merge=True)
1520 if not selector_2:
1521 raise syntax_error('Expected a selector', start)
1522 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1523 else:
1524 raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1525 elif type == tokenize.ENDMARKER:
1526 break
1527 if current_selector:
1528 selectors.append(current_selector)
1529 return selectors
1530
1531 def _build_selector_function(selector):
1532 if isinstance(selector, list): # ,
1533 fs = [_build_selector_function(s) for s in selector]
1534
1535 def selector_function(ctx):
1536 for f in fs:
1537 for format in f(ctx):
1538 yield format
1539 return selector_function
1540
1541 elif selector.type == GROUP: # ()
1542 selector_function = _build_selector_function(selector.selector)
1543
1544 elif selector.type == PICKFIRST: # /
1545 fs = [_build_selector_function(s) for s in selector.selector]
1546
1547 def selector_function(ctx):
1548 for f in fs:
1549 picked_formats = list(f(ctx))
1550 if picked_formats:
1551 return picked_formats
1552 return []
1553
1554 elif selector.type == SINGLE: # atom
1555 format_spec = selector.selector if selector.selector is not None else 'best'
1556
1557 if format_spec == 'all':
1558 def selector_function(ctx):
1559 formats = list(ctx['formats'])
1560 if formats:
1561 for f in formats:
1562 yield f
1563
1564 else:
1565 format_fallback = False
1566 format_spec_obj = re.match(r'(best|worst|b|w)(video|audio|v|a)?(\*)?$', format_spec)
1567 if format_spec_obj is not None:
1568 format_idx = 0 if format_spec_obj.group(1)[0] == 'w' else -1
1569 format_type = format_spec_obj.group(2)[0] if format_spec_obj.group(2) else False
1570 not_format_type = 'v' if format_type == 'a' else 'a'
1571 format_modified = format_spec_obj.group(3) is not None
1572
1573 format_fallback = not format_type and not format_modified # for b, w
1574 filter_f = ((lambda f: f.get(format_type + 'codec') != 'none')
1575 if format_type and format_modified # bv*, ba*, wv*, wa*
1576 else (lambda f: f.get(not_format_type + 'codec') == 'none')
1577 if format_type # bv, ba, wv, wa
1578 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
1579 if not format_modified # b, w
1580 else None) # b*, w*
1581 else:
1582 format_idx = -1
1583 filter_f = ((lambda f: f.get('ext') == format_spec)
1584 if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav'] # extension
1585 else (lambda f: f.get('format_id') == format_spec)) # id
1586
1587 def selector_function(ctx):
1588 formats = list(ctx['formats'])
1589 if not formats:
1590 return
1591 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
1592 if matches:
1593 yield matches[format_idx]
1594 elif format_fallback == 'force' or (format_fallback and ctx['incomplete_formats']):
                        # For extractors with incomplete formats (audio-only, e.g. soundcloud,
                        # or video-only, e.g. imgur), best/worst will fall back to the
                        # best/worst {video,audio}-only format
1598 yield formats[format_idx]
1599
1600 elif selector.type == MERGE: # +
1601 def _merge(formats_pair):
1602 format_1, format_2 = formats_pair
1603
1604 formats_info = []
1605 formats_info.extend(format_1.get('requested_formats', (format_1,)))
1606 formats_info.extend(format_2.get('requested_formats', (format_2,)))
1607
1608 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
                        # Popping from a list while enumerating it skips elements,
                        # so build a filtered copy instead
                        keep = []
                        get_no_more = {'video': False, 'audio': False}
                        for fmt_info in formats_info:
                            discard = False
                            for aud_vid in ('audio', 'video'):
                                if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                                    if get_no_more[aud_vid]:
                                        discard = True
                                    get_no_more[aud_vid] = True
                            if not discard:
                                keep.append(fmt_info)
                        formats_info = keep
1616
1617 if len(formats_info) == 1:
1618 return formats_info[0]
1619
1620 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1621 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1622
1623 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1624 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1625
1626 output_ext = self.params.get('merge_output_format')
1627 if not output_ext:
1628 if the_only_video:
1629 output_ext = the_only_video['ext']
1630 elif the_only_audio and not video_fmts:
1631 output_ext = the_only_audio['ext']
1632 else:
1633 output_ext = 'mkv'
1634
1635 new_dict = {
1636 'requested_formats': formats_info,
1637 'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
1638 'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
1639 'ext': output_ext,
1640 }
1641
1642 if the_only_video:
1643 new_dict.update({
1644 'width': the_only_video.get('width'),
1645 'height': the_only_video.get('height'),
1646 'resolution': the_only_video.get('resolution'),
1647 'fps': the_only_video.get('fps'),
1648 'vcodec': the_only_video.get('vcodec'),
1649 'vbr': the_only_video.get('vbr'),
1650 'stretched_ratio': the_only_video.get('stretched_ratio'),
1651 })
1652
1653 if the_only_audio:
1654 new_dict.update({
1655 'acodec': the_only_audio.get('acodec'),
1656 'abr': the_only_audio.get('abr'),
1657 })
1658
1659 return new_dict
1660
1661 selector_1, selector_2 = map(_build_selector_function, selector.selector)
1662
1663 def selector_function(ctx):
1664 for pair in itertools.product(
1665 selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
1666 yield _merge(pair)
1667
1668 filters = [self._build_format_filter(f) for f in selector.filters]
1669
1670 def final_selector(ctx):
1671 ctx_copy = copy.deepcopy(ctx)
1672 for _filter in filters:
1673 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1674 return selector_function(ctx_copy)
1675 return final_selector
1676
1677 stream = io.BytesIO(format_spec.encode('utf-8'))
1678 try:
1679 tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1680 except tokenize.TokenError:
1681 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1682
1683 class TokenIterator(object):
1684 def __init__(self, tokens):
1685 self.tokens = tokens
1686 self.counter = 0
1687
1688 def __iter__(self):
1689 return self
1690
1691 def __next__(self):
1692 if self.counter >= len(self.tokens):
1693 raise StopIteration()
1694 value = self.tokens[self.counter]
1695 self.counter += 1
1696 return value
1697
1698 next = __next__
1699
1700 def restore_last_token(self):
1701 self.counter -= 1
1702
1703 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1704 return _build_selector_function(parsed_selector)
1705
1706 def _calc_headers(self, info_dict):
1707 res = std_headers.copy()
1708
1709 add_headers = info_dict.get('http_headers')
1710 if add_headers:
1711 res.update(add_headers)
1712
1713 cookies = self._calc_cookies(info_dict)
1714 if cookies:
1715 res['Cookie'] = cookies
1716
1717 if 'X-Forwarded-For' not in res:
1718 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1719 if x_forwarded_for_ip:
1720 res['X-Forwarded-For'] = x_forwarded_for_ip
1721
1722 return res
1723
1724 def _calc_cookies(self, info_dict):
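        # Build a throwaway request so the cookiejar can compute the Cookie
        # header it would send for this URL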
1725 pr = sanitized_Request(info_dict['url'])
1726 self.cookiejar.add_cookie_header(pr)
1727 return pr.get_header('Cookie')
1728
1729 def process_video_result(self, info_dict, download=True):
1730 assert info_dict.get('_type', 'video') == 'video'
1731
1732 if 'id' not in info_dict:
1733 raise ExtractorError('Missing "id" field in extractor result')
1734 if 'title' not in info_dict:
1735 raise ExtractorError('Missing "title" field in extractor result')
1736
1737 def report_force_conversion(field, field_not, conversion):
1738 self.report_warning(
1739 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
1740 % (field, field_not, conversion))
1741
1742 def sanitize_string_field(info, string_field):
1743 field = info.get(string_field)
1744 if field is None or isinstance(field, compat_str):
1745 return
1746 report_force_conversion(string_field, 'a string', 'string')
1747 info[string_field] = compat_str(field)
1748
1749 def sanitize_numeric_fields(info):
1750 for numeric_field in self._NUMERIC_FIELDS:
1751 field = info.get(numeric_field)
1752 if field is None or isinstance(field, compat_numeric_types):
1753 continue
1754 report_force_conversion(numeric_field, 'numeric', 'int')
1755 info[numeric_field] = int_or_none(field)
1756
1757 sanitize_string_field(info_dict, 'id')
1758 sanitize_numeric_fields(info_dict)
1759
1760 if 'playlist' not in info_dict:
1761 # It isn't part of a playlist
1762 info_dict['playlist'] = None
1763 info_dict['playlist_index'] = None
1764
1765 thumbnails = info_dict.get('thumbnails')
1766 if thumbnails is None:
1767 thumbnail = info_dict.get('thumbnail')
1768 if thumbnail:
1769 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1770 if thumbnails:
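            # Sort worst-to-best so that thumbnails[-1] is the preferred one;
            # missing preference/width/height values sort as -1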
1771 thumbnails.sort(key=lambda t: (
1772 t.get('preference') if t.get('preference') is not None else -1,
1773 t.get('width') if t.get('width') is not None else -1,
1774 t.get('height') if t.get('height') is not None else -1,
1775 t.get('id') if t.get('id') is not None else '', t.get('url')))
1776 for i, t in enumerate(thumbnails):
1777 t['url'] = sanitize_url(t['url'])
1778 if t.get('width') and t.get('height'):
1779 t['resolution'] = '%dx%d' % (t['width'], t['height'])
1780 if t.get('id') is None:
1781 t['id'] = '%d' % i
1782
1783 if self.params.get('list_thumbnails'):
1784 self.list_thumbnails(info_dict)
1785 return
1786
1787 thumbnail = info_dict.get('thumbnail')
1788 if thumbnail:
1789 info_dict['thumbnail'] = sanitize_url(thumbnail)
1790 elif thumbnails:
1791 info_dict['thumbnail'] = thumbnails[-1]['url']
1792
1793 if 'display_id' not in info_dict and 'id' in info_dict:
1794 info_dict['display_id'] = info_dict['id']
1795
1796 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1797 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1798 # see http://bugs.python.org/issue1646728)
1799 try:
1800 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1801 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1802 except (ValueError, OverflowError, OSError):
1803 pass
1804
1805 # Auto generate title fields corresponding to the *_number fields when missing
1806 # in order to always have clean titles. This is very common for TV series.
1807 for field in ('chapter', 'season', 'episode'):
1808 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1809 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
1810
1811 for cc_kind in ('subtitles', 'automatic_captions'):
1812 cc = info_dict.get(cc_kind)
1813 if cc:
1814 for _, subtitle in cc.items():
1815 for subtitle_format in subtitle:
1816 if subtitle_format.get('url'):
1817 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
1818 if subtitle_format.get('ext') is None:
1819 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1820
1821 automatic_captions = info_dict.get('automatic_captions')
1822 subtitles = info_dict.get('subtitles')
1823
1824 if self.params.get('listsubtitles', False):
1825 if 'automatic_captions' in info_dict:
1826 self.list_subtitles(
1827 info_dict['id'], automatic_captions, 'automatic captions')
1828 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1829 return
1830
1831 info_dict['requested_subtitles'] = self.process_subtitles(
1832 info_dict['id'], subtitles, automatic_captions)
1833
1834 # We now pick which formats have to be downloaded
1835 if info_dict.get('formats') is None:
1836 # There's only one format available
1837 formats = [info_dict]
1838 else:
1839 formats = info_dict['formats']
1840
1841 if not formats:
1842 raise ExtractorError('No video formats found!')
1843
1844 def is_wellformed(f):
1845 url = f.get('url')
1846 if not url:
1847 self.report_warning(
1848 '"url" field is missing or empty - skipping format, '
1849 'there is an error in extractor')
1850 return False
1851 if isinstance(url, bytes):
1852 sanitize_string_field(f, 'url')
1853 return True
1854
1855 # Filter out malformed formats for better extraction robustness
1856 formats = list(filter(is_wellformed, formats))
1857
1858 formats_dict = {}
1859
1860 # We check that all the formats have the format and format_id fields
1861 for i, format in enumerate(formats):
1862 sanitize_string_field(format, 'format_id')
1863 sanitize_numeric_fields(format)
1864 format['url'] = sanitize_url(format['url'])
1865 if not format.get('format_id'):
1866 format['format_id'] = compat_str(i)
1867 else:
1868 # Sanitize format_id from characters used in format selector expression
1869 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
1870 format_id = format['format_id']
1871 if format_id not in formats_dict:
1872 formats_dict[format_id] = []
1873 formats_dict[format_id].append(format)
1874
1875 # Make sure all formats have unique format_id
1876 for format_id, ambiguous_formats in formats_dict.items():
1877 if len(ambiguous_formats) > 1:
1878 for i, format in enumerate(ambiguous_formats):
1879 format['format_id'] = '%s-%d' % (format_id, i)
1880
1881 for i, format in enumerate(formats):
1882 if format.get('format') is None:
1883 format['format'] = '{id} - {res}{note}'.format(
1884 id=format['format_id'],
1885 res=self.format_resolution(format),
1886 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1887 )
1888 # Automatically determine file extension if missing
1889 if format.get('ext') is None:
1890 format['ext'] = determine_ext(format['url']).lower()
1891 # Automatically determine protocol if missing (useful for format
1892 # selection purposes)
1893 if format.get('protocol') is None:
1894 format['protocol'] = determine_protocol(format)
1895 # Add HTTP headers, so that external programs can use them from the
1896 # json output
1897 full_format_info = info_dict.copy()
1898 full_format_info.update(format)
1899 format['http_headers'] = self._calc_headers(full_format_info)
1900 # Remove private housekeeping stuff
1901 if '__x_forwarded_for_ip' in info_dict:
1902 del info_dict['__x_forwarded_for_ip']
1903
1904 # TODO Central sorting goes here
1905
1906 if formats[0] is not info_dict:
            # Only set the 'formats' field if the original info_dict lists them;
            # otherwise we end up with a circular reference: the first (and only)
            # element of the 'formats' field in info_dict would be info_dict
            # itself, which can't be exported to JSON
1911 info_dict['formats'] = formats
1912 if self.params.get('listformats'):
1913 self.list_formats(info_dict)
1914 return
1915
1916 req_format = self.params.get('format')
1917 if req_format is None:
1918 req_format = self._default_format_spec(info_dict, download=download)
1919 if self.params.get('verbose'):
1920 self.to_screen('[debug] Default format spec: %s' % req_format)
1921
1922 format_selector = self.build_format_selector(req_format)
1923
        # During format selection we may need access to the original set of
        # formats in order to calculate metrics or do other processing.
        # For now we need to be able to guess whether the formats provided by
        # the extractor are incomplete (i.e. whether the extractor provides only
        # video-only or audio-only formats) so that format selection works
        # properly for extractors with such incomplete formats (see
        # https://github.com/ytdl-org/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may no
        # longer match the original set, the results could otherwise be
        # incorrect. Thus the original formats, or metrics pre-calculated from
        # them, should be passed to the format selection routines as well.
        # We therefore pass a context object containing all the necessary
        # additional data instead of just the formats.
        # This fixes an incorrect format selection issue (see
        # https://github.com/ytdl-org/youtube-dl/issues/10083).
1939 incomplete_formats = (
1940 # All formats are video-only or
1941 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
1942 # all formats are audio-only
1943 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
1944
1945 ctx = {
1946 'formats': formats,
1947 'incomplete_formats': incomplete_formats,
1948 }
1949
1950 formats_to_download = list(format_selector(ctx))
1951 if not formats_to_download:
1952 raise ExtractorError('requested format not available',
1953 expected=True)
1954
1955 if download:
1956 self.to_screen('[info] Downloading format(s) %s' % ", ".join([f['format_id'] for f in formats_to_download]))
1957 if len(formats_to_download) > 1:
1958 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1959 for format in formats_to_download:
1960 new_info = dict(info_dict)
1961 new_info.update(format)
1962 self.process_info(new_info)
1963 # We update the info dict with the best quality format (backwards compatibility)
1964 info_dict.update(formats_to_download[-1])
1965 return info_dict
1966
1967 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1968 """Select the requested subtitles and their format"""
1969 available_subs = {}
1970 if normal_subtitles and self.params.get('writesubtitles'):
1971 available_subs.update(normal_subtitles)
1972 if automatic_captions and self.params.get('writeautomaticsub'):
1973 for lang, cap_info in automatic_captions.items():
1974 if lang not in available_subs:
1975 available_subs[lang] = cap_info
1976
        if (not self.params.get('writesubtitles')
                and not self.params.get('writeautomaticsub')
                or not available_subs):
1980 return None
1981
1982 if self.params.get('allsubtitles', False):
1983 requested_langs = available_subs.keys()
1984 else:
1985 if self.params.get('subtitleslangs', False):
1986 requested_langs = self.params.get('subtitleslangs')
1987 elif 'en' in available_subs:
1988 requested_langs = ['en']
1989 else:
1990 requested_langs = [list(available_subs.keys())[0]]
1991
1992 formats_query = self.params.get('subtitlesformat', 'best')
1993 formats_preference = formats_query.split('/') if formats_query else []
1994 subs = {}
1995 for lang in requested_langs:
1996 formats = available_subs.get(lang)
1997 if formats is None:
1998 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1999 continue
2000 for ext in formats_preference:
2001 if ext == 'best':
2002 f = formats[-1]
2003 break
2004 matches = list(filter(lambda f: f['ext'] == ext, formats))
2005 if matches:
2006 f = matches[-1]
2007 break
2008 else:
2009 f = formats[-1]
2010 self.report_warning(
2011 'No subtitle format found matching "%s" for language %s, '
2012 'using %s' % (formats_query, lang, f['ext']))
2013 subs[lang] = f
2014 return subs
2015
2016 def __forced_printings(self, info_dict, filename, incomplete):
2017 def print_mandatory(field):
2018 if (self.params.get('force%s' % field, False)
2019 and (not incomplete or info_dict.get(field) is not None)):
2020 self.to_stdout(info_dict[field])
2021
2022 def print_optional(field):
2023 if (self.params.get('force%s' % field, False)
2024 and info_dict.get(field) is not None):
2025 self.to_stdout(info_dict[field])
2026
2027 print_mandatory('title')
2028 print_mandatory('id')
2029 if self.params.get('forceurl', False) and not incomplete:
2030 if info_dict.get('requested_formats') is not None:
2031 for f in info_dict['requested_formats']:
2032 self.to_stdout(f['url'] + f.get('play_path', ''))
2033 else:
2034 # For RTMP URLs, also include the playpath
2035 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
2036 print_optional('thumbnail')
2037 print_optional('description')
2038 if self.params.get('forcefilename', False) and filename is not None:
2039 self.to_stdout(filename)
2040 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
2041 self.to_stdout(formatSeconds(info_dict['duration']))
2042 print_mandatory('format')
2043 if self.params.get('forcejson', False):
2044 self.post_extract(info_dict)
2045 self.to_stdout(json.dumps(info_dict))
2046
2047 def process_info(self, info_dict):
2048 """Process a single resolved IE result."""
2049
2050 assert info_dict.get('_type', 'video') == 'video'
2051
2052 info_dict.setdefault('__postprocessors', [])
2053
2054 max_downloads = self.params.get('max_downloads')
2055 if max_downloads is not None:
2056 if self._num_downloads >= int(max_downloads):
2057 raise MaxDownloadsReached()
2058
2059 # TODO: backward compatibility, to be removed
2060 info_dict['fulltitle'] = info_dict['title']
2061
2062 if 'format' not in info_dict:
2063 info_dict['format'] = info_dict['ext']
2064
2065 if self._match_entry(info_dict, incomplete=False) is not None:
2066 return
2067
2068 self.post_extract(info_dict)
2069 self._num_downloads += 1
2070
2071 info_dict = self.pre_process(info_dict)
2072
2073 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2074 temp_filename = self.prepare_filename(info_dict, 'temp')
2075 files_to_move = {}
2076 skip_dl = self.params.get('skip_download', False)
2077
2078 # Forced printings
2079 self.__forced_printings(info_dict, full_filename, incomplete=False)
2080
2081 if self.params.get('simulate', False):
2082 if self.params.get('force_write_download_archive', False):
2083 self.record_download_archive(info_dict)
2084
2085 # Do nothing else if in simulate mode
2086 return
2087
2088 if full_filename is None:
2089 return
2090
2091 def ensure_dir_exists(path):
2092 return make_dir(path, self.report_error)
2093
2094 if not ensure_dir_exists(encodeFilename(full_filename)):
2095 return
2096 if not ensure_dir_exists(encodeFilename(temp_filename)):
2097 return
2098
2099 if self.params.get('writedescription', False):
2100 descfn = self.prepare_filename(info_dict, 'description')
2101 if not ensure_dir_exists(encodeFilename(descfn)):
2102 return
2103 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
2104 self.to_screen('[info] Video description is already present')
2105 elif info_dict.get('description') is None:
2106 self.report_warning('There\'s no description to write.')
2107 else:
2108 try:
2109 self.to_screen('[info] Writing video description to: ' + descfn)
2110 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
2111 descfile.write(info_dict['description'])
2112 except (OSError, IOError):
2113 self.report_error('Cannot write description file ' + descfn)
2114 return
2115
2116 if self.params.get('writeannotations', False):
2117 annofn = self.prepare_filename(info_dict, 'annotation')
2118 if not ensure_dir_exists(encodeFilename(annofn)):
2119 return
2120 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2121 self.to_screen('[info] Video annotations are already present')
2122 elif not info_dict.get('annotations'):
2123 self.report_warning('There are no annotations to write.')
2124 else:
2125 try:
2126 self.to_screen('[info] Writing video annotations to: ' + annofn)
2127 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2128 annofile.write(info_dict['annotations'])
2129 except (KeyError, TypeError):
2130 self.report_warning('There are no annotations to write.')
2131 except (OSError, IOError):
2132 self.report_error('Cannot write annotations file: ' + annofn)
2133 return
2134
2135 def dl(name, info, subtitle=False):
2136 fd = get_suitable_downloader(info, self.params)(self, self.params)
2137 for ph in self._progress_hooks:
2138 fd.add_progress_hook(ph)
2139 if self.params.get('verbose'):
2140 self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
2141 return fd.download(name, info, subtitle)
2142
2143 subtitles_are_requested = any([self.params.get('writesubtitles', False),
2144 self.params.get('writeautomaticsub')])
2145
2146 if subtitles_are_requested and info_dict.get('requested_subtitles'):
            # Subtitle download errors are already handled as non-fatal in the
            # relevant IE, so processing continues silently when an IE does not
            # support subtitles
2149 subtitles = info_dict['requested_subtitles']
2150 # ie = self.get_info_extractor(info_dict['extractor_key'])
2151 for sub_lang, sub_info in subtitles.items():
2152 sub_format = sub_info['ext']
2153 sub_fn = self.prepare_filename(info_dict, 'subtitle')
2154 sub_filename = subtitles_filename(
2155 temp_filename if not skip_dl else sub_fn,
2156 sub_lang, sub_format, info_dict.get('ext'))
2157 sub_filename_final = subtitles_filename(sub_fn, sub_lang, sub_format, info_dict.get('ext'))
2158 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
2159 self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
2160 files_to_move[sub_filename] = sub_filename_final
2161 else:
2162 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
2163 if sub_info.get('data') is not None:
2164 try:
2165 # Use newline='' to prevent conversion of newline characters
2166 # See https://github.com/ytdl-org/youtube-dl/issues/10268
2167 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
2168 subfile.write(sub_info['data'])
2169 files_to_move[sub_filename] = sub_filename_final
2170 except (OSError, IOError):
2171 self.report_error('Cannot write subtitles file ' + sub_filename)
2172 return
2173 else:
2174 try:
2175 dl(sub_filename, sub_info, subtitle=True)
2176 files_to_move[sub_filename] = sub_filename_final
2177 except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2178 self.report_warning('Unable to download subtitle for "%s": %s' %
2179 (sub_lang, error_to_compat_str(err)))
2180 continue
2181
2182 if skip_dl:
2183 if self.params.get('convertsubtitles', False):
2184 # subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
2185 filename_real_ext = os.path.splitext(full_filename)[1][1:]
2186 filename_wo_ext = (
2187 os.path.splitext(full_filename)[0]
2188 if filename_real_ext == info_dict['ext']
2189 else full_filename)
2190 afilename = '%s.%s' % (filename_wo_ext, self.params.get('convertsubtitles'))
2191 # if subconv.available:
2192 # info_dict['__postprocessors'].append(subconv)
2193 if os.path.exists(encodeFilename(afilename)):
2194 self.to_screen(
2195 '[download] %s has already been downloaded and '
2196 'converted' % afilename)
2197 else:
2198 try:
2199 self.post_process(full_filename, info_dict, files_to_move)
2200 except PostProcessingError as err:
2201 self.report_error('Postprocessing: %s' % str(err))
2202 return
2203
2204 if self.params.get('writeinfojson', False):
2205 infofn = self.prepare_filename(info_dict, 'infojson')
2206 if not ensure_dir_exists(encodeFilename(infofn)):
2207 return
2208 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
2209 self.to_screen('[info] Video metadata is already present')
2210 else:
2211 self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
2212 try:
2213 write_json_file(self.filter_requested_info(info_dict), infofn)
2214 except (OSError, IOError):
2215 self.report_error('Cannot write video metadata to JSON file ' + infofn)
2216 return
2217 info_dict['__infojson_filename'] = infofn
2218
2219 thumbfn = self.prepare_filename(info_dict, 'thumbnail')
2220 thumb_fn_temp = temp_filename if not skip_dl else thumbfn
2221 for thumb_ext in self._write_thumbnails(info_dict, thumb_fn_temp):
2222 thumb_filename_temp = replace_extension(thumb_fn_temp, thumb_ext, info_dict.get('ext'))
2223 thumb_filename = replace_extension(thumbfn, thumb_ext, info_dict.get('ext'))
2224 files_to_move[thumb_filename_temp] = info_dict['__thumbnail_filename'] = thumb_filename
2225
2226 # Write internet shortcut files
2227 url_link = webloc_link = desktop_link = False
2228 if self.params.get('writelink', False):
2229 if sys.platform == "darwin": # macOS.
2230 webloc_link = True
2231 elif sys.platform.startswith("linux"):
2232 desktop_link = True
2233 else: # if sys.platform in ['win32', 'cygwin']:
2234 url_link = True
2235 if self.params.get('writeurllink', False):
2236 url_link = True
2237 if self.params.get('writewebloclink', False):
2238 webloc_link = True
2239 if self.params.get('writedesktoplink', False):
2240 desktop_link = True
2241
2242 if url_link or webloc_link or desktop_link:
2243 if 'webpage_url' not in info_dict:
2244 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2245 return
2246 ascii_url = iri_to_uri(info_dict['webpage_url'])
2247
2248 def _write_link_file(extension, template, newline, embed_filename):
2249 linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
                if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2251 self.to_screen('[info] Internet shortcut is already present')
2252 else:
2253 try:
2254 self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2255 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2256 template_vars = {'url': ascii_url}
2257 if embed_filename:
2258 template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2259 linkfile.write(template % template_vars)
2260 except (OSError, IOError):
2261 self.report_error('Cannot write internet shortcut ' + linkfn)
2262 return False
2263 return True
2264
2265 if url_link:
2266 if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2267 return
2268 if webloc_link:
2269 if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2270 return
2271 if desktop_link:
2272 if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2273 return
2274
2275 # Download
2276 must_record_download_archive = False
2277 if not skip_dl:
2278 try:
2279
2280 def existing_file(*filepaths):
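                    # Reuse an already-downloaded (possibly already-converted)
                    # file when allowed; with overwrites, delete the candidates
                    # and return None so a fresh download takes place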
2281 ext = info_dict.get('ext')
2282 final_ext = self.params.get('final_ext', ext)
2283 existing_files = []
2284 for file in orderedSet(filepaths):
2285 if final_ext != ext:
2286 converted = replace_extension(file, final_ext, ext)
2287 if os.path.exists(encodeFilename(converted)):
2288 existing_files.append(converted)
2289 if os.path.exists(encodeFilename(file)):
2290 existing_files.append(file)
2291
2292 if not existing_files or self.params.get('overwrites', False):
2293 for file in orderedSet(existing_files):
2294 self.report_file_delete(file)
2295 os.remove(encodeFilename(file))
2296 return None
2297
2298 self.report_file_already_downloaded(existing_files[0])
2299 info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2300 return existing_files[0]
2301
2302 success = True
2303 if info_dict.get('requested_formats') is not None:
2304 downloaded = []
2305 merger = FFmpegMergerPP(self)
2306 if self.params.get('allow_unplayable_formats'):
2307 self.report_warning(
2308 'You have requested merging of multiple formats '
2309 'while also allowing unplayable formats to be downloaded. '
2310 'The formats won\'t be merged to prevent data corruption.')
2311 elif not merger.available:
2312 self.report_warning(
2313 'You have requested merging of multiple formats but ffmpeg is not installed. '
2314 'The formats won\'t be merged.')
2315
2316 def compatible_formats(formats):
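                    # Heuristic: only formats whose extensions fall in the same
                    # container family (e.g. mp4 video + m4a audio) are merged
                    # as-is; otherwise the caller remuxes the result into mkv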
2317 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2318 video_formats = [format for format in formats if format.get('vcodec') != 'none']
2319 audio_formats = [format for format in formats if format.get('acodec') != 'none']
2320 if len(video_formats) > 2 or len(audio_formats) > 2:
2321 return False
2322
2323 # Check extension
2324 exts = set(format.get('ext') for format in formats)
2325 COMPATIBLE_EXTS = (
2326 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2327 set(('webm',)),
2328 )
2329 for ext_sets in COMPATIBLE_EXTS:
2330 if ext_sets.issuperset(exts):
2331 return True
2332 # TODO: Check acodec/vcodec
2333 return False
2334
2335 requested_formats = info_dict['requested_formats']
2336 old_ext = info_dict['ext']
2337 if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
2338 info_dict['ext'] = 'mkv'
2339 self.report_warning(
2340 'Requested formats are incompatible for merge and will be merged into mkv.')
2341
2342 def correct_ext(filename):
2343 filename_real_ext = os.path.splitext(filename)[1][1:]
2344 filename_wo_ext = (
2345 os.path.splitext(filename)[0]
2346 if filename_real_ext == old_ext
2347 else filename)
2348 return '%s.%s' % (filename_wo_ext, info_dict['ext'])
2349
2350 # Ensure filename always has a correct extension for successful merge
2351 full_filename = correct_ext(full_filename)
2352 temp_filename = correct_ext(temp_filename)
2353 dl_filename = existing_file(full_filename, temp_filename)
2354 info_dict['__real_download'] = False
2355 if dl_filename is None:
2356 for f in requested_formats:
2357 new_info = dict(info_dict)
2358 new_info.update(f)
2359 fname = prepend_extension(
2360 self.prepare_filename(new_info, 'temp'),
2361 'f%s' % f['format_id'], new_info['ext'])
2362 if not ensure_dir_exists(fname):
2363 return
2364 downloaded.append(fname)
2365 partial_success, real_download = dl(fname, new_info)
2366 info_dict['__real_download'] = info_dict['__real_download'] or real_download
2367 success = success and partial_success
2368 if merger.available and not self.params.get('allow_unplayable_formats'):
2369 info_dict['__postprocessors'].append(merger)
2370 info_dict['__files_to_merge'] = downloaded
                            # Even if nothing new was downloaded, the merge itself
                            # happens only now, so count it as a real download
2372 info_dict['__real_download'] = True
2373 else:
2374 for file in downloaded:
2375 files_to_move[file] = None
2376 else:
2377 # Just a single file
2378 dl_filename = existing_file(full_filename, temp_filename)
2379 if dl_filename is None:
2380 success, real_download = dl(temp_filename, info_dict)
2381 info_dict['__real_download'] = real_download
2382
2383 dl_filename = dl_filename or temp_filename
2384 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2385
2386 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2387 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2388 return
2389 except (OSError, IOError) as err:
2390 raise UnavailableVideoError(err)
2391 except (ContentTooShortError, ) as err:
2392 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2393 return
2394
2395 if success and full_filename != '-':
2396 # Fixup content
2397 fixup_policy = self.params.get('fixup')
2398 if fixup_policy is None:
2399 fixup_policy = 'detect_or_warn'
2400
2401 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg to fix this automatically.'
2402
2403 stretched_ratio = info_dict.get('stretched_ratio')
2404 if stretched_ratio is not None and stretched_ratio != 1:
2405 if fixup_policy == 'warn':
2406 self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2407 info_dict['id'], stretched_ratio))
2408 elif fixup_policy == 'detect_or_warn':
2409 stretched_pp = FFmpegFixupStretchedPP(self)
2410 if stretched_pp.available:
2411 info_dict['__postprocessors'].append(stretched_pp)
2412 else:
2413 self.report_warning(
2414 '%s: Non-uniform pixel ratio (%s). %s'
2415 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
2416 else:
2417 assert fixup_policy in ('ignore', 'never')
2418
2419 if (info_dict.get('requested_formats') is None
2420 and info_dict.get('container') == 'm4a_dash'
2421 and info_dict.get('ext') == 'm4a'):
2422 if fixup_policy == 'warn':
2423 self.report_warning(
2424 '%s: writing DASH m4a. '
2425 'Only some players support this container.'
2426 % info_dict['id'])
2427 elif fixup_policy == 'detect_or_warn':
2428 fixup_pp = FFmpegFixupM4aPP(self)
2429 if fixup_pp.available:
2430 info_dict['__postprocessors'].append(fixup_pp)
2431 else:
2432 self.report_warning(
2433 '%s: writing DASH m4a. '
2434 'Only some players support this container. %s'
2435 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2436 else:
2437 assert fixup_policy in ('ignore', 'never')
2438
            if (info_dict.get('protocol') == 'm3u8_native'
                    or (info_dict.get('protocol') == 'm3u8'
                        and self.params.get('hls_prefer_native'))):
2442 if fixup_policy == 'warn':
2443 self.report_warning('%s: malformed AAC bitstream detected.' % (
2444 info_dict['id']))
2445 elif fixup_policy == 'detect_or_warn':
2446 fixup_pp = FFmpegFixupM3u8PP(self)
2447 if fixup_pp.available:
2448 info_dict['__postprocessors'].append(fixup_pp)
2449 else:
2450 self.report_warning(
2451 '%s: malformed AAC bitstream detected. %s'
2452 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2453 else:
2454 assert fixup_policy in ('ignore', 'never')
2455
2456 try:
2457 self.post_process(dl_filename, info_dict, files_to_move)
2458 except PostProcessingError as err:
2459 self.report_error('Postprocessing: %s' % str(err))
2460 return
2461 try:
2462 for ph in self._post_hooks:
2463 ph(full_filename)
2464 except Exception as err:
2465 self.report_error('post hooks: %s' % str(err))
2466 return
2467 must_record_download_archive = True
2468
2469 if must_record_download_archive or self.params.get('force_write_download_archive', False):
2470 self.record_download_archive(info_dict)
2471 max_downloads = self.params.get('max_downloads')
2472 if max_downloads is not None and self._num_downloads >= int(max_downloads):
2473 raise MaxDownloadsReached()
2474
2475 def download(self, url_list):
2476 """Download a given list of URLs."""
2477 outtmpl = self.outtmpl_dict['default']
2478 if (len(url_list) > 1
2479 and outtmpl != '-'
2480 and '%' not in outtmpl
2481 and self.params.get('max_downloads') != 1):
2482 raise SameFileError(outtmpl)
2483
2484 for url in url_list:
2485 try:
                # Despite its name, extract_info also performs the downloads
2487 res = self.extract_info(
2488 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2489 except UnavailableVideoError:
2490 self.report_error('unable to download video')
2491 except MaxDownloadsReached:
2492 self.to_screen('[info] Maximum number of downloaded files reached')
2493 raise
2494 except ExistingVideoReached:
2495 self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
2496 raise
2497 except RejectedVideoReached:
2498 self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
2499 raise
2500 else:
2501 if self.params.get('dump_single_json', False):
2502 self.post_extract(res)
2503 self.to_stdout(json.dumps(res))
2504
2505 return self._download_retcode
2506
2507 def download_with_info_file(self, info_filename):
2508 with contextlib.closing(fileinput.FileInput(
2509 [info_filename], mode='r',
2510 openhook=fileinput.hook_encoded('utf-8'))) as f:
            # FileInput doesn't have a read method, so we can't call json.load directly
2512 info = self.filter_requested_info(json.loads('\n'.join(f)))
2513 try:
2514 self.process_ie_result(info, download=True)
2515 except DownloadError:
2516 webpage_url = info.get('webpage_url')
2517 if webpage_url is not None:
2518 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2519 return self.download([webpage_url])
2520 else:
2521 raise
2522 return self._download_retcode
2523
2524 @staticmethod
2525 def filter_requested_info(info_dict):
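        # Drop internal fields (leading '_', except '_type') and per-request
        # fields so the result is safe to serialize, e.g. for the info JSON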
2526 fields_to_remove = ('requested_formats', 'requested_subtitles')
2527 return dict(
2528 (k, v) for k, v in info_dict.items()
2529 if (k[0] != '_' or k == '_type') and k not in fields_to_remove)
2530
    def run_pp(self, pp, infodict, files_to_move=None):
        # Use None instead of a mutable default argument, since the dict is
        # modified (and returned) below
        if files_to_move is None:
            files_to_move = {}
        files_to_delete, infodict = pp.run(infodict)
2534 if not files_to_delete:
2535 return files_to_move, infodict
2536
2537 if self.params.get('keepvideo', False):
2538 for f in files_to_delete:
2539 files_to_move.setdefault(f, '')
2540 else:
2541 for old_filename in set(files_to_delete):
2542 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2543 try:
2544 os.remove(encodeFilename(old_filename))
2545 except (IOError, OSError):
2546 self.report_warning('Unable to remove downloaded original file')
2547 if old_filename in files_to_move:
2548 del files_to_move[old_filename]
2549 return files_to_move, infodict
2550
2551 @staticmethod
2552 def post_extract(info_dict):
2553 def actual_post_extract(info_dict):
2554 if info_dict.get('_type') in ('playlist', 'multi_video'):
2555 for video_dict in info_dict.get('entries', {}):
2556 actual_post_extract(video_dict)
2557 return
2558
2559 if '__post_extractor' not in info_dict:
2560 return
2561 post_extractor = info_dict['__post_extractor']
2562 if post_extractor:
2563 info_dict.update(post_extractor().items())
2564 del info_dict['__post_extractor']
2565 return
2566
2567 actual_post_extract(info_dict)
2568
2569 def pre_process(self, ie_info):
2570 info = dict(ie_info)
2571 for pp in self._pps['beforedl']:
2572 info = self.run_pp(pp, info)[1]
2573 return info
2574
    def post_process(self, filename, ie_info, files_to_move=None):
        """Run all the postprocessors on the given file."""
        if files_to_move is None:
            files_to_move = {}
2577 info = dict(ie_info)
2578 info['filepath'] = filename
2579 info['__files_to_move'] = {}
2580
2581 for pp in ie_info.get('__postprocessors', []) + self._pps['normal']:
2582 files_to_move, info = self.run_pp(pp, info, files_to_move)
2583 info = self.run_pp(MoveFilesAfterDownloadPP(self, files_to_move), info)[1]
2584 for pp in self._pps['aftermove']:
2585 info = self.run_pp(pp, info, {})[1]
2586
2587 def _make_archive_id(self, info_dict):
2588 video_id = info_dict.get('id')
2589 if not video_id:
2590 return
        # The extractor key is lowercased below to be future-proof against any
        # change in case and for backwards compatibility with prior versions
2593 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
2594 if extractor is None:
2595 url = str_or_none(info_dict.get('url'))
2596 if not url:
2597 return
2598 # Try to find matching extractor for the URL and take its ie_key
2599 for ie in self._ies:
2600 if ie.suitable(url):
2601 extractor = ie.ie_key()
2602 break
2603 else:
2604 return
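        # e.g. 'youtube abc123xyz' (hypothetical video id)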
2605 return '%s %s' % (extractor.lower(), video_id)
2606
2607 def in_download_archive(self, info_dict):
2608 fn = self.params.get('download_archive')
2609 if fn is None:
2610 return False
2611
2612 vid_id = self._make_archive_id(info_dict)
2613 if not vid_id:
2614 return False # Incomplete video information
2615
2616 return vid_id in self.archive
2617
2618 def record_download_archive(self, info_dict):
2619 fn = self.params.get('download_archive')
2620 if fn is None:
2621 return
2622 vid_id = self._make_archive_id(info_dict)
2623 assert vid_id
2624 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2625 archive_file.write(vid_id + '\n')
2626 self.archive.add(vid_id)
2627
2628 @staticmethod
2629 def format_resolution(format, default='unknown'):
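        # e.g. width 1280 + height 720 -> '1280x720'; height only -> '720p';
        # vcodec 'none' -> 'audio only'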
2630 if format.get('vcodec') == 'none':
2631 return 'audio only'
2632 if format.get('resolution') is not None:
2633 return format['resolution']
2634 if format.get('height') is not None:
2635 if format.get('width') is not None:
2636 res = '%sx%s' % (format['width'], format['height'])
2637 else:
2638 res = '%sp' % format['height']
2639 elif format.get('width') is not None:
2640 res = '%dx?' % format['width']
2641 else:
2642 res = default
2643 return res
2644
2645 def _format_note(self, fdict):
2646 res = ''
2647 if fdict.get('ext') in ['f4f', 'f4m']:
2648 res += '(unsupported) '
2649 if fdict.get('language'):
2650 if res:
2651 res += ' '
2652 res += '[%s] ' % fdict['language']
2653 if fdict.get('format_note') is not None:
2654 res += fdict['format_note'] + ' '
2655 if fdict.get('tbr') is not None:
2656 res += '%4dk ' % fdict['tbr']
2657 if fdict.get('container') is not None:
2658 if res:
2659 res += ', '
2660 res += '%s container' % fdict['container']
2661 if (fdict.get('vcodec') is not None
2662 and fdict.get('vcodec') != 'none'):
2663 if res:
2664 res += ', '
2665 res += fdict['vcodec']
2666 if fdict.get('vbr') is not None:
2667 res += '@'
2668 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2669 res += 'video@'
2670 if fdict.get('vbr') is not None:
2671 res += '%4dk' % fdict['vbr']
2672 if fdict.get('fps') is not None:
2673 if res:
2674 res += ', '
2675 res += '%sfps' % fdict['fps']
2676 if fdict.get('acodec') is not None:
2677 if res:
2678 res += ', '
2679 if fdict['acodec'] == 'none':
2680 res += 'video only'
2681 else:
2682 res += '%-5s' % fdict['acodec']
2683 elif fdict.get('abr') is not None:
2684 if res:
2685 res += ', '
2686 res += 'audio'
2687 if fdict.get('abr') is not None:
2688 res += '@%3dk' % fdict['abr']
2689 if fdict.get('asr') is not None:
2690 res += ' (%5dHz)' % fdict['asr']
2691 if fdict.get('filesize') is not None:
2692 if res:
2693 res += ', '
2694 res += format_bytes(fdict['filesize'])
2695 elif fdict.get('filesize_approx') is not None:
2696 if res:
2697 res += ', '
2698 res += '~' + format_bytes(fdict['filesize_approx'])
2699 return res
2700
2701 def _format_note_table(self, f):
2702 def join_fields(*vargs):
2703 return ', '.join((val for val in vargs if val != ''))
2704
2705 return join_fields(
2706 'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
2707 format_field(f, 'language', '[%s]'),
2708 format_field(f, 'format_note'),
2709 format_field(f, 'container', ignore=(None, f.get('ext'))),
2710 format_field(f, 'asr', '%5dHz'))
2711
2712 def list_formats(self, info_dict):
2713 formats = info_dict.get('formats', [info_dict])
2714 new_format = self.params.get('listformats_table', False)
2715 if new_format:
2716 table = [
2717 [
2718 format_field(f, 'format_id'),
2719 format_field(f, 'ext'),
2720 self.format_resolution(f),
2721 format_field(f, 'fps', '%d'),
2722 '|',
2723 format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
2724 format_field(f, 'tbr', '%4dk'),
2725 f.get('protocol').replace('http_dash_segments', 'dash').replace("native", "n").replace('niconico_', ''),
2726 '|',
2727 format_field(f, 'vcodec', default='unknown').replace('none', ''),
2728 format_field(f, 'vbr', '%4dk'),
2729 format_field(f, 'acodec', default='unknown').replace('none', ''),
2730 format_field(f, 'abr', '%3dk'),
2731 format_field(f, 'asr', '%5dHz'),
2732 self._format_note_table(f)]
2733 for f in formats
2734 if f.get('preference') is None or f['preference'] >= -1000]
2735 header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
2736 '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
2737 else:
2738 table = [
2739 [
2740 format_field(f, 'format_id'),
2741 format_field(f, 'ext'),
2742 self.format_resolution(f),
2743 self._format_note(f)]
2744 for f in formats
2745 if f.get('preference') is None or f['preference'] >= -1000]
2746 header_line = ['format code', 'extension', 'resolution', 'note']
2747
2748 self.to_screen(
2749 '[info] Available formats for %s:\n%s' % (info_dict['id'], render_table(
2750 header_line,
2751 table,
2752 delim=new_format,
2753 extraGap=(0 if new_format else 1),
2754 hideEmpty=new_format)))
2755
2756 def list_thumbnails(self, info_dict):
2757 thumbnails = info_dict.get('thumbnails')
2758 if not thumbnails:
2759 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
2760 return
2761
2762 self.to_screen(
2763 '[info] Thumbnails for %s:' % info_dict['id'])
2764 self.to_screen(render_table(
2765 ['ID', 'width', 'height', 'URL'],
2766 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
2767
2768 def list_subtitles(self, video_id, subtitles, name='subtitles'):
2769 if not subtitles:
2770 self.to_screen('%s has no %s' % (video_id, name))
2771 return
2772 self.to_screen(
2773 'Available %s for %s:' % (name, video_id))
2774 self.to_screen(render_table(
2775 ['Language', 'formats'],
2776 [[lang, ', '.join(f['ext'] for f in reversed(formats))]
2777 for lang, formats in subtitles.items()]))
2778
2779 def urlopen(self, req):
2780 """ Start an HTTP download """
2781 if isinstance(req, compat_basestring):
2782 req = sanitized_Request(req)
2783 return self._opener.open(req, timeout=self._socket_timeout)
2784
2785 def print_debug_header(self):
2786 if not self.params.get('verbose'):
2787 return
2788
2789 if type('') is not compat_str:
2790 # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
2791 self.report_warning(
2792 'Your Python is broken! Update to a newer and supported version')
2793
2794 stdout_encoding = getattr(
2795 sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
2796 encoding_str = (
2797 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
2798 locale.getpreferredencoding(),
2799 sys.getfilesystemencoding(),
2800 stdout_encoding,
2801 self.get_encoding()))
2802 write_string(encoding_str, encoding=None)
2803
2804 source = (
2805 '(exe)' if hasattr(sys, 'frozen')
2806 else '(zip)' if isinstance(globals().get('__loader__'), zipimporter)
2807 else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py'
2808 else '')
2809 self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))
2810 if _LAZY_LOADER:
2811 self._write_string('[debug] Lazy loading extractors enabled\n')
2812 if _PLUGIN_CLASSES:
2813 self._write_string(
2814 '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
2815 try:
2816 sp = subprocess.Popen(
2817 ['git', 'rev-parse', '--short', 'HEAD'],
2818 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
2819 cwd=os.path.dirname(os.path.abspath(__file__)))
2820 out, err = process_communicate_or_kill(sp)
2821 out = out.decode().strip()
2822 if re.match('[0-9a-f]+', out):
2823 self._write_string('[debug] Git HEAD: %s\n' % out)
2824 except Exception:
2825 try:
2826 sys.exc_clear()
2827 except Exception:
2828 pass
2829
2830 def python_implementation():
2831 impl_name = platform.python_implementation()
2832 if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
2833 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
2834 return impl_name
2835
2836 self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
2837 platform.python_version(),
2838 python_implementation(),
2839 platform.architecture()[0],
2840 platform_name()))
2841
2842 exe_versions = FFmpegPostProcessor.get_versions(self)
2843 exe_versions['rtmpdump'] = rtmpdump_version()
2844 exe_versions['phantomjs'] = PhantomJSwrapper._version()
2845 exe_str = ', '.join(
2846 '%s %s' % (exe, v)
2847 for exe, v in sorted(exe_versions.items())
2848 if v
2849 )
2850 if not exe_str:
2851 exe_str = 'none'
2852 self._write_string('[debug] exe versions: %s\n' % exe_str)
2853
2854 proxy_map = {}
2855 for handler in self._opener.handlers:
2856 if hasattr(handler, 'proxies'):
2857 proxy_map.update(handler.proxies)
2858 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
2859
2860 if self.params.get('call_home', False):
2861 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
2862 self._write_string('[debug] Public IP address: %s\n' % ipaddr)
2863 return
2864 latest_version = self.urlopen(
2865 'https://yt-dl.org/latest/version').read().decode('utf-8')
2866 if version_tuple(latest_version) > version_tuple(__version__):
2867 self.report_warning(
2868 'You are using an outdated version (newest version: %s)! '
2869 'See https://yt-dl.org/update if you need help updating.' %
2870 latest_version)
2871
2872 def _setup_opener(self):
2873 timeout_val = self.params.get('socket_timeout')
2874 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
2875
2876 opts_cookiefile = self.params.get('cookiefile')
2877 opts_proxy = self.params.get('proxy')
2878
2879 if opts_cookiefile is None:
2880 self.cookiejar = compat_cookiejar.CookieJar()
2881 else:
2882 opts_cookiefile = expand_path(opts_cookiefile)
2883 self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
2884 if os.access(opts_cookiefile, os.R_OK):
2885 self.cookiejar.load(ignore_discard=True, ignore_expires=True)
2886
2887 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
2888 if opts_proxy is not None:
2889 if opts_proxy == '':
2890 proxies = {}
2891 else:
2892 proxies = {'http': opts_proxy, 'https': opts_proxy}
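                # e.g. params['proxy'] = 'http://127.0.0.1:3128' (hypothetical)
                # is applied to both http and https requests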
2893 else:
2894 proxies = compat_urllib_request.getproxies()
2895 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
2896 if 'http' in proxies and 'https' not in proxies:
2897 proxies['https'] = proxies['http']
2898 proxy_handler = PerRequestProxyHandler(proxies)
2899
2900 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
2901 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
2902 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
2903 redirect_handler = YoutubeDLRedirectHandler()
2904 data_handler = compat_urllib_request_DataHandler()
2905
        # When passing our own FileHandler instance, build_opener won't add the
        # default FileHandler; this lets us disable the file:// protocol, which
        # can otherwise be abused for malicious purposes (see
        # https://github.com/ytdl-org/youtube-dl/issues/8227)
2910 file_handler = compat_urllib_request.FileHandler()
2911
2912 def file_open(*args, **kwargs):
2913 raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
2914 file_handler.file_open = file_open
2915
2916 opener = compat_urllib_request.build_opener(
2917 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2918
2919 # Delete the default user-agent header, which would otherwise apply in
2920 # cases where our custom HTTP handler doesn't come into play
2921 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
2922 opener.addheaders = []
2923 self._opener = opener
2924
2925 def encode(self, s):
2926 if isinstance(s, bytes):
2927 return s # Already encoded
2928
2929 try:
2930 return s.encode(self.get_encoding())
2931 except UnicodeEncodeError as err:
2932 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2933 raise
2934
2935 def get_encoding(self):
2936 encoding = self.params.get('encoding')
2937 if encoding is None:
2938 encoding = preferredencoding()
2939 return encoding
2940
2941 def _write_thumbnails(self, info_dict, filename): # return the extensions
2942 write_all = self.params.get('write_all_thumbnails', False)
2943 thumbnails = []
2944 if write_all or self.params.get('writethumbnail', False):
2945 thumbnails = info_dict.get('thumbnails') or []
2946 multiple = write_all and len(thumbnails) > 1
2947
2948 ret = []
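        # Thumbnails were sorted worst-to-best in process_video_result, so
        # iterate in reverse (best first) unless all of them are being written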
2949 for t in thumbnails[::1 if write_all else -1]:
2950 thumb_ext = determine_ext(t['url'], 'jpg')
2951 suffix = '%s.' % t['id'] if multiple else ''
2952 thumb_display_id = '%s ' % t['id'] if multiple else ''
2953 t['filename'] = thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))
2954
2955 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
2956 ret.append(suffix + thumb_ext)
2957 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2958 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2959 else:
2960 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2961 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2962 try:
2963 uf = self.urlopen(t['url'])
2964 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2965 shutil.copyfileobj(uf, thumbf)
2966 ret.append(suffix + thumb_ext)
2967 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2968 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2969 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2970 self.report_warning('Unable to download thumbnail "%s": %s' %
2971 (t['url'], error_to_compat_str(err)))
2972 if ret and not write_all:
2973 break
2974 return ret