yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import copy
   9 import datetime
  10 import errno
  11 import fileinput
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import socket
  23 import sys
  24 import time
  25 import tokenize
  26 import traceback
  27 import random
  28
  29 from string import ascii_letters
  30 from zipimport import zipimporter
  31
  32 from .compat import (
  33     compat_basestring,
  34     compat_cookiejar,
  35     compat_get_terminal_size,
  36     compat_http_client,
  37     compat_kwargs,
  38     compat_numeric_types,
  39     compat_os_name,
  40     compat_str,
  41     compat_tokenize_tokenize,
  42     compat_urllib_error,
  43     compat_urllib_request,
  44     compat_urllib_request_DataHandler,
  45 )
  46 from .utils import (
  47     age_restricted,
  48     args_to_str,
  49     ContentTooShortError,
  50     date_from_str,
  51     DateRange,
  52     DEFAULT_OUTTMPL,
  53     OUTTMPL_TYPES,
  54     determine_ext,
  55     determine_protocol,
  56     DOT_DESKTOP_LINK_TEMPLATE,
  57     DOT_URL_LINK_TEMPLATE,
  58     DOT_WEBLOC_LINK_TEMPLATE,
  59     DownloadError,
  60     encode_compat_str,
  61     encodeFilename,
  62     error_to_compat_str,
  63     ExistingVideoReached,
  64     expand_path,
  65     ExtractorError,
  66     float_or_none,
  67     format_bytes,
  68     format_field,
  69     formatSeconds,
  70     GeoRestrictedError,
  71     int_or_none,
  72     iri_to_uri,
  73     ISO3166Utils,
  74     locked_file,
  75     make_dir,
  76     make_HTTPS_handler,
  77     MaxDownloadsReached,
  78     orderedSet,
  79     PagedList,
  80     parse_filesize,
  81     PerRequestProxyHandler,
  82     platform_name,
  83     PostProcessingError,
  84     preferredencoding,
  85     prepend_extension,
  86     register_socks_protocols,
  87     render_table,
  88     replace_extension,
  89     RejectedVideoReached,
  90     SameFileError,
  91     sanitize_filename,
  92     sanitize_path,
  93     sanitize_url,
  94     sanitized_Request,
  95     std_headers,
  96     str_or_none,
  97     strftime_or_none,
  98     subtitles_filename,
  99     to_high_limit_path,
 100     UnavailableVideoError,
 101     url_basename,
 102     version_tuple,
 103     write_json_file,
 104     write_string,
 105     YoutubeDLCookieJar,
 106     YoutubeDLCookieProcessor,
 107     YoutubeDLHandler,
 108     YoutubeDLRedirectHandler,
 109     process_communicate_or_kill,
 110 )
 111 from .cache import Cache
 112 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER, _PLUGIN_CLASSES
 113 from .extractor.openload import PhantomJSwrapper
 114 from .downloader import get_suitable_downloader
 115 from .downloader.rtmp import rtmpdump_version
 116 from .postprocessor import (
 117     FFmpegFixupM3u8PP,
 118     FFmpegFixupM4aPP,
 119     FFmpegFixupStretchedPP,
 120     FFmpegMergerPP,
 121     FFmpegPostProcessor,
 122     # FFmpegSubtitlesConvertorPP,
 123     get_postprocessor,
 124     MoveFilesAfterDownloadPP,
 125 )
 126 from .version import __version__
 127
 128 if compat_os_name == 'nt':
 129     import ctypes
 130
 131
 132 class YoutubeDL(object):
 133     """YoutubeDL class.
 134
 135     YoutubeDL objects are the ones responsible of downloading the
 136     actual video file and writing it to disk if the user has requested
 137     it, among some other tasks. In most cases there should be one per
 138     program. As, given a video URL, the downloader doesn't know how to
 139     extract all the needed information, task that InfoExtractors do, it
 140     has to pass the URL to one of them.
 141
 142     For this, YoutubeDL objects have a method that allows
 143     InfoExtractors to be registered in a given order. When it is passed
 144     a URL, the YoutubeDL object handles it to the first InfoExtractor it
 145     finds that reports being able to handle it. The InfoExtractor extracts
 146     all the information about the video or videos the URL refers to, and
 147     YoutubeDL process the extracted information, possibly using a File
 148     Downloader to download the video.
 149
 150     YoutubeDL objects accept a lot of parameters. In order not to saturate
 151     the object constructor with arguments, it receives a dictionary of
 152     options instead. These options are available through the params
 153     attribute for the InfoExtractors to use. The YoutubeDL also
 154     registers itself as the downloader in charge for the InfoExtractors
 155     that are added to it, so this is a "mutual registration".
 156
 157     Available options:
 158
 159     username:          Username for authentication purposes.
 160     password:          Password for authentication purposes.
 161     videopassword:     Password for accessing a video.
 162     ap_mso:            Adobe Pass multiple-system operator identifier.
 163     ap_username:       Multiple-system operator account username.
 164     ap_password:       Multiple-system operator account password.
 165     usenetrc:          Use netrc for authentication instead.
 166     verbose:           Print additional info to stdout.
 167     quiet:             Do not print messages to stdout.
 168     no_warnings:       Do not print out anything for warnings.
 169     forceurl:          Force printing final URL.
 170     forcetitle:        Force printing title.
 171     forceid:           Force printing ID.
 172     forcethumbnail:    Force printing thumbnail URL.
 173     forcedescription:  Force printing description.
 174     forcefilename:     Force printing final filename.
 175     forceduration:     Force printing duration.
 176     forcejson:         Force printing info_dict as JSON.
 177     dump_single_json:  Force printing the info_dict of the whole playlist
 178                        (or video) as a single JSON line.
 179     force_write_download_archive: Force writing download archive regardless
 180                        of 'skip_download' or 'simulate'.
 181     simulate:          Do not download the video files.
 182     format:            Video format code. see "FORMAT SELECTION" for more details.
 183     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 184     format_sort:       How to sort the video formats. see "Sorting Formats"
 185                        for more details.
 186     format_sort_force: Force the given format_sort. see "Sorting Formats"
 187                        for more details.
 188     allow_multiple_video_streams:   Allow multiple video streams to be merged
 189                        into a single file
 190     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 191                        into a single file
 192     paths:             Dictionary of output paths. The allowed keys are 'home'
 193                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 194     outtmpl:           Dictionary of templates for output names. Allowed keys
 195                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
                       A string is also accepted for backward compatibility
 197     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 198     restrictfilenames: Do not allow "&" and spaces in file names
 199     trim_file_name:    Limit length of filename (extension excluded)
 200     windowsfilenames:  Force the filenames to be windows compatible
 201     ignoreerrors:      Do not stop on download errors
 202                        (Default True when running yt-dlp,
 203                        but False when directly accessing YoutubeDL class)
 204     force_generic_extractor: Force downloader to use the generic extractor
 205     overwrites:        Overwrite all video and metadata files if True,
 206                        overwrite only non-video files if None
 207                        and don't overwrite any file if False
 208     playliststart:     Playlist item to start at.
 209     playlistend:       Playlist item to end at.
 210     playlist_items:    Specific indices of playlist to download.
 211     playlistreverse:   Download playlist items in reverse order.
 212     playlistrandom:    Download playlist items in random order.
 213     matchtitle:        Download only matching titles.
 214     rejecttitle:       Reject downloads for matching titles.
 215     logger:            Log messages to a logging.Logger instance.
 216     logtostderr:       Log messages to stderr instead of stdout.
 217     writedescription:  Write the video description to a .description file
 218     writeinfojson:     Write the video description to a .info.json file
 219     writecomments:     Extract video comments. This will not be written to disk
 220                        unless writeinfojson is also given
 221     writeannotations:  Write the video annotations to a .annotations.xml file
 222     writethumbnail:    Write the thumbnail image to a file
 223     allow_playlist_files: Whether to write playlists' description, infojson etc
 224                        also to disk when using the 'write*' options
 225     write_all_thumbnails:  Write all thumbnail formats to files
 226     writelink:         Write an internet shortcut file, depending on the
 227                        current platform (.url/.webloc/.desktop)
 228     writeurllink:      Write a Windows internet shortcut file (.url)
 229     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 230     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 231     writesubtitles:    Write the video subtitles to a file
 232     writeautomaticsub: Write the automatically generated subtitles to a file
 233     allsubtitles:      Downloads all the subtitles of the video
 234                        (requires writesubtitles or writeautomaticsub)
 235     listsubtitles:     Lists all available subtitles for the video
 236     subtitlesformat:   The format code for subtitles
 237     subtitleslangs:    List of languages of the subtitles to download
 238     keepvideo:         Keep the video file after post-processing
 239     daterange:         A DateRange object, download only if the upload_date is in the range.
 240     skip_download:     Skip the actual download of the video file
 241     cachedir:          Location of the cache files in the filesystem.
 242                        False to disable filesystem cache.
 243     noplaylist:        Download single video instead of a playlist if in doubt.
 244     age_limit:         An integer representing the user's age in years.
 245                        Unsuitable videos for the given age are skipped.
 246     min_views:         An integer representing the minimum view count the video
 247                        must have in order to not be skipped.
 248                        Videos without view count information are always
 249                        downloaded. None for no limit.
 250     max_views:         An integer representing the maximum view count.
 251                        Videos that are more popular than that are not
 252                        downloaded.
 253                        Videos without view count information are always
 254                        downloaded. None for no limit.
 255     download_archive:  File name of a file where all downloads are recorded.
 256                        Videos already present in the file are not downloaded
 257                        again.
 258     break_on_existing: Stop the download process after attempting to download a
 259                        file that is in the archive.
 260     break_on_reject:   Stop the download process when encountering a video that
 261                        has been filtered out.
 262     cookiefile:        File name where cookies should be read from and dumped to
 263     nocheckcertificate:Do not verify SSL certificates
 264     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 265                        At the moment, this is only supported by YouTube.
 266     proxy:             URL of the proxy server to use
 267     geo_verification_proxy:  URL of the proxy to use for IP address verification
 268                        on geo-restricted sites.
 269     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 270     bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
 272     debug_printtraffic:Print out sent and received HTTP traffic
 273     include_ads:       Download ads as well
 274     default_search:    Prepend this string if an input url is not valid.
 275                        'auto' for elaborate guessing
 276     encoding:          Use this encoding instead of the system-specified.
 277     extract_flat:      Do not resolve URLs, return the immediate result.
 278                        Pass in 'in_playlist' to only show this behavior for
 279                        playlist items.
 280     postprocessors:    A list of dictionaries, each with an entry
 281                        * key:  The name of the postprocessor. See
 282                                yt_dlp/postprocessor/__init__.py for a list.
                       * when: Optional. Name of the postprocessor chain this
                               postprocessor is added to: 'beforedl', 'normal'
                               (default) or 'aftermove'
 285                        as well as any further keyword arguments for the
 286                        postprocessor.
 287     post_hooks:        A list of functions that get called as the final step
 288                        for each video file, after all postprocessors have been
 289                        called. The filename will be passed as the only argument.
 290     progress_hooks:    A list of functions that get called on download
 291                        progress, with a dictionary with the entries
 292                        * status: One of "downloading", "error", or "finished".
 293                                  Check this first and ignore unknown values.
 294
 295                        If status is one of "downloading", or "finished", the
 296                        following properties may also be present:
 297                        * filename: The final filename (always present)
 298                        * tmpfilename: The filename we're currently writing to
 299                        * downloaded_bytes: Bytes on disk
 300                        * total_bytes: Size of the whole file, None if unknown
 301                        * total_bytes_estimate: Guess of the eventual file size,
 302                                                None if unavailable.
 303                        * elapsed: The number of seconds since download started.
 304                        * eta: The estimated time in seconds, None if unknown
 305                        * speed: The download speed in bytes/second, None if
 306                                 unknown
 307                        * fragment_index: The counter of the currently
 308                                          downloaded video fragment.
 309                        * fragment_count: The number of fragments (= individual
 310                                          files that will be merged)
 311
 312                        Progress hooks are guaranteed to be called at least once
 313                        (with status "finished") if the download is successful.
 314     merge_output_format: Extension to use when merging formats.
 315     final_ext:         Expected final extension; used to detect when the file was
 316                        already downloaded and converted. "merge_output_format" is
 317                        replaced by this extension when given
 318     fixup:             Automatically correct known faults of the file.
 319                        One of:
 320                        - "never": do nothing
 321                        - "warn": only emit a warning
 322                        - "detect_or_warn": check whether we can do anything
 323                                            about it, warn otherwise (default)
 324     source_address:    Client-side IP address to bind to.
 325     call_home:         Boolean, true iff we are allowed to contact the
 326                        yt-dlp servers for debugging. (BROKEN)
 327     sleep_interval_requests: Number of seconds to sleep between requests
 328                        during extraction
 329     sleep_interval:    Number of seconds to sleep before each download when
 330                        used alone or a lower bound of a range for randomized
 331                        sleep before each download (minimum possible number
 332                        of seconds to sleep) when used along with
 333                        max_sleep_interval.
 334     max_sleep_interval:Upper bound of a range for randomized sleep before each
 335                        download (maximum possible number of seconds to sleep).
 336                        Must only be used along with sleep_interval.
 337                        Actual sleep time will be a random float from range
 338                        [sleep_interval; max_sleep_interval].
 339     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 340     listformats:       Print an overview of available video formats and exit.
 341     list_thumbnails:   Print a table of all thumbnails and exit.
 342     match_filter:      A function that gets called with the info_dict of
 343                        every video.
 344                        If it returns a message, the video is ignored.
 345                        If it returns None, the video is downloaded.
 346                        match_filter_func in utils.py is one example for this.
 347     no_color:          Do not emit color codes in output.
 348     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 349                        HTTP header
 350     geo_bypass_country:
 351                        Two-letter ISO 3166-2 country code that will be used for
 352                        explicit geographic restriction bypassing via faking
 353                        X-Forwarded-For HTTP header
 354     geo_bypass_ip_block:
 355                        IP range in CIDR notation that will be used similarly to
 356                        geo_bypass_country
 357
 358     The following options determine which downloader is picked:
 359     external_downloader: Executable of the external downloader to call.
 360                        None or unset for standard (built-in) downloader.
 361     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
 362                        if True, otherwise use ffmpeg/avconv if False, otherwise
 363                        use downloader suggested by extractor if None.
 364
 365     The following parameters are not used by YoutubeDL itself, they are used by
 366     the downloader (see yt_dlp/downloader/common.py):
 367     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 368     noresizebuffer, retries, continuedl, noprogress, consoletitle,
 369     xattr_set_filesize, external_downloader_args, hls_use_mpegts,
 370     http_chunk_size.
 371
 372     The following options are used by the post processors:
 373     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 374                        otherwise prefer ffmpeg. (avconv support is deprecated)
 375     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 376                        to the binary or its containing directory.
 377     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 378                         and a list of additional command-line arguments for the
 379                         postprocessor/executable. The dict can also have "PP+EXE" keys
 380                         which are used when the given exe is used by the given PP.
                        Use 'default' as the name for arguments to be passed to all PP
 382
 383     The following options are used by the extractors:
 384     extractor_retries: Number of times to retry for known errors
 385     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 386     hls_split_discontinuity: Split HLS playlists to different formats at
 387                        discontinuities such as ad breaks (default: False)
 388     youtube_include_dash_manifest: If True (default), DASH manifests and related
 389                        data will be downloaded and processed by extractor.
 390                        You can reduce network I/O by disabling it if you don't
 391                        care about DASH. (only for youtube)
 392     youtube_include_hls_manifest: If True (default), HLS manifests and related
 393                        data will be downloaded and processed by extractor.
 394                        You can reduce network I/O by disabling it if you don't
 395                        care about HLS. (only for youtube)
 396     """
 397
    # Names of metadata fields grouped as "numeric". The set is consumed
    # outside this excerpt - presumably by output-template handling; confirm
    # at the use site.
    _NUMERIC_FIELDS = set((
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'upload_year', 'upload_month', 'upload_day',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
        'playlist_index',
    ))

    # Class-level defaults for per-object state. __init__ rebinds params,
    # _ies, _pps, _first_webpage_request, _download_retcode, _num_downloads
    # and _screen_file on every instance.
    # NOTE: _ies, _pps and _playlist_urls are mutable objects owned by the
    # class; they must be rebound on the instance, never mutated in place,
    # or the change becomes visible to every YoutubeDL object.
    params = None
    _ies = []
    _pps = {'beforedl': [], 'aftermove': [], 'normal': []}
    # Double leading underscore: the name is mangled to
    # _YoutubeDL__prepare_filename_warned inside this class
    __prepare_filename_warned = False
    _first_webpage_request = True
    _download_retcode = None
    _num_downloads = None
    _playlist_level = 0
    _playlist_urls = set()
    _screen_file = None
 419
 420     def __init__(self, params=None, auto_init=True):
 421         """Create a FileDownloader object with the given options."""
 422         if params is None:
 423             params = {}
 424         self._ies = []
 425         self._ies_instances = {}
 426         self._pps = {'beforedl': [], 'aftermove': [], 'normal': []}
 427         self.__prepare_filename_warned = False
 428         self._first_webpage_request = True
 429         self._post_hooks = []
 430         self._progress_hooks = []
 431         self._download_retcode = 0
 432         self._num_downloads = 0
 433         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 434         self._err_file = sys.stderr
 435         self.params = {
 436             # Default parameters
 437             'nocheckcertificate': False,
 438         }
 439         self.params.update(params)
 440         self.cache = Cache(self)
 441         self.archive = set()
 442
 443         """Preload the archive, if any is specified"""
 444         def preload_download_archive(self):
 445             fn = self.params.get('download_archive')
 446             if fn is None:
 447                 return False
 448             try:
 449                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 450                     for line in archive_file:
 451                         self.archive.add(line.strip())
 452             except IOError as ioe:
 453                 if ioe.errno != errno.ENOENT:
 454                     raise
 455                 return False
 456             return True
 457
 458         def check_deprecated(param, option, suggestion):
 459             if self.params.get(param) is not None:
 460                 self.report_warning(
 461                     '%s is deprecated. Use %s instead.' % (option, suggestion))
 462                 return True
 463             return False
 464
 465         if self.params.get('verbose'):
 466             self.to_stdout('[debug] Loading archive file %r' % self.params.get('download_archive'))
 467
 468         preload_download_archive(self)
 469
 470         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 471             if self.params.get('geo_verification_proxy') is None:
 472                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 473
 474         if self.params.get('final_ext'):
 475             if self.params.get('merge_output_format'):
 476                 self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
 477             self.params['merge_output_format'] = self.params['final_ext']
 478
 479         if 'overwrites' in self.params and self.params['overwrites'] is None:
 480             del self.params['overwrites']
 481
 482         check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
 483         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 484         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 485
 486         if params.get('bidi_workaround', False):
 487             try:
 488                 import pty
 489                 master, slave = pty.openpty()
 490                 width = compat_get_terminal_size().columns
 491                 if width is None:
 492                     width_args = []
 493                 else:
 494                     width_args = ['-w', str(width)]
 495                 sp_kwargs = dict(
 496                     stdin=subprocess.PIPE,
 497                     stdout=slave,
 498                     stderr=self._err_file)
 499                 try:
 500                     self._output_process = subprocess.Popen(
 501                         ['bidiv'] + width_args, **sp_kwargs
 502                     )
 503                 except OSError:
 504                     self._output_process = subprocess.Popen(
 505                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 506                 self._output_channel = os.fdopen(master, 'rb')
 507             except OSError as ose:
 508                 if ose.errno == errno.ENOENT:
 509                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 510                 else:
 511                     raise
 512
 513         if (sys.platform != 'win32'
 514                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 515                 and not params.get('restrictfilenames', False)):
 516             # Unicode filesystem API will throw errors (#1474, #13027)
 517             self.report_warning(
 518                 'Assuming --restrict-filenames since file system encoding '
 519                 'cannot encode all characters. '
 520                 'Set the LC_ALL environment variable to fix this.')
 521             self.params['restrictfilenames'] = True
 522
 523         self.outtmpl_dict = self.parse_outtmpl()
 524
 525         self._setup_opener()
 526
 527         if auto_init:
 528             self.print_debug_header()
 529             self.add_default_info_extractors()
 530
 531         for pp_def_raw in self.params.get('postprocessors', []):
 532             pp_class = get_postprocessor(pp_def_raw['key'])
 533             pp_def = dict(pp_def_raw)
 534             del pp_def['key']
 535             if 'when' in pp_def:
 536                 when = pp_def['when']
 537                 del pp_def['when']
 538             else:
 539                 when = 'normal'
 540             pp = pp_class(self, **compat_kwargs(pp_def))
 541             self.add_post_processor(pp, when=when)
 542
 543         for ph in self.params.get('post_hooks', []):
 544             self.add_post_hook(ph)
 545
 546         for ph in self.params.get('progress_hooks', []):
 547             self.add_progress_hook(ph)
 548
 549         register_socks_protocols()
 550
 551     def warn_if_short_id(self, argv):
 552         # short YouTube ID starting with dash?
 553         idxs = [
 554             i for i, a in enumerate(argv)
 555             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 556         if idxs:
 557             correct_argv = (
 558                 ['yt-dlp']
 559                 + [a for i, a in enumerate(argv) if i not in idxs]
 560                 + ['--'] + [argv[i] for i in idxs]
 561             )
 562             self.report_warning(
 563                 'Long argument string detected. '
 564                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 565                 args_to_str(correct_argv))
 566
 567     def add_info_extractor(self, ie):
 568         """Add an InfoExtractor object to the end of the list."""
 569         self._ies.append(ie)
 570         if not isinstance(ie, type):
 571             self._ies_instances[ie.ie_key()] = ie
 572             ie.set_downloader(self)
 573
 574     def get_info_extractor(self, ie_key):
 575         """
 576         Get an instance of an IE with name ie_key, it will try to get one from
 577         the _ies list, if there's no instance it will create a new one and add
 578         it to the extractor list.
 579         """
 580         ie = self._ies_instances.get(ie_key)
 581         if ie is None:
 582             ie = get_info_extractor(ie_key)()
 583             self.add_info_extractor(ie)
 584         return ie
 585
 586     def add_default_info_extractors(self):
 587         """
 588         Add the InfoExtractors returned by gen_extractors to the end of the list
 589         """
 590         for ie in gen_extractor_classes():
 591             self.add_info_extractor(ie)
 592
 593     def add_post_processor(self, pp, when='normal'):
 594         """Add a PostProcessor object to the end of the chain."""
 595         self._pps[when].append(pp)
 596         pp.set_downloader(self)
 597
 598     def add_post_hook(self, ph):
 599         """Add the post hook"""
 600         self._post_hooks.append(ph)
 601
 602     def add_progress_hook(self, ph):
 603         """Add the progress hook (currently only for the file downloader)"""
 604         self._progress_hooks.append(ph)
 605
 606     def _bidi_workaround(self, message):
 607         if not hasattr(self, '_output_channel'):
 608             return message
 609
 610         assert hasattr(self, '_output_process')
 611         assert isinstance(message, compat_str)
 612         line_count = message.count('\n') + 1
 613         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 614         self._output_process.stdin.flush()
 615         res = ''.join(self._output_channel.readline().decode('utf-8')
 616                       for _ in range(line_count))
 617         return res[:-len('\n')]
 618
 619     def to_screen(self, message, skip_eol=False):
 620         """Print message to stdout if not in quiet mode."""
 621         return self.to_stdout(message, skip_eol, check_quiet=True)
 622
 623     def _write_string(self, s, out=None):
 624         write_string(s, out=out, encoding=self.params.get('encoding'))
 625
 626     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 627         """Print message to stdout if not in quiet mode."""
 628         if self.params.get('logger'):
 629             self.params['logger'].debug(message)
 630         elif not check_quiet or not self.params.get('quiet', False):
 631             message = self._bidi_workaround(message)
 632             terminator = ['\n', ''][skip_eol]
 633             output = message + terminator
 634
 635             self._write_string(output, self._screen_file)
 636
 637     def to_stderr(self, message):
 638         """Print message to stderr."""
 639         assert isinstance(message, compat_str)
 640         if self.params.get('logger'):
 641             self.params['logger'].error(message)
 642         else:
 643             message = self._bidi_workaround(message)
 644             output = message + '\n'
 645             self._write_string(output, self._err_file)
 646
 647     def to_console_title(self, message):
 648         if not self.params.get('consoletitle', False):
 649             return
 650         if compat_os_name == 'nt':
 651             if ctypes.windll.kernel32.GetConsoleWindow():
 652                 # c_wchar_p() might not be necessary if `message` is
 653                 # already of type unicode()
 654                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 655         elif 'TERM' in os.environ:
 656             self._write_string('\033]0;%s\007' % message, self._screen_file)
 657
 658     def save_console_title(self):
 659         if not self.params.get('consoletitle', False):
 660             return
 661         if self.params.get('simulate', False):
 662             return
 663         if compat_os_name != 'nt' and 'TERM' in os.environ:
 664             # Save the title on stack
 665             self._write_string('\033[22;0t', self._screen_file)
 666
 667     def restore_console_title(self):
 668         if not self.params.get('consoletitle', False):
 669             return
 670         if self.params.get('simulate', False):
 671             return
 672         if compat_os_name != 'nt' and 'TERM' in os.environ:
 673             # Restore the title from stack
 674             self._write_string('\033[23;0t', self._screen_file)
 675
 676     def __enter__(self):
 677         self.save_console_title()
 678         return self
 679
 680     def __exit__(self, *args):
 681         self.restore_console_title()
 682
 683         if self.params.get('cookiefile') is not None:
 684             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 685
 686     def trouble(self, message=None, tb=None):
 687         """Determine action to take when a download problem appears.
 688
 689         Depending on if the downloader has been configured to ignore
 690         download errors or not, this method may throw an exception or
 691         not when errors are found, after printing the message.
 692
 693         tb, if given, is additional traceback information.
 694         """
 695         if message is not None:
 696             self.to_stderr(message)
 697         if self.params.get('verbose'):
 698             if tb is None:
 699                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 700                     tb = ''
 701                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 702                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 703                     tb += encode_compat_str(traceback.format_exc())
 704                 else:
 705                     tb_data = traceback.format_list(traceback.extract_stack())
 706                     tb = ''.join(tb_data)
 707             self.to_stderr(tb)
 708         if not self.params.get('ignoreerrors', False):
 709             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 710                 exc_info = sys.exc_info()[1].exc_info
 711             else:
 712                 exc_info = sys.exc_info()
 713             raise DownloadError(message, exc_info)
 714         self._download_retcode = 1
 715
 716     def report_warning(self, message):
 717         '''
 718         Print the message to stderr, it will be prefixed with 'WARNING:'
 719         If stderr is a tty file the 'WARNING:' will be colored
 720         '''
 721         if self.params.get('logger') is not None:
 722             self.params['logger'].warning(message)
 723         else:
 724             if self.params.get('no_warnings'):
 725                 return
 726             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 727                 _msg_header = '\033[0;33mWARNING:\033[0m'
 728             else:
 729                 _msg_header = 'WARNING:'
 730             warning_message = '%s %s' % (_msg_header, message)
 731             self.to_stderr(warning_message)
 732
 733     def report_error(self, message, tb=None):
 734         '''
 735         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 736         in red if stderr is a tty file.
 737         '''
 738         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 739             _msg_header = '\033[0;31mERROR:\033[0m'
 740         else:
 741             _msg_header = 'ERROR:'
 742         error_message = '%s %s' % (_msg_header, message)
 743         self.trouble(error_message, tb)
 744
 745     def report_file_already_downloaded(self, file_name):
 746         """Report file has already been fully downloaded."""
 747         try:
 748             self.to_screen('[download] %s has already been downloaded' % file_name)
 749         except UnicodeEncodeError:
 750             self.to_screen('[download] The file has already been downloaded')
 751
 752     def report_file_delete(self, file_name):
 753         """Report that existing file will be deleted."""
 754         try:
 755             self.to_screen('Deleting existing file %s' % file_name)
 756         except UnicodeEncodeError:
 757             self.to_screen('Deleting existing file')
 758
 759     def parse_outtmpl(self):
 760         outtmpl_dict = self.params.get('outtmpl', {})
 761         if not isinstance(outtmpl_dict, dict):
 762             outtmpl_dict = {'default': outtmpl_dict}
 763         outtmpl_dict.update({
 764             k: v for k, v in DEFAULT_OUTTMPL.items()
 765             if not outtmpl_dict.get(k)})
 766         for key, val in outtmpl_dict.items():
 767             if isinstance(val, bytes):
 768                 self.report_warning(
 769                     'Parameter outtmpl is bytes, but should be a unicode string. '
 770                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 771         return outtmpl_dict
 772
 773     def _prepare_filename(self, info_dict, tmpl_type='default'):
 774         try:
 775             template_dict = dict(info_dict)
 776
 777             template_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
 778                 formatSeconds(info_dict['duration'], '-')
 779                 if info_dict.get('duration', None) is not None
 780                 else None)
 781
 782             template_dict['epoch'] = int(time.time())
 783             autonumber_size = self.params.get('autonumber_size')
 784             if autonumber_size is None:
 785                 autonumber_size = 5
 786             template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
 787             if template_dict.get('resolution') is None:
 788                 if template_dict.get('width') and template_dict.get('height'):
 789                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 790                 elif template_dict.get('height'):
 791                     template_dict['resolution'] = '%sp' % template_dict['height']
 792                 elif template_dict.get('width'):
 793                     template_dict['resolution'] = '%dx?' % template_dict['width']
 794
 795             sanitize = lambda k, v: sanitize_filename(
 796                 compat_str(v),
 797                 restricted=self.params.get('restrictfilenames'),
 798                 is_id=(k == 'id' or k.endswith('_id')))
 799             template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
 800                                  for k, v in template_dict.items()
 801                                  if v is not None and not isinstance(v, (list, tuple, dict)))
 802             na = self.params.get('outtmpl_na_placeholder', 'NA')
 803             template_dict = collections.defaultdict(lambda: na, template_dict)
 804
 805             outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
 806             force_ext = OUTTMPL_TYPES.get(tmpl_type)
 807
 808             # For fields playlist_index and autonumber convert all occurrences
 809             # of %(field)s to %(field)0Nd for backward compatibility
 810             field_size_compat_map = {
 811                 'playlist_index': len(str(template_dict['n_entries'])),
 812                 'autonumber': autonumber_size,
 813             }
 814             FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
 815             mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
 816             if mobj:
 817                 outtmpl = re.sub(
 818                     FIELD_SIZE_COMPAT_RE,
 819                     r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
 820                     outtmpl)
 821
 822             # As of [1] format syntax is:
 823             #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
 824             # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
 825             FORMAT_RE = r'''(?x)
 826                 (?<!%)
 827                 %
 828                 \({0}\)  # mapping key
 829                 (?:[#0\-+ ]+)?  # conversion flags (optional)
 830                 (?:\d+)?  # minimum field width (optional)
 831                 (?:\.\d+)?  # precision (optional)
 832                 [hlL]?  # length modifier (optional)
 833                 (?P<type>[diouxXeEfFgGcrs%])  # conversion type
 834             '''
 835
 836             numeric_fields = list(self._NUMERIC_FIELDS)
 837
 838             # Format date
 839             FORMAT_DATE_RE = FORMAT_RE.format(r'(?P<key>(?P<field>\w+)>(?P<format>.+?))')
 840             for mobj in re.finditer(FORMAT_DATE_RE, outtmpl):
 841                 conv_type, field, frmt, key = mobj.group('type', 'field', 'format', 'key')
 842                 if key in template_dict:
 843                     continue
 844                 value = strftime_or_none(template_dict.get(field), frmt, na)
 845                 if conv_type in 'crs':  # string
 846                     value = sanitize(field, value)
 847                 else:  # number
 848                     numeric_fields.append(key)
 849                     value = float_or_none(value, default=None)
 850                 if value is not None:
 851                     template_dict[key] = value
 852
 853             # Missing numeric fields used together with integer presentation types
 854             # in format specification will break the argument substitution since
 855             # string NA placeholder is returned for missing fields. We will patch
 856             # output template for missing fields to meet string presentation type.
 857             for numeric_field in numeric_fields:
 858                 if numeric_field not in template_dict:
 859                     outtmpl = re.sub(
 860                         FORMAT_RE.format(re.escape(numeric_field)),
 861                         r'%({0})s'.format(numeric_field), outtmpl)
 862
 863             # expand_path translates '%%' into '%' and '$$' into '$'
 864             # correspondingly that is not what we want since we need to keep
 865             # '%%' intact for template dict substitution step. Working around
 866             # with boundary-alike separator hack.
 867             sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 868             outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 869
 870             # outtmpl should be expand_path'ed before template dict substitution
 871             # because meta fields may contain env variables we don't want to
 872             # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
 873             # title "Hello $PATH", we don't want `$PATH` to be expanded.
 874             filename = expand_path(outtmpl).replace(sep, '') % template_dict
 875
 876             if force_ext is not None:
 877                 filename = replace_extension(filename, force_ext, template_dict.get('ext'))
 878
 879             # https://github.com/blackjack4494/youtube-dlc/issues/85
 880             trim_file_name = self.params.get('trim_file_name', False)
 881             if trim_file_name:
 882                 fn_groups = filename.rsplit('.')
 883                 ext = fn_groups[-1]
 884                 sub_ext = ''
 885                 if len(fn_groups) > 2:
 886                     sub_ext = fn_groups[-2]
 887                 filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
 888
 889             return filename
 890         except ValueError as err:
 891             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 892             return None
 893
 894     def prepare_filename(self, info_dict, dir_type='', warn=False):
 895         """Generate the output filename."""
 896         paths = self.params.get('paths', {})
 897         assert isinstance(paths, dict)
 898         filename = self._prepare_filename(info_dict, dir_type or 'default')
 899
 900         if warn and not self.__prepare_filename_warned:
 901             if not paths:
 902                 pass
 903             elif filename == '-':
 904                 self.report_warning('--paths is ignored when an outputting to stdout')
 905             elif os.path.isabs(filename):
 906                 self.report_warning('--paths is ignored since an absolute path is given in output template')
 907             self.__prepare_filename_warned = True
 908         if filename == '-' or not filename:
 909             return filename
 910
 911         homepath = expand_path(paths.get('home', '').strip())
 912         assert isinstance(homepath, compat_str)
 913         subdir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
 914         assert isinstance(subdir, compat_str)
 915         path = os.path.join(homepath, subdir, filename)
 916
 917         # Temporary fix for #4787
 918         # 'Treat' all problem characters by passing filename through preferredencoding
 919         # to workaround encoding issues with subprocess on python2 @ Windows
 920         if sys.version_info < (3, 0) and sys.platform == 'win32':
 921             path = encodeFilename(path, True).decode(preferredencoding())
 922         return sanitize_path(path, force=self.params.get('windowsfilenames'))
 923
 924     def _match_entry(self, info_dict, incomplete):
 925         """ Returns None if the file should be downloaded """
 926
 927         def check_filter():
 928             video_title = info_dict.get('title', info_dict.get('id', 'video'))
 929             if 'title' in info_dict:
 930                 # This can happen when we're just evaluating the playlist
 931                 title = info_dict['title']
 932                 matchtitle = self.params.get('matchtitle', False)
 933                 if matchtitle:
 934                     if not re.search(matchtitle, title, re.IGNORECASE):
 935                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 936                 rejecttitle = self.params.get('rejecttitle', False)
 937                 if rejecttitle:
 938                     if re.search(rejecttitle, title, re.IGNORECASE):
 939                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 940             date = info_dict.get('upload_date')
 941             if date is not None:
 942                 dateRange = self.params.get('daterange', DateRange())
 943                 if date not in dateRange:
 944                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 945             view_count = info_dict.get('view_count')
 946             if view_count is not None:
 947                 min_views = self.params.get('min_views')
 948                 if min_views is not None and view_count < min_views:
 949                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 950                 max_views = self.params.get('max_views')
 951                 if max_views is not None and view_count > max_views:
 952                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 953             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
 954                 return 'Skipping "%s" because it is age restricted' % video_title
 955             if self.in_download_archive(info_dict):
 956                 return '%s has already been recorded in archive' % video_title
 957
 958             if not incomplete:
 959                 match_filter = self.params.get('match_filter')
 960                 if match_filter is not None:
 961                     ret = match_filter(info_dict)
 962                     if ret is not None:
 963                         return ret
 964             return None
 965
 966         reason = check_filter()
 967         if reason is not None:
 968             self.to_screen('[download] ' + reason)
 969             if reason.endswith('has already been recorded in the archive') and self.params.get('break_on_existing', False):
 970                 raise ExistingVideoReached()
 971             elif self.params.get('break_on_reject', False):
 972                 raise RejectedVideoReached()
 973         return reason
 974
 975     @staticmethod
 976     def add_extra_info(info_dict, extra_info):
 977         '''Set the keys from extra_info in info dict if they are missing'''
 978         for key, value in extra_info.items():
 979             info_dict.setdefault(key, value)
 980
 981     def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info={},
 982                      process=True, force_generic_extractor=False):
 983         '''
 984         Returns a list with a dictionary for each video we find.
 985         If 'download', also downloads the videos.
 986         extra_info is a dict containing the extra values to add to each result
 987         '''
 988
 989         if not ie_key and force_generic_extractor:
 990             ie_key = 'Generic'
 991
 992         if ie_key:
 993             ies = [self.get_info_extractor(ie_key)]
 994         else:
 995             ies = self._ies
 996
 997         for ie in ies:
 998             if not ie.suitable(url):
 999                 continue
1000
1001             ie_key = ie.ie_key()
1002             ie = self.get_info_extractor(ie_key)
1003             if not ie.working():
1004                 self.report_warning('The program functionality for this site has been marked as broken, '
1005                                     'and will probably not work.')
1006
1007             try:
1008                 temp_id = str_or_none(
1009                     ie.extract_id(url) if callable(getattr(ie, 'extract_id', None))
1010                     else ie._match_id(url))
1011             except (AssertionError, IndexError, AttributeError):
1012                 temp_id = None
1013             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1014                 self.to_screen("[%s] %s: has already been recorded in archive" % (
1015                                ie_key, temp_id))
1016                 break
1017             return self.__extract_info(url, ie, download, extra_info, process, info_dict)
1018         else:
1019             self.report_error('no suitable InfoExtractor for URL %s' % url)
1020
1021     def __handle_extraction_exceptions(func):
1022         def wrapper(self, *args, **kwargs):
1023             try:
1024                 return func(self, *args, **kwargs)
1025             except GeoRestrictedError as e:
1026                 msg = e.msg
1027                 if e.countries:
1028                     msg += '\nThis video is available in %s.' % ', '.join(
1029                         map(ISO3166Utils.short2full, e.countries))
1030                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1031                 self.report_error(msg)
1032             except ExtractorError as e:  # An error we somewhat expected
1033                 self.report_error(compat_str(e), e.format_traceback())
1034             except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
1035                 raise
1036             except Exception as e:
1037                 if self.params.get('ignoreerrors', False):
1038                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
1039                 else:
1040                     raise
1041         return wrapper
1042
1043     @__handle_extraction_exceptions
1044     def __extract_info(self, url, ie, download, extra_info, process, info_dict):
1045         ie_result = ie.extract(url)
1046         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1047             return
1048         if isinstance(ie_result, list):
1049             # Backwards compatibility: old IE result format
1050             ie_result = {
1051                 '_type': 'compat_list',
1052                 'entries': ie_result,
1053             }
1054         if info_dict:
1055             if info_dict.get('id'):
1056                 ie_result['id'] = info_dict['id']
1057             if info_dict.get('title'):
1058                 ie_result['title'] = info_dict['title']
1059         self.add_default_extra_info(ie_result, ie, url)
1060         if process:
1061             return self.process_ie_result(ie_result, download, extra_info)
1062         else:
1063             return ie_result
1064
1065     def add_default_extra_info(self, ie_result, ie, url):
1066         self.add_extra_info(ie_result, {
1067             'extractor': ie.IE_NAME,
1068             'webpage_url': url,
1069             'webpage_url_basename': url_basename(url),
1070             'extractor_key': ie.ie_key(),
1071         })
1072
1073     def process_ie_result(self, ie_result, download=True, extra_info={}):
1074         """
1075         Take the result of the ie(may be modified) and resolve all unresolved
1076         references (URLs, playlist items).
1077
1078         It will also download the videos if 'download'.
1079         Returns the resolved ie_result.
1080         """
1081         result_type = ie_result.get('_type', 'video')
1082
1083         if result_type in ('url', 'url_transparent'):
1084             ie_result['url'] = sanitize_url(ie_result['url'])
1085             extract_flat = self.params.get('extract_flat', False)
1086             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1087                     or extract_flat is True):
1088                 self.__forced_printings(ie_result, self.prepare_filename(ie_result), incomplete=True)
1089                 return ie_result
1090
1091         if result_type == 'video':
1092             self.add_extra_info(ie_result, extra_info)
1093             return self.process_video_result(ie_result, download=download)
1094         elif result_type == 'url':
1095             # We have to add extra_info to the results because it may be
1096             # contained in a playlist
1097             return self.extract_info(ie_result['url'],
1098                                      download, info_dict=ie_result,
1099                                      ie_key=ie_result.get('ie_key'),
1100                                      extra_info=extra_info)
1101         elif result_type == 'url_transparent':
1102             # Use the information from the embedding page
1103             info = self.extract_info(
1104                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1105                 extra_info=extra_info, download=False, process=False)
1106
1107             # extract_info may return None when ignoreerrors is enabled and
1108             # extraction failed with an error, don't crash and return early
1109             # in this case
1110             if not info:
1111                 return info
1112
1113             force_properties = dict(
1114                 (k, v) for k, v in ie_result.items() if v is not None)
1115             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1116                 if f in force_properties:
1117                     del force_properties[f]
1118             new_result = info.copy()
1119             new_result.update(force_properties)
1120
1121             # Extracted info may not be a video result (i.e.
1122             # info.get('_type', 'video') != video) but rather an url or
1123             # url_transparent. In such cases outer metadata (from ie_result)
1124             # should be propagated to inner one (info). For this to happen
1125             # _type of info should be overridden with url_transparent. This
1126             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1127             if new_result.get('_type') == 'url':
1128                 new_result['_type'] = 'url_transparent'
1129
1130             return self.process_ie_result(
1131                 new_result, download=download, extra_info=extra_info)
1132         elif result_type in ('playlist', 'multi_video'):
1133             # Protect from infinite recursion due to recursively nested playlists
1134             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1135             webpage_url = ie_result['webpage_url']
1136             if webpage_url in self._playlist_urls:
1137                 self.to_screen(
1138                     '[download] Skipping already downloaded playlist: %s'
1139                     % ie_result.get('title') or ie_result.get('id'))
1140                 return
1141
1142             self._playlist_level += 1
1143             self._playlist_urls.add(webpage_url)
1144             try:
1145                 return self.__process_playlist(ie_result, download)
1146             finally:
1147                 self._playlist_level -= 1
1148                 if not self._playlist_level:
1149                     self._playlist_urls.clear()
1150         elif result_type == 'compat_list':
1151             self.report_warning(
1152                 'Extractor %s returned a compat_list result. '
1153                 'It needs to be updated.' % ie_result.get('extractor'))
1154
1155             def _fixup(r):
1156                 self.add_extra_info(
1157                     r,
1158                     {
1159                         'extractor': ie_result['extractor'],
1160                         'webpage_url': ie_result['webpage_url'],
1161                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
1162                         'extractor_key': ie_result['extractor_key'],
1163                     }
1164                 )
1165                 return r
1166             ie_result['entries'] = [
1167                 self.process_ie_result(_fixup(r), download, extra_info)
1168                 for r in ie_result['entries']
1169             ]
1170             return ie_result
1171         else:
1172             raise Exception('Invalid result type: %s' % result_type)
1173
1174     def __process_playlist(self, ie_result, download):
1175         # We process each entry in the playlist
1176         playlist = ie_result.get('title') or ie_result.get('id')
1177         self.to_screen('[download] Downloading playlist: %s' % playlist)
1178
1179         if self.params.get('allow_playlist_files', True):
1180             ie_copy = {
1181                 'playlist': playlist,
1182                 'playlist_id': ie_result.get('id'),
1183                 'playlist_title': ie_result.get('title'),
1184                 'playlist_uploader': ie_result.get('uploader'),
1185                 'playlist_uploader_id': ie_result.get('uploader_id'),
1186                 'playlist_index': 0
1187             }
1188             ie_copy.update(dict(ie_result))
1189
1190             def ensure_dir_exists(path):
1191                 return make_dir(path, self.report_error)
1192
1193             if self.params.get('writeinfojson', False):
1194                 infofn = self.prepare_filename(ie_copy, 'pl_infojson')
1195                 if not ensure_dir_exists(encodeFilename(infofn)):
1196                     return
1197                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
1198                     self.to_screen('[info] Playlist metadata is already present')
1199                 else:
1200                     playlist_info = dict(ie_result)
1201                     # playlist_info['entries'] = list(playlist_info['entries'])  # Entries is a generator which shouldnot be resolved here
1202                     del playlist_info['entries']
1203                     self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
1204                     try:
1205                         write_json_file(self.filter_requested_info(playlist_info), infofn)
1206                     except (OSError, IOError):
1207                         self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
1208
1209             if self.params.get('writedescription', False):
1210                 descfn = self.prepare_filename(ie_copy, 'pl_description')
1211                 if not ensure_dir_exists(encodeFilename(descfn)):
1212                     return
1213                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
1214                     self.to_screen('[info] Playlist description is already present')
1215                 elif ie_result.get('description') is None:
1216                     self.report_warning('There\'s no playlist description to write.')
1217                 else:
1218                     try:
1219                         self.to_screen('[info] Writing playlist description to: ' + descfn)
1220                         with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1221                             descfile.write(ie_result['description'])
1222                     except (OSError, IOError):
1223                         self.report_error('Cannot write playlist description file ' + descfn)
1224                         return
1225
1226         playlist_results = []
1227
1228         playliststart = self.params.get('playliststart', 1) - 1
1229         playlistend = self.params.get('playlistend')
1230         # For backwards compatibility, interpret -1 as whole list
1231         if playlistend == -1:
1232             playlistend = None
1233
1234         playlistitems_str = self.params.get('playlist_items')
1235         playlistitems = None
1236         if playlistitems_str is not None:
1237             def iter_playlistitems(format):
1238                 for string_segment in format.split(','):
1239                     if '-' in string_segment:
1240                         start, end = string_segment.split('-')
1241                         for item in range(int(start), int(end) + 1):
1242                             yield int(item)
1243                     else:
1244                         yield int(string_segment)
1245             playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1246
1247         ie_entries = ie_result['entries']
1248
1249         def make_playlistitems_entries(list_ie_entries):
1250             num_entries = len(list_ie_entries)
1251             return [
1252                 list_ie_entries[i - 1] for i in playlistitems
1253                 if -num_entries <= i - 1 < num_entries]
1254
1255         def report_download(num_entries):
1256             self.to_screen(
1257                 '[%s] playlist %s: Downloading %d videos' %
1258                 (ie_result['extractor'], playlist, num_entries))
1259
1260         if isinstance(ie_entries, list):
1261             n_all_entries = len(ie_entries)
1262             if playlistitems:
1263                 entries = make_playlistitems_entries(ie_entries)
1264             else:
1265                 entries = ie_entries[playliststart:playlistend]
1266             n_entries = len(entries)
1267             self.to_screen(
1268                 '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
1269                 (ie_result['extractor'], playlist, n_all_entries, n_entries))
1270         elif isinstance(ie_entries, PagedList):
1271             if playlistitems:
1272                 entries = []
1273                 for item in playlistitems:
1274                     entries.extend(ie_entries.getslice(
1275                         item - 1, item
1276                     ))
1277             else:
1278                 entries = ie_entries.getslice(
1279                     playliststart, playlistend)
1280             n_entries = len(entries)
1281             report_download(n_entries)
1282         else:  # iterable
1283             if playlistitems:
1284                 entries = make_playlistitems_entries(list(itertools.islice(
1285                     ie_entries, 0, max(playlistitems))))
1286             else:
1287                 entries = list(itertools.islice(
1288                     ie_entries, playliststart, playlistend))
1289             n_entries = len(entries)
1290             report_download(n_entries)
1291
1292         if self.params.get('playlistreverse', False):
1293             entries = entries[::-1]
1294
1295         if self.params.get('playlistrandom', False):
1296             random.shuffle(entries)
1297
1298         x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1299
1300         for i, entry in enumerate(entries, 1):
1301             self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1302             # This __x_forwarded_for_ip thing is a bit ugly but requires
1303             # minimal changes
1304             if x_forwarded_for:
1305                 entry['__x_forwarded_for_ip'] = x_forwarded_for
1306             extra = {
1307                 'n_entries': n_entries,
1308                 'playlist': playlist,
1309                 'playlist_id': ie_result.get('id'),
1310                 'playlist_title': ie_result.get('title'),
1311                 'playlist_uploader': ie_result.get('uploader'),
1312                 'playlist_uploader_id': ie_result.get('uploader_id'),
1313                 'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
1314                 'extractor': ie_result['extractor'],
1315                 'webpage_url': ie_result['webpage_url'],
1316                 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1317                 'extractor_key': ie_result['extractor_key'],
1318             }
1319
1320             if self._match_entry(entry, incomplete=True) is not None:
1321                 continue
1322
1323             entry_result = self.__process_iterable_entry(entry, download, extra)
1324             # TODO: skip failed (empty) entries?
1325             playlist_results.append(entry_result)
1326         ie_result['entries'] = playlist_results
1327         self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1328         return ie_result
1329
    @__handle_extraction_exceptions
    def __process_iterable_entry(self, entry, download, extra_info):
        """Process one playlist entry by delegating to process_ie_result().

        entry is forwarded unchanged; download and extra_info are passed on
        as the keyword arguments of the same name.  The return value is
        whatever process_ie_result() returns.

        The call runs under the __handle_extraction_exceptions decorator,
        which is defined outside this block.
        NOTE(review): presumably the decorator decides whether an extraction
        error for a single entry is reported or re-raised - confirm against
        its definition.
        """
        return self.process_ie_result(
            entry, download=download, extra_info=extra_info)
1334
1335     def _build_format_filter(self, filter_spec):
1336         " Returns a function to filter the formats according to the filter_spec "
1337
1338         OPERATORS = {
1339             '<': operator.lt,
1340             '<=': operator.le,
1341             '>': operator.gt,
1342             '>=': operator.ge,
1343             '=': operator.eq,
1344             '!=': operator.ne,
1345         }
1346         operator_rex = re.compile(r'''(?x)\s*
1347             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
1348             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1349             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
1350             $
1351             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1352         m = operator_rex.search(filter_spec)
1353         if m:
1354             try:
1355                 comparison_value = int(m.group('value'))
1356             except ValueError:
1357                 comparison_value = parse_filesize(m.group('value'))
1358                 if comparison_value is None:
1359                     comparison_value = parse_filesize(m.group('value') + 'B')
1360                 if comparison_value is None:
1361                     raise ValueError(
1362                         'Invalid value %r in format specification %r' % (
1363                             m.group('value'), filter_spec))
1364             op = OPERATORS[m.group('op')]
1365
1366         if not m:
1367             STR_OPERATORS = {
1368                 '=': operator.eq,
1369                 '^=': lambda attr, value: attr.startswith(value),
1370                 '$=': lambda attr, value: attr.endswith(value),
1371                 '*=': lambda attr, value: value in attr,
1372             }
1373             str_operator_rex = re.compile(r'''(?x)
1374                 \s*(?P<key>[a-zA-Z0-9._-]+)
1375                 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
1376                 \s*(?P<value>[a-zA-Z0-9._-]+)
1377                 \s*$
1378                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1379             m = str_operator_rex.search(filter_spec)
1380             if m:
1381                 comparison_value = m.group('value')
1382                 str_op = STR_OPERATORS[m.group('op')]
1383                 if m.group('negation'):
1384                     op = lambda attr, value: not str_op(attr, value)
1385                 else:
1386                     op = str_op
1387
1388         if not m:
1389             raise ValueError('Invalid filter specification %r' % filter_spec)
1390
1391         def _filter(f):
1392             actual_value = f.get(m.group('key'))
1393             if actual_value is None:
1394                 return m.group('none_inclusive')
1395             return op(actual_value, comparison_value)
1396         return _filter
1397
1398     def _default_format_spec(self, info_dict, download=True):
1399
1400         def can_merge():
1401             merger = FFmpegMergerPP(self)
1402             return merger.available and merger.can_merge()
1403
1404         prefer_best = (
1405             not self.params.get('simulate', False)
1406             and download
1407             and (
1408                 not can_merge()
1409                 or info_dict.get('is_live', False)
1410                 or self.outtmpl_dict['default'] == '-'))
1411
1412         return (
1413             'best/bestvideo+bestaudio'
1414             if prefer_best
1415             else 'bestvideo*+bestaudio/best'
1416             if not self.params.get('allow_multiple_audio_streams', False)
1417             else 'bestvideo+bestaudio/best')
1418
1419     def build_format_selector(self, format_spec):
1420         def syntax_error(note, start):
1421             message = (
1422                 'Invalid format specification: '
1423                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1424             return SyntaxError(message)
1425
1426         PICKFIRST = 'PICKFIRST'
1427         MERGE = 'MERGE'
1428         SINGLE = 'SINGLE'
1429         GROUP = 'GROUP'
1430         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1431
1432         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1433                                   'video': self.params.get('allow_multiple_video_streams', False)}
1434
1435         def _parse_filter(tokens):
1436             filter_parts = []
1437             for type, string, start, _, _ in tokens:
1438                 if type == tokenize.OP and string == ']':
1439                     return ''.join(filter_parts)
1440                 else:
1441                     filter_parts.append(string)
1442
1443         def _remove_unused_ops(tokens):
1444             # Remove operators that we don't use and join them with the surrounding strings
1445             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1446             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1447             last_string, last_start, last_end, last_line = None, None, None, None
1448             for type, string, start, end, line in tokens:
1449                 if type == tokenize.OP and string == '[':
1450                     if last_string:
1451                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1452                         last_string = None
1453                     yield type, string, start, end, line
1454                     # everything inside brackets will be handled by _parse_filter
1455                     for type, string, start, end, line in tokens:
1456                         yield type, string, start, end, line
1457                         if type == tokenize.OP and string == ']':
1458                             break
1459                 elif type == tokenize.OP and string in ALLOWED_OPS:
1460                     if last_string:
1461                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1462                         last_string = None
1463                     yield type, string, start, end, line
1464                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1465                     if not last_string:
1466                         last_string = string
1467                         last_start = start
1468                         last_end = end
1469                     else:
1470                         last_string += string
1471             if last_string:
1472                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1473
1474         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1475             selectors = []
1476             current_selector = None
1477             for type, string, start, _, _ in tokens:
1478                 # ENCODING is only defined in python 3.x
1479                 if type == getattr(tokenize, 'ENCODING', None):
1480                     continue
1481                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1482                     current_selector = FormatSelector(SINGLE, string, [])
1483                 elif type == tokenize.OP:
1484                     if string == ')':
1485                         if not inside_group:
1486                             # ')' will be handled by the parentheses group
1487                             tokens.restore_last_token()
1488                         break
1489                     elif inside_merge and string in ['/', ',']:
1490                         tokens.restore_last_token()
1491                         break
1492                     elif inside_choice and string == ',':
1493                         tokens.restore_last_token()
1494                         break
1495                     elif string == ',':
1496                         if not current_selector:
1497                             raise syntax_error('"," must follow a format selector', start)
1498                         selectors.append(current_selector)
1499                         current_selector = None
1500                     elif string == '/':
1501                         if not current_selector:
1502                             raise syntax_error('"/" must follow a format selector', start)
1503                         first_choice = current_selector
1504                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1505                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1506                     elif string == '[':
1507                         if not current_selector:
1508                             current_selector = FormatSelector(SINGLE, 'best', [])
1509                         format_filter = _parse_filter(tokens)
1510                         current_selector.filters.append(format_filter)
1511                     elif string == '(':
1512                         if current_selector:
1513                             raise syntax_error('Unexpected "("', start)
1514                         group = _parse_format_selection(tokens, inside_group=True)
1515                         current_selector = FormatSelector(GROUP, group, [])
1516                     elif string == '+':
1517                         if not current_selector:
1518                             raise syntax_error('Unexpected "+"', start)
1519                         selector_1 = current_selector
1520                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
1521                         if not selector_2:
1522                             raise syntax_error('Expected a selector', start)
1523                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1524                     else:
1525                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1526                 elif type == tokenize.ENDMARKER:
1527                     break
1528             if current_selector:
1529                 selectors.append(current_selector)
1530             return selectors
1531
1532         def _build_selector_function(selector):
1533             if isinstance(selector, list):  # ,
1534                 fs = [_build_selector_function(s) for s in selector]
1535
1536                 def selector_function(ctx):
1537                     for f in fs:
1538                         for format in f(ctx):
1539                             yield format
1540                 return selector_function
1541
1542             elif selector.type == GROUP:  # ()
1543                 selector_function = _build_selector_function(selector.selector)
1544
1545             elif selector.type == PICKFIRST:  # /
1546                 fs = [_build_selector_function(s) for s in selector.selector]
1547
1548                 def selector_function(ctx):
1549                     for f in fs:
1550                         picked_formats = list(f(ctx))
1551                         if picked_formats:
1552                             return picked_formats
1553                     return []
1554
1555             elif selector.type == SINGLE:  # atom
1556                 format_spec = selector.selector if selector.selector is not None else 'best'
1557
1558                 if format_spec == 'all':
1559                     def selector_function(ctx):
1560                         formats = list(ctx['formats'])
1561                         if formats:
1562                             for f in formats:
1563                                 yield f
1564
1565                 else:
1566                     format_fallback = False
1567                     format_spec_obj = re.match(r'(best|worst|b|w)(video|audio|v|a)?(\*)?$', format_spec)
1568                     if format_spec_obj is not None:
1569                         format_idx = 0 if format_spec_obj.group(1)[0] == 'w' else -1
1570                         format_type = format_spec_obj.group(2)[0] if format_spec_obj.group(2) else False
1571                         not_format_type = 'v' if format_type == 'a' else 'a'
1572                         format_modified = format_spec_obj.group(3) is not None
1573
1574                         format_fallback = not format_type and not format_modified  # for b, w
1575                         filter_f = ((lambda f: f.get(format_type + 'codec') != 'none')
1576                                     if format_type and format_modified  # bv*, ba*, wv*, wa*
1577                                     else (lambda f: f.get(not_format_type + 'codec') == 'none')
1578                                     if format_type  # bv, ba, wv, wa
1579                                     else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
1580                                     if not format_modified  # b, w
1581                                     else None)  # b*, w*
1582                     else:
1583                         format_idx = -1
1584                         filter_f = ((lambda f: f.get('ext') == format_spec)
1585                                     if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
1586                                     else (lambda f: f.get('format_id') == format_spec))  # id
1587
1588                     def selector_function(ctx):
1589                         formats = list(ctx['formats'])
1590                         if not formats:
1591                             return
1592                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
1593                         if matches:
1594                             yield matches[format_idx]
1595                         elif format_fallback == 'force' or (format_fallback and ctx['incomplete_formats']):
1596                             # for extractors with incomplete formats (audio only (soundcloud)
1597                             # or video only (imgur)) best/worst will fallback to
1598                             # best/worst {video,audio}-only format
1599                             yield formats[format_idx]
1600
1601             elif selector.type == MERGE:        # +
1602                 def _merge(formats_pair):
1603                     format_1, format_2 = formats_pair
1604
1605                     formats_info = []
1606                     formats_info.extend(format_1.get('requested_formats', (format_1,)))
1607                     formats_info.extend(format_2.get('requested_formats', (format_2,)))
1608
1609                     if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1610                         get_no_more = {"video": False, "audio": False}
1611                         for (i, fmt_info) in enumerate(formats_info):
1612                             for aud_vid in ["audio", "video"]:
1613                                 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1614                                     if get_no_more[aud_vid]:
1615                                         formats_info.pop(i)
1616                                     get_no_more[aud_vid] = True
1617
1618                     if len(formats_info) == 1:
1619                         return formats_info[0]
1620
1621                     video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1622                     audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1623
1624                     the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1625                     the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1626
1627                     output_ext = self.params.get('merge_output_format')
1628                     if not output_ext:
1629                         if the_only_video:
1630                             output_ext = the_only_video['ext']
1631                         elif the_only_audio and not video_fmts:
1632                             output_ext = the_only_audio['ext']
1633                         else:
1634                             output_ext = 'mkv'
1635
1636                     new_dict = {
1637                         'requested_formats': formats_info,
1638                         'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
1639                         'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
1640                         'ext': output_ext,
1641                     }
1642
1643                     if the_only_video:
1644                         new_dict.update({
1645                             'width': the_only_video.get('width'),
1646                             'height': the_only_video.get('height'),
1647                             'resolution': the_only_video.get('resolution'),
1648                             'fps': the_only_video.get('fps'),
1649                             'vcodec': the_only_video.get('vcodec'),
1650                             'vbr': the_only_video.get('vbr'),
1651                             'stretched_ratio': the_only_video.get('stretched_ratio'),
1652                         })
1653
1654                     if the_only_audio:
1655                         new_dict.update({
1656                             'acodec': the_only_audio.get('acodec'),
1657                             'abr': the_only_audio.get('abr'),
1658                         })
1659
1660                     return new_dict
1661
1662                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
1663
1664                 def selector_function(ctx):
1665                     for pair in itertools.product(
1666                             selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
1667                         yield _merge(pair)
1668
1669             filters = [self._build_format_filter(f) for f in selector.filters]
1670
1671             def final_selector(ctx):
1672                 ctx_copy = copy.deepcopy(ctx)
1673                 for _filter in filters:
1674                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1675                 return selector_function(ctx_copy)
1676             return final_selector
1677
1678         stream = io.BytesIO(format_spec.encode('utf-8'))
1679         try:
1680             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1681         except tokenize.TokenError:
1682             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1683
1684         class TokenIterator(object):
1685             def __init__(self, tokens):
1686                 self.tokens = tokens
1687                 self.counter = 0
1688
1689             def __iter__(self):
1690                 return self
1691
1692             def __next__(self):
1693                 if self.counter >= len(self.tokens):
1694                     raise StopIteration()
1695                 value = self.tokens[self.counter]
1696                 self.counter += 1
1697                 return value
1698
1699             next = __next__
1700
1701             def restore_last_token(self):
1702                 self.counter -= 1
1703
1704         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1705         return _build_selector_function(parsed_selector)
1706
1707     def _calc_headers(self, info_dict):
1708         res = std_headers.copy()
1709
1710         add_headers = info_dict.get('http_headers')
1711         if add_headers:
1712             res.update(add_headers)
1713
1714         cookies = self._calc_cookies(info_dict)
1715         if cookies:
1716             res['Cookie'] = cookies
1717
1718         if 'X-Forwarded-For' not in res:
1719             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1720             if x_forwarded_for_ip:
1721                 res['X-Forwarded-For'] = x_forwarded_for_ip
1722
1723         return res
1724
    def _calc_cookies(self, info_dict):
        """Return the 'Cookie' header that self.cookiejar produces for info_dict['url'].

        info_dict must contain a 'url' key (it is accessed by subscript).
        NOTE(review): presumably the result is None when the jar holds no
        cookie for the URL - confirm against sanitized_Request's get_header().
        """
        # The request object is never sent; it only gives the cookie jar
        # something to attach its header to so the value can be read back
        pr = sanitized_Request(info_dict['url'])
        self.cookiejar.add_cookie_header(pr)
        return pr.get_header('Cookie')
1729
1730     def process_video_result(self, info_dict, download=True):
1731         assert info_dict.get('_type', 'video') == 'video'
1732
1733         if 'id' not in info_dict:
1734             raise ExtractorError('Missing "id" field in extractor result')
1735         if 'title' not in info_dict:
1736             raise ExtractorError('Missing "title" field in extractor result')
1737
1738         def report_force_conversion(field, field_not, conversion):
1739             self.report_warning(
1740                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
1741                 % (field, field_not, conversion))
1742
1743         def sanitize_string_field(info, string_field):
1744             field = info.get(string_field)
1745             if field is None or isinstance(field, compat_str):
1746                 return
1747             report_force_conversion(string_field, 'a string', 'string')
1748             info[string_field] = compat_str(field)
1749
1750         def sanitize_numeric_fields(info):
1751             for numeric_field in self._NUMERIC_FIELDS:
1752                 field = info.get(numeric_field)
1753                 if field is None or isinstance(field, compat_numeric_types):
1754                     continue
1755                 report_force_conversion(numeric_field, 'numeric', 'int')
1756                 info[numeric_field] = int_or_none(field)
1757
1758         sanitize_string_field(info_dict, 'id')
1759         sanitize_numeric_fields(info_dict)
1760
1761         if 'playlist' not in info_dict:
1762             # It isn't part of a playlist
1763             info_dict['playlist'] = None
1764             info_dict['playlist_index'] = None
1765
1766         thumbnails = info_dict.get('thumbnails')
1767         if thumbnails is None:
1768             thumbnail = info_dict.get('thumbnail')
1769             if thumbnail:
1770                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1771         if thumbnails:
1772             thumbnails.sort(key=lambda t: (
1773                 t.get('preference') if t.get('preference') is not None else -1,
1774                 t.get('width') if t.get('width') is not None else -1,
1775                 t.get('height') if t.get('height') is not None else -1,
1776                 t.get('id') if t.get('id') is not None else '', t.get('url')))
1777             for i, t in enumerate(thumbnails):
1778                 t['url'] = sanitize_url(t['url'])
1779                 if t.get('width') and t.get('height'):
1780                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1781                 if t.get('id') is None:
1782                     t['id'] = '%d' % i
1783
1784         if self.params.get('list_thumbnails'):
1785             self.list_thumbnails(info_dict)
1786             return
1787
1788         thumbnail = info_dict.get('thumbnail')
1789         if thumbnail:
1790             info_dict['thumbnail'] = sanitize_url(thumbnail)
1791         elif thumbnails:
1792             info_dict['thumbnail'] = thumbnails[-1]['url']
1793
1794         if 'display_id' not in info_dict and 'id' in info_dict:
1795             info_dict['display_id'] = info_dict['id']
1796
1797         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1798             # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1799             # see http://bugs.python.org/issue1646728)
1800             try:
1801                 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1802                 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1803             except (ValueError, OverflowError, OSError):
1804                 pass
1805
1806         # Auto generate title fields corresponding to the *_number fields when missing
1807         # in order to always have clean titles. This is very common for TV series.
1808         for field in ('chapter', 'season', 'episode'):
1809             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1810                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
1811
1812         for cc_kind in ('subtitles', 'automatic_captions'):
1813             cc = info_dict.get(cc_kind)
1814             if cc:
1815                 for _, subtitle in cc.items():
1816                     for subtitle_format in subtitle:
1817                         if subtitle_format.get('url'):
1818                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
1819                         if subtitle_format.get('ext') is None:
1820                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1821
1822         automatic_captions = info_dict.get('automatic_captions')
1823         subtitles = info_dict.get('subtitles')
1824
1825         if self.params.get('listsubtitles', False):
1826             if 'automatic_captions' in info_dict:
1827                 self.list_subtitles(
1828                     info_dict['id'], automatic_captions, 'automatic captions')
1829             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1830             return
1831
1832         info_dict['requested_subtitles'] = self.process_subtitles(
1833             info_dict['id'], subtitles, automatic_captions)
1834
1835         # We now pick which formats have to be downloaded
1836         if info_dict.get('formats') is None:
1837             # There's only one format available
1838             formats = [info_dict]
1839         else:
1840             formats = info_dict['formats']
1841
1842         if not formats:
1843             raise ExtractorError('No video formats found!')
1844
1845         def is_wellformed(f):
1846             url = f.get('url')
1847             if not url:
1848                 self.report_warning(
1849                     '"url" field is missing or empty - skipping format, '
1850                     'there is an error in extractor')
1851                 return False
1852             if isinstance(url, bytes):
1853                 sanitize_string_field(f, 'url')
1854             return True
1855
1856         # Filter out malformed formats for better extraction robustness
1857         formats = list(filter(is_wellformed, formats))
1858
1859         formats_dict = {}
1860
1861         # We check that all the formats have the format and format_id fields
1862         for i, format in enumerate(formats):
1863             sanitize_string_field(format, 'format_id')
1864             sanitize_numeric_fields(format)
1865             format['url'] = sanitize_url(format['url'])
1866             if not format.get('format_id'):
1867                 format['format_id'] = compat_str(i)
1868             else:
1869                 # Sanitize format_id from characters used in format selector expression
1870                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
1871             format_id = format['format_id']
1872             if format_id not in formats_dict:
1873                 formats_dict[format_id] = []
1874             formats_dict[format_id].append(format)
1875
1876         # Make sure all formats have unique format_id
1877         for format_id, ambiguous_formats in formats_dict.items():
1878             if len(ambiguous_formats) > 1:
1879                 for i, format in enumerate(ambiguous_formats):
1880                     format['format_id'] = '%s-%d' % (format_id, i)
1881
1882         for i, format in enumerate(formats):
1883             if format.get('format') is None:
1884                 format['format'] = '{id} - {res}{note}'.format(
1885                     id=format['format_id'],
1886                     res=self.format_resolution(format),
1887                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1888                 )
1889             # Automatically determine file extension if missing
1890             if format.get('ext') is None:
1891                 format['ext'] = determine_ext(format['url']).lower()
1892             # Automatically determine protocol if missing (useful for format
1893             # selection purposes)
1894             if format.get('protocol') is None:
1895                 format['protocol'] = determine_protocol(format)
1896             # Add HTTP headers, so that external programs can use them from the
1897             # json output
1898             full_format_info = info_dict.copy()
1899             full_format_info.update(format)
1900             format['http_headers'] = self._calc_headers(full_format_info)
1901         # Remove private housekeeping stuff
1902         if '__x_forwarded_for_ip' in info_dict:
1903             del info_dict['__x_forwarded_for_ip']
1904
1905         # TODO Central sorting goes here
1906
1907         if formats[0] is not info_dict:
1908             # only set the 'formats' fields if the original info_dict list them
1909             # otherwise we end up with a circular reference, the first (and unique)
1910             # element in the 'formats' field in info_dict is info_dict itself,
1911             # which can't be exported to json
1912             info_dict['formats'] = formats
1913         if self.params.get('listformats'):
1914             self.list_formats(info_dict)
1915             return
1916
1917         req_format = self.params.get('format')
1918         if req_format is None:
1919             req_format = self._default_format_spec(info_dict, download=download)
1920             if self.params.get('verbose'):
1921                 self.to_screen('[debug] Default format spec: %s' % req_format)
1922
1923         format_selector = self.build_format_selector(req_format)
1924
1925         # While in format selection we may need to have an access to the original
1926         # format set in order to calculate some metrics or do some processing.
1927         # For now we need to be able to guess whether original formats provided
1928         # by extractor are incomplete or not (i.e. whether extractor provides only
1929         # video-only or audio-only formats) for proper formats selection for
1930         # extractors with such incomplete formats (see
1931         # https://github.com/ytdl-org/youtube-dl/pull/5556).
1932         # Since formats may be filtered during format selection and may not match
1933         # the original formats the results may be incorrect. Thus original formats
1934         # or pre-calculated metrics should be passed to format selection routines
1935         # as well.
1936         # We will pass a context object containing all necessary additional data
1937         # instead of just formats.
1938         # This fixes incorrect format selection issue (see
1939         # https://github.com/ytdl-org/youtube-dl/issues/10083).
1940         incomplete_formats = (
1941             # All formats are video-only or
1942             all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
1943             # all formats are audio-only
1944             or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
1945
1946         ctx = {
1947             'formats': formats,
1948             'incomplete_formats': incomplete_formats,
1949         }
1950
1951         formats_to_download = list(format_selector(ctx))
1952         if not formats_to_download:
1953             raise ExtractorError('requested format not available',
1954                                  expected=True)
1955
1956         if download:
1957             self.to_screen('[info] Downloading format(s) %s' % ", ".join([f['format_id'] for f in formats_to_download]))
1958             if len(formats_to_download) > 1:
1959                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1960             for format in formats_to_download:
1961                 new_info = dict(info_dict)
1962                 new_info.update(format)
1963                 self.process_info(new_info)
1964         # We update the info dict with the best quality format (backwards compatibility)
1965         info_dict.update(formats_to_download[-1])
1966         return info_dict
1967
1968     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1969         """Select the requested subtitles and their format"""
1970         available_subs = {}
1971         if normal_subtitles and self.params.get('writesubtitles'):
1972             available_subs.update(normal_subtitles)
1973         if automatic_captions and self.params.get('writeautomaticsub'):
1974             for lang, cap_info in automatic_captions.items():
1975                 if lang not in available_subs:
1976                     available_subs[lang] = cap_info
1977
1978         if (not self.params.get('writesubtitles') and not
1979                 self.params.get('writeautomaticsub') or not
1980                 available_subs):
1981             return None
1982
1983         if self.params.get('allsubtitles', False):
1984             requested_langs = available_subs.keys()
1985         else:
1986             if self.params.get('subtitleslangs', False):
1987                 requested_langs = self.params.get('subtitleslangs')
1988             elif 'en' in available_subs:
1989                 requested_langs = ['en']
1990             else:
1991                 requested_langs = [list(available_subs.keys())[0]]
1992
1993         formats_query = self.params.get('subtitlesformat', 'best')
1994         formats_preference = formats_query.split('/') if formats_query else []
1995         subs = {}
1996         for lang in requested_langs:
1997             formats = available_subs.get(lang)
1998             if formats is None:
1999                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2000                 continue
2001             for ext in formats_preference:
2002                 if ext == 'best':
2003                     f = formats[-1]
2004                     break
2005                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2006                 if matches:
2007                     f = matches[-1]
2008                     break
2009             else:
2010                 f = formats[-1]
2011                 self.report_warning(
2012                     'No subtitle format found matching "%s" for language %s, '
2013                     'using %s' % (formats_query, lang, f['ext']))
2014             subs[lang] = f
2015         return subs
2016
2017     def __forced_printings(self, info_dict, filename, incomplete):
2018         def print_mandatory(field):
2019             if (self.params.get('force%s' % field, False)
2020                     and (not incomplete or info_dict.get(field) is not None)):
2021                 self.to_stdout(info_dict[field])
2022
2023         def print_optional(field):
2024             if (self.params.get('force%s' % field, False)
2025                     and info_dict.get(field) is not None):
2026                 self.to_stdout(info_dict[field])
2027
2028         print_mandatory('title')
2029         print_mandatory('id')
2030         if self.params.get('forceurl', False) and not incomplete:
2031             if info_dict.get('requested_formats') is not None:
2032                 for f in info_dict['requested_formats']:
2033                     self.to_stdout(f['url'] + f.get('play_path', ''))
2034             else:
2035                 # For RTMP URLs, also include the playpath
2036                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
2037         print_optional('thumbnail')
2038         print_optional('description')
2039         if self.params.get('forcefilename', False) and filename is not None:
2040             self.to_stdout(filename)
2041         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
2042             self.to_stdout(formatSeconds(info_dict['duration']))
2043         print_mandatory('format')
2044         if self.params.get('forcejson', False):
2045             self.post_extract(info_dict)
2046             self.to_stdout(json.dumps(info_dict))
2047
2048     def process_info(self, info_dict):
2049         """Process a single resolved IE result."""
2050
2051         assert info_dict.get('_type', 'video') == 'video'
2052
2053         info_dict.setdefault('__postprocessors', [])
2054
2055         max_downloads = self.params.get('max_downloads')
2056         if max_downloads is not None:
2057             if self._num_downloads >= int(max_downloads):
2058                 raise MaxDownloadsReached()
2059
2060         # TODO: backward compatibility, to be removed
2061         info_dict['fulltitle'] = info_dict['title']
2062
2063         if 'format' not in info_dict:
2064             info_dict['format'] = info_dict['ext']
2065
2066         if self._match_entry(info_dict, incomplete=False) is not None:
2067             return
2068
2069         self.post_extract(info_dict)
2070         self._num_downloads += 1
2071
2072         info_dict = self.pre_process(info_dict)
2073
2074         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2075         temp_filename = self.prepare_filename(info_dict, 'temp')
2076         files_to_move = {}
2077         skip_dl = self.params.get('skip_download', False)
2078
2079         # Forced printings
2080         self.__forced_printings(info_dict, full_filename, incomplete=False)
2081
2082         if self.params.get('simulate', False):
2083             if self.params.get('force_write_download_archive', False):
2084                 self.record_download_archive(info_dict)
2085
2086             # Do nothing else if in simulate mode
2087             return
2088
2089         if full_filename is None:
2090             return
2091
2092         def ensure_dir_exists(path):
2093             return make_dir(path, self.report_error)
2094
2095         if not ensure_dir_exists(encodeFilename(full_filename)):
2096             return
2097         if not ensure_dir_exists(encodeFilename(temp_filename)):
2098             return
2099
2100         if self.params.get('writedescription', False):
2101             descfn = self.prepare_filename(info_dict, 'description')
2102             if not ensure_dir_exists(encodeFilename(descfn)):
2103                 return
2104             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
2105                 self.to_screen('[info] Video description is already present')
2106             elif info_dict.get('description') is None:
2107                 self.report_warning('There\'s no description to write.')
2108             else:
2109                 try:
2110                     self.to_screen('[info] Writing video description to: ' + descfn)
2111                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
2112                         descfile.write(info_dict['description'])
2113                 except (OSError, IOError):
2114                     self.report_error('Cannot write description file ' + descfn)
2115                     return
2116
2117         if self.params.get('writeannotations', False):
2118             annofn = self.prepare_filename(info_dict, 'annotation')
2119             if not ensure_dir_exists(encodeFilename(annofn)):
2120                 return
2121             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2122                 self.to_screen('[info] Video annotations are already present')
2123             elif not info_dict.get('annotations'):
2124                 self.report_warning('There are no annotations to write.')
2125             else:
2126                 try:
2127                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2128                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2129                         annofile.write(info_dict['annotations'])
2130                 except (KeyError, TypeError):
2131                     self.report_warning('There are no annotations to write.')
2132                 except (OSError, IOError):
2133                     self.report_error('Cannot write annotations file: ' + annofn)
2134                     return
2135
2136         def dl(name, info, subtitle=False):
2137             fd = get_suitable_downloader(info, self.params)(self, self.params)
2138             for ph in self._progress_hooks:
2139                 fd.add_progress_hook(ph)
2140             if self.params.get('verbose'):
2141                 self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
2142             return fd.download(name, info, subtitle)
2143
2144         subtitles_are_requested = any([self.params.get('writesubtitles', False),
2145                                        self.params.get('writeautomaticsub')])
2146
2147         if subtitles_are_requested and info_dict.get('requested_subtitles'):
2148             # subtitles download errors are already managed as troubles in relevant IE
2149             # that way it will silently go on when used with unsupporting IE
2150             subtitles = info_dict['requested_subtitles']
2151             # ie = self.get_info_extractor(info_dict['extractor_key'])
2152             for sub_lang, sub_info in subtitles.items():
2153                 sub_format = sub_info['ext']
2154                 sub_fn = self.prepare_filename(info_dict, 'subtitle')
2155                 sub_filename = subtitles_filename(
2156                     temp_filename if not skip_dl else sub_fn,
2157                     sub_lang, sub_format, info_dict.get('ext'))
2158                 sub_filename_final = subtitles_filename(sub_fn, sub_lang, sub_format, info_dict.get('ext'))
2159                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
2160                     self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
2161                     files_to_move[sub_filename] = sub_filename_final
2162                 else:
2163                     self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
2164                     if sub_info.get('data') is not None:
2165                         try:
2166                             # Use newline='' to prevent conversion of newline characters
2167                             # See https://github.com/ytdl-org/youtube-dl/issues/10268
2168                             with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
2169                                 subfile.write(sub_info['data'])
2170                             files_to_move[sub_filename] = sub_filename_final
2171                         except (OSError, IOError):
2172                             self.report_error('Cannot write subtitles file ' + sub_filename)
2173                             return
2174                     else:
2175                         try:
2176                             dl(sub_filename, sub_info, subtitle=True)
2177                             files_to_move[sub_filename] = sub_filename_final
2178                         except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2179                             self.report_warning('Unable to download subtitle for "%s": %s' %
2180                                                 (sub_lang, error_to_compat_str(err)))
2181                             continue
2182
2183         if skip_dl:
2184             if self.params.get('convertsubtitles', False):
2185                 # subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
2186                 filename_real_ext = os.path.splitext(full_filename)[1][1:]
2187                 filename_wo_ext = (
2188                     os.path.splitext(full_filename)[0]
2189                     if filename_real_ext == info_dict['ext']
2190                     else full_filename)
2191                 afilename = '%s.%s' % (filename_wo_ext, self.params.get('convertsubtitles'))
2192                 # if subconv.available:
2193                 #     info_dict['__postprocessors'].append(subconv)
2194                 if os.path.exists(encodeFilename(afilename)):
2195                     self.to_screen(
2196                         '[download] %s has already been downloaded and '
2197                         'converted' % afilename)
2198                 else:
2199                     try:
2200                         self.post_process(full_filename, info_dict, files_to_move)
2201                     except PostProcessingError as err:
2202                         self.report_error('Postprocessing: %s' % str(err))
2203                         return
2204
2205         if self.params.get('writeinfojson', False):
2206             infofn = self.prepare_filename(info_dict, 'infojson')
2207             if not ensure_dir_exists(encodeFilename(infofn)):
2208                 return
2209             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
2210                 self.to_screen('[info] Video metadata is already present')
2211             else:
2212                 self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
2213                 try:
2214                     write_json_file(self.filter_requested_info(info_dict), infofn)
2215                 except (OSError, IOError):
2216                     self.report_error('Cannot write video metadata to JSON file ' + infofn)
2217                     return
2218             info_dict['__infojson_filename'] = infofn
2219
2220         thumbfn = self.prepare_filename(info_dict, 'thumbnail')
2221         thumb_fn_temp = temp_filename if not skip_dl else thumbfn
2222         for thumb_ext in self._write_thumbnails(info_dict, thumb_fn_temp):
2223             thumb_filename_temp = replace_extension(thumb_fn_temp, thumb_ext, info_dict.get('ext'))
2224             thumb_filename = replace_extension(thumbfn, thumb_ext, info_dict.get('ext'))
2225             files_to_move[thumb_filename_temp] = info_dict['__thumbnail_filename'] = thumb_filename
2226
2227         # Write internet shortcut files
2228         url_link = webloc_link = desktop_link = False
2229         if self.params.get('writelink', False):
2230             if sys.platform == "darwin":  # macOS.
2231                 webloc_link = True
2232             elif sys.platform.startswith("linux"):
2233                 desktop_link = True
2234             else:  # if sys.platform in ['win32', 'cygwin']:
2235                 url_link = True
2236         if self.params.get('writeurllink', False):
2237             url_link = True
2238         if self.params.get('writewebloclink', False):
2239             webloc_link = True
2240         if self.params.get('writedesktoplink', False):
2241             desktop_link = True
2242
2243         if url_link or webloc_link or desktop_link:
2244             if 'webpage_url' not in info_dict:
2245                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2246                 return
2247             ascii_url = iri_to_uri(info_dict['webpage_url'])
2248
2249         def _write_link_file(extension, template, newline, embed_filename):
2250             linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
2251             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2252                 self.to_screen('[info] Internet shortcut is already present')
2253             else:
2254                 try:
2255                     self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2256                     with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2257                         template_vars = {'url': ascii_url}
2258                         if embed_filename:
2259                             template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2260                         linkfile.write(template % template_vars)
2261                 except (OSError, IOError):
2262                     self.report_error('Cannot write internet shortcut ' + linkfn)
2263                     return False
2264             return True
2265
2266         if url_link:
2267             if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2268                 return
2269         if webloc_link:
2270             if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2271                 return
2272         if desktop_link:
2273             if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2274                 return
2275
2276         # Download
2277         must_record_download_archive = False
2278         if not skip_dl:
2279             try:
2280
2281                 def existing_file(*filepaths):
2282                     ext = info_dict.get('ext')
2283                     final_ext = self.params.get('final_ext', ext)
2284                     existing_files = []
2285                     for file in orderedSet(filepaths):
2286                         if final_ext != ext:
2287                             converted = replace_extension(file, final_ext, ext)
2288                             if os.path.exists(encodeFilename(converted)):
2289                                 existing_files.append(converted)
2290                         if os.path.exists(encodeFilename(file)):
2291                             existing_files.append(file)
2292
2293                     if not existing_files or self.params.get('overwrites', False):
2294                         for file in orderedSet(existing_files):
2295                             self.report_file_delete(file)
2296                             os.remove(encodeFilename(file))
2297                         return None
2298
2299                     self.report_file_already_downloaded(existing_files[0])
2300                     info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2301                     return existing_files[0]
2302
2303                 success = True
2304                 if info_dict.get('requested_formats') is not None:
2305                     downloaded = []
2306                     merger = FFmpegMergerPP(self)
2307                     if self.params.get('allow_unplayable_formats'):
2308                         self.report_warning(
2309                             'You have requested merging of multiple formats '
2310                             'while also allowing unplayable formats to be downloaded. '
2311                             'The formats won\'t be merged to prevent data corruption.')
2312                     elif not merger.available:
2313                         self.report_warning(
2314                             'You have requested merging of multiple formats but ffmpeg is not installed. '
2315                             'The formats won\'t be merged.')
2316
2317                     def compatible_formats(formats):
2318                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2319                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2320                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2321                         if len(video_formats) > 2 or len(audio_formats) > 2:
2322                             return False
2323
2324                         # Check extension
2325                         exts = set(format.get('ext') for format in formats)
2326                         COMPATIBLE_EXTS = (
2327                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2328                             set(('webm',)),
2329                         )
2330                         for ext_sets in COMPATIBLE_EXTS:
2331                             if ext_sets.issuperset(exts):
2332                                 return True
2333                         # TODO: Check acodec/vcodec
2334                         return False
2335
2336                     requested_formats = info_dict['requested_formats']
2337                     old_ext = info_dict['ext']
2338                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
2339                         info_dict['ext'] = 'mkv'
2340                         self.report_warning(
2341                             'Requested formats are incompatible for merge and will be merged into mkv.')
2342
2343                     def correct_ext(filename):
2344                         filename_real_ext = os.path.splitext(filename)[1][1:]
2345                         filename_wo_ext = (
2346                             os.path.splitext(filename)[0]
2347                             if filename_real_ext == old_ext
2348                             else filename)
2349                         return '%s.%s' % (filename_wo_ext, info_dict['ext'])
2350
2351                     # Ensure filename always has a correct extension for successful merge
2352                     full_filename = correct_ext(full_filename)
2353                     temp_filename = correct_ext(temp_filename)
2354                     dl_filename = existing_file(full_filename, temp_filename)
2355                     info_dict['__real_download'] = False
2356                     if dl_filename is None:
2357                         for f in requested_formats:
2358                             new_info = dict(info_dict)
2359                             new_info.update(f)
2360                             fname = prepend_extension(
2361                                 self.prepare_filename(new_info, 'temp'),
2362                                 'f%s' % f['format_id'], new_info['ext'])
2363                             if not ensure_dir_exists(fname):
2364                                 return
2365                             downloaded.append(fname)
2366                             partial_success, real_download = dl(fname, new_info)
2367                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
2368                             success = success and partial_success
2369                         if merger.available and not self.params.get('allow_unplayable_formats'):
2370                             info_dict['__postprocessors'].append(merger)
2371                             info_dict['__files_to_merge'] = downloaded
2372                             # Even if there were no downloads, it is being merged only now
2373                             info_dict['__real_download'] = True
2374                         else:
2375                             for file in downloaded:
2376                                 files_to_move[file] = None
2377                 else:
2378                     # Just a single file
2379                     dl_filename = existing_file(full_filename, temp_filename)
2380                     if dl_filename is None:
2381                         success, real_download = dl(temp_filename, info_dict)
2382                         info_dict['__real_download'] = real_download
2383
2384                 dl_filename = dl_filename or temp_filename
2385                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2386
2387             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2388                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2389                 return
2390             except (OSError, IOError) as err:
2391                 raise UnavailableVideoError(err)
2392             except (ContentTooShortError, ) as err:
2393                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2394                 return
2395
2396             if success and full_filename != '-':
2397                 # Fixup content
2398                 fixup_policy = self.params.get('fixup')
2399                 if fixup_policy is None:
2400                     fixup_policy = 'detect_or_warn'
2401
2402                 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg to fix this automatically.'
2403
2404                 stretched_ratio = info_dict.get('stretched_ratio')
2405                 if stretched_ratio is not None and stretched_ratio != 1:
2406                     if fixup_policy == 'warn':
2407                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2408                             info_dict['id'], stretched_ratio))
2409                     elif fixup_policy == 'detect_or_warn':
2410                         stretched_pp = FFmpegFixupStretchedPP(self)
2411                         if stretched_pp.available:
2412                             info_dict['__postprocessors'].append(stretched_pp)
2413                         else:
2414                             self.report_warning(
2415                                 '%s: Non-uniform pixel ratio (%s). %s'
2416                                 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
2417                     else:
2418                         assert fixup_policy in ('ignore', 'never')
2419
2420                 if (info_dict.get('requested_formats') is None
2421                         and info_dict.get('container') == 'm4a_dash'
2422                         and info_dict.get('ext') == 'm4a'):
2423                     if fixup_policy == 'warn':
2424                         self.report_warning(
2425                             '%s: writing DASH m4a. '
2426                             'Only some players support this container.'
2427                             % info_dict['id'])
2428                     elif fixup_policy == 'detect_or_warn':
2429                         fixup_pp = FFmpegFixupM4aPP(self)
2430                         if fixup_pp.available:
2431                             info_dict['__postprocessors'].append(fixup_pp)
2432                         else:
2433                             self.report_warning(
2434                                 '%s: writing DASH m4a. '
2435                                 'Only some players support this container. %s'
2436                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2437                     else:
2438                         assert fixup_policy in ('ignore', 'never')
2439
2440                 if ('protocol' in info_dict
2441                         and get_suitable_downloader(info_dict, self.params).__name__ == 'HlsFD'):
2442                     if fixup_policy == 'warn':
2443                         self.report_warning('%s: malformed AAC bitstream detected.' % (
2444                             info_dict['id']))
2445                     elif fixup_policy == 'detect_or_warn':
2446                         fixup_pp = FFmpegFixupM3u8PP(self)
2447                         if fixup_pp.available:
2448                             info_dict['__postprocessors'].append(fixup_pp)
2449                         else:
2450                             self.report_warning(
2451                                 '%s: malformed AAC bitstream detected. %s'
2452                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2453                     else:
2454                         assert fixup_policy in ('ignore', 'never')
2455
2456                 try:
2457                     self.post_process(dl_filename, info_dict, files_to_move)
2458                 except PostProcessingError as err:
2459                     self.report_error('Postprocessing: %s' % str(err))
2460                     return
2461                 try:
2462                     for ph in self._post_hooks:
2463                         ph(full_filename)
2464                 except Exception as err:
2465                     self.report_error('post hooks: %s' % str(err))
2466                     return
2467                 must_record_download_archive = True
2468
2469         if must_record_download_archive or self.params.get('force_write_download_archive', False):
2470             self.record_download_archive(info_dict)
2471         max_downloads = self.params.get('max_downloads')
2472         if max_downloads is not None and self._num_downloads >= int(max_downloads):
2473             raise MaxDownloadsReached()
2474
2475     def download(self, url_list):
2476         """Download a given list of URLs."""
2477         outtmpl = self.outtmpl_dict['default']
2478         if (len(url_list) > 1
2479                 and outtmpl != '-'
2480                 and '%' not in outtmpl
2481                 and self.params.get('max_downloads') != 1):
2482             raise SameFileError(outtmpl)
2483
2484         for url in url_list:
2485             try:
2486                 # It also downloads the videos
2487                 res = self.extract_info(
2488                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2489             except UnavailableVideoError:
2490                 self.report_error('unable to download video')
2491             except MaxDownloadsReached:
2492                 self.to_screen('[info] Maximum number of downloaded files reached')
2493                 raise
2494             except ExistingVideoReached:
2495                 self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
2496                 raise
2497             except RejectedVideoReached:
2498                 self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
2499                 raise
2500             else:
2501                 if self.params.get('dump_single_json', False):
2502                     self.post_extract(res)
2503                     self.to_stdout(json.dumps(res))
2504
2505         return self._download_retcode
2506
2507     def download_with_info_file(self, info_filename):
2508         with contextlib.closing(fileinput.FileInput(
2509                 [info_filename], mode='r',
2510                 openhook=fileinput.hook_encoded('utf-8'))) as f:
2511             # FileInput doesn't have a read method, we can't call json.load
2512             info = self.filter_requested_info(json.loads('\n'.join(f)))
2513         try:
2514             self.process_ie_result(info, download=True)
2515         except DownloadError:
2516             webpage_url = info.get('webpage_url')
2517             if webpage_url is not None:
2518                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2519                 return self.download([webpage_url])
2520             else:
2521                 raise
2522         return self._download_retcode
2523
2524     @staticmethod
2525     def filter_requested_info(info_dict):
2526         fields_to_remove = ('requested_formats', 'requested_subtitles')
2527         return dict(
2528             (k, v) for k, v in info_dict.items()
2529             if (k[0] != '_' or k == '_type') and k not in fields_to_remove)
2530
2531     def run_pp(self, pp, infodict, files_to_move={}):
2532         files_to_delete = []
2533         files_to_delete, infodict = pp.run(infodict)
2534         if not files_to_delete:
2535             return files_to_move, infodict
2536
2537         if self.params.get('keepvideo', False):
2538             for f in files_to_delete:
2539                 files_to_move.setdefault(f, '')
2540         else:
2541             for old_filename in set(files_to_delete):
2542                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2543                 try:
2544                     os.remove(encodeFilename(old_filename))
2545                 except (IOError, OSError):
2546                     self.report_warning('Unable to remove downloaded original file')
2547                 if old_filename in files_to_move:
2548                     del files_to_move[old_filename]
2549         return files_to_move, infodict
2550
2551     @staticmethod
2552     def post_extract(info_dict):
2553         def actual_post_extract(info_dict):
2554             if info_dict.get('_type') in ('playlist', 'multi_video'):
2555                 for video_dict in info_dict.get('entries', {}):
2556                     actual_post_extract(video_dict)
2557                 return
2558
2559             if '__post_extractor' not in info_dict:
2560                 return
2561             post_extractor = info_dict['__post_extractor']
2562             if post_extractor:
2563                 info_dict.update(post_extractor().items())
2564             del info_dict['__post_extractor']
2565             return
2566
2567         actual_post_extract(info_dict)
2568
2569     def pre_process(self, ie_info):
2570         info = dict(ie_info)
2571         for pp in self._pps['beforedl']:
2572             info = self.run_pp(pp, info)[1]
2573         return info
2574
2575     def post_process(self, filename, ie_info, files_to_move={}):
2576         """Run all the postprocessors on the given file."""
2577         info = dict(ie_info)
2578         info['filepath'] = filename
2579         info['__files_to_move'] = {}
2580
2581         for pp in ie_info.get('__postprocessors', []) + self._pps['normal']:
2582             files_to_move, info = self.run_pp(pp, info, files_to_move)
2583         info = self.run_pp(MoveFilesAfterDownloadPP(self, files_to_move), info)[1]
2584         for pp in self._pps['aftermove']:
2585             info = self.run_pp(pp, info, {})[1]
2586
2587     def _make_archive_id(self, info_dict):
2588         video_id = info_dict.get('id')
2589         if not video_id:
2590             return
2591         # Future-proof against any change in case
2592         # and backwards compatibility with prior versions
2593         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
2594         if extractor is None:
2595             url = str_or_none(info_dict.get('url'))
2596             if not url:
2597                 return
2598             # Try to find matching extractor for the URL and take its ie_key
2599             for ie in self._ies:
2600                 if ie.suitable(url):
2601                     extractor = ie.ie_key()
2602                     break
2603             else:
2604                 return
2605         return '%s %s' % (extractor.lower(), video_id)
2606
2607     def in_download_archive(self, info_dict):
2608         fn = self.params.get('download_archive')
2609         if fn is None:
2610             return False
2611
2612         vid_id = self._make_archive_id(info_dict)
2613         if not vid_id:
2614             return False  # Incomplete video information
2615
2616         return vid_id in self.archive
2617
2618     def record_download_archive(self, info_dict):
2619         fn = self.params.get('download_archive')
2620         if fn is None:
2621             return
2622         vid_id = self._make_archive_id(info_dict)
2623         assert vid_id
2624         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2625             archive_file.write(vid_id + '\n')
2626         self.archive.add(vid_id)
2627
2628     @staticmethod
2629     def format_resolution(format, default='unknown'):
2630         if format.get('vcodec') == 'none':
2631             return 'audio only'
2632         if format.get('resolution') is not None:
2633             return format['resolution']
2634         if format.get('height') is not None:
2635             if format.get('width') is not None:
2636                 res = '%sx%s' % (format['width'], format['height'])
2637             else:
2638                 res = '%sp' % format['height']
2639         elif format.get('width') is not None:
2640             res = '%dx?' % format['width']
2641         else:
2642             res = default
2643         return res
2644
2645     def _format_note(self, fdict):
2646         res = ''
2647         if fdict.get('ext') in ['f4f', 'f4m']:
2648             res += '(unsupported) '
2649         if fdict.get('language'):
2650             if res:
2651                 res += ' '
2652             res += '[%s] ' % fdict['language']
2653         if fdict.get('format_note') is not None:
2654             res += fdict['format_note'] + ' '
2655         if fdict.get('tbr') is not None:
2656             res += '%4dk ' % fdict['tbr']
2657         if fdict.get('container') is not None:
2658             if res:
2659                 res += ', '
2660             res += '%s container' % fdict['container']
2661         if (fdict.get('vcodec') is not None
2662                 and fdict.get('vcodec') != 'none'):
2663             if res:
2664                 res += ', '
2665             res += fdict['vcodec']
2666             if fdict.get('vbr') is not None:
2667                 res += '@'
2668         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2669             res += 'video@'
2670         if fdict.get('vbr') is not None:
2671             res += '%4dk' % fdict['vbr']
2672         if fdict.get('fps') is not None:
2673             if res:
2674                 res += ', '
2675             res += '%sfps' % fdict['fps']
2676         if fdict.get('acodec') is not None:
2677             if res:
2678                 res += ', '
2679             if fdict['acodec'] == 'none':
2680                 res += 'video only'
2681             else:
2682                 res += '%-5s' % fdict['acodec']
2683         elif fdict.get('abr') is not None:
2684             if res:
2685                 res += ', '
2686             res += 'audio'
2687         if fdict.get('abr') is not None:
2688             res += '@%3dk' % fdict['abr']
2689         if fdict.get('asr') is not None:
2690             res += ' (%5dHz)' % fdict['asr']
2691         if fdict.get('filesize') is not None:
2692             if res:
2693                 res += ', '
2694             res += format_bytes(fdict['filesize'])
2695         elif fdict.get('filesize_approx') is not None:
2696             if res:
2697                 res += ', '
2698             res += '~' + format_bytes(fdict['filesize_approx'])
2699         return res
2700
2701     def _format_note_table(self, f):
2702         def join_fields(*vargs):
2703             return ', '.join((val for val in vargs if val != ''))
2704
2705         return join_fields(
2706             'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
2707             format_field(f, 'language', '[%s]'),
2708             format_field(f, 'format_note'),
2709             format_field(f, 'container', ignore=(None, f.get('ext'))),
2710             format_field(f, 'asr', '%5dHz'))
2711
2712     def list_formats(self, info_dict):
2713         formats = info_dict.get('formats', [info_dict])
2714         new_format = self.params.get('listformats_table', False)
2715         if new_format:
2716             table = [
2717                 [
2718                     format_field(f, 'format_id'),
2719                     format_field(f, 'ext'),
2720                     self.format_resolution(f),
2721                     format_field(f, 'fps', '%d'),
2722                     '|',
2723                     format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
2724                     format_field(f, 'tbr', '%4dk'),
2725                     f.get('protocol').replace('http_dash_segments', 'dash').replace("native", "n").replace('niconico_', ''),
2726                     '|',
2727                     format_field(f, 'vcodec', default='unknown').replace('none', ''),
2728                     format_field(f, 'vbr', '%4dk'),
2729                     format_field(f, 'acodec', default='unknown').replace('none', ''),
2730                     format_field(f, 'abr', '%3dk'),
2731                     format_field(f, 'asr', '%5dHz'),
2732                     self._format_note_table(f)]
2733                 for f in formats
2734                 if f.get('preference') is None or f['preference'] >= -1000]
2735             header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', '  TBR', 'PROTO',
2736                            '|', 'VCODEC', '  VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
2737         else:
2738             table = [
2739                 [
2740                     format_field(f, 'format_id'),
2741                     format_field(f, 'ext'),
2742                     self.format_resolution(f),
2743                     self._format_note(f)]
2744                 for f in formats
2745                 if f.get('preference') is None or f['preference'] >= -1000]
2746             header_line = ['format code', 'extension', 'resolution', 'note']
2747
2748         self.to_screen(
2749             '[info] Available formats for %s:\n%s' % (info_dict['id'], render_table(
2750                 header_line,
2751                 table,
2752                 delim=new_format,
2753                 extraGap=(0 if new_format else 1),
2754                 hideEmpty=new_format)))
2755
2756     def list_thumbnails(self, info_dict):
2757         thumbnails = info_dict.get('thumbnails')
2758         if not thumbnails:
2759             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
2760             return
2761
2762         self.to_screen(
2763             '[info] Thumbnails for %s:' % info_dict['id'])
2764         self.to_screen(render_table(
2765             ['ID', 'width', 'height', 'URL'],
2766             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
2767
2768     def list_subtitles(self, video_id, subtitles, name='subtitles'):
2769         if not subtitles:
2770             self.to_screen('%s has no %s' % (video_id, name))
2771             return
2772         self.to_screen(
2773             'Available %s for %s:' % (name, video_id))
2774         self.to_screen(render_table(
2775             ['Language', 'formats'],
2776             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
2777                 for lang, formats in subtitles.items()]))
2778
2779     def urlopen(self, req):
2780         """ Start an HTTP download """
2781         if isinstance(req, compat_basestring):
2782             req = sanitized_Request(req)
2783         return self._opener.open(req, timeout=self._socket_timeout)
2784
    def print_debug_header(self):
        """Write environment diagnostics as '[debug] ...' lines.

        Does nothing unless the 'verbose' param is set. Reports encodings,
        program version and how it is being run, Git HEAD (if obtainable),
        Python version, external executable versions, the proxy map and,
        with the 'call_home' param, the public IP address.
        """
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        # sys.stdout may be replaced by an object without an 'encoding' attribute
        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        # This line goes through write_string with encoding=None, unlike the
        # remaining output which uses self._write_string
        write_string(encoding_str, encoding=None)

        # Describe how the program is being run: frozen executable, zip
        # archive, directly from source via __main__.py, or none of these
        source = (
            '(exe)' if hasattr(sys, 'frozen')
            else '(zip)' if isinstance(globals().get('__loader__'), zipimporter)
            else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py'
            else '')
        self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))
        if _LAZY_LOADER:
            self._write_string('[debug] Lazy loading extractors enabled\n')
        if _PLUGIN_CLASSES:
            self._write_string(
                '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
        # Best effort: report the Git revision when run from a checkout; any
        # failure (git missing, not a repository, ...) is silently ignored
        try:
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = process_communicate_or_kill(sp)
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: %s\n' % out)
        except Exception:
            try:
                # sys.exc_clear only exists on Python 2; the resulting
                # AttributeError on Python 3 is swallowed below
                sys.exc_clear()
            except Exception:
                pass

        def python_implementation():
            # Append the PyPy version when running on PyPy
            impl_name = platform.python_implementation()
            if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
                return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
            return impl_name

        self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
            platform.python_version(),
            python_implementation(),
            platform.architecture()[0],
            platform_name()))

        # Versions of external helper programs; entries with a falsy version
        # are left out of the listing
        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Merge the proxies of every opener handler that exposes them
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            # NOTE(review): this bare return makes the version check below
            # unreachable. Presumably intentional, since it would compare
            # against the version published on yt-dl.org - confirm, then
            # either delete the dead code or restore the check.
            return
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
2871
2872     def _setup_opener(self):
2873         timeout_val = self.params.get('socket_timeout')
2874         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
2875
2876         opts_cookiefile = self.params.get('cookiefile')
2877         opts_proxy = self.params.get('proxy')
2878
2879         if opts_cookiefile is None:
2880             self.cookiejar = compat_cookiejar.CookieJar()
2881         else:
2882             opts_cookiefile = expand_path(opts_cookiefile)
2883             self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
2884             if os.access(opts_cookiefile, os.R_OK):
2885                 self.cookiejar.load(ignore_discard=True, ignore_expires=True)
2886
2887         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
2888         if opts_proxy is not None:
2889             if opts_proxy == '':
2890                 proxies = {}
2891             else:
2892                 proxies = {'http': opts_proxy, 'https': opts_proxy}
2893         else:
2894             proxies = compat_urllib_request.getproxies()
2895             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
2896             if 'http' in proxies and 'https' not in proxies:
2897                 proxies['https'] = proxies['http']
2898         proxy_handler = PerRequestProxyHandler(proxies)
2899
2900         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
2901         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
2902         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
2903         redirect_handler = YoutubeDLRedirectHandler()
2904         data_handler = compat_urllib_request_DataHandler()
2905
2906         # When passing our own FileHandler instance, build_opener won't add the
2907         # default FileHandler and allows us to disable the file protocol, which
2908         # can be used for malicious purposes (see
2909         # https://github.com/ytdl-org/youtube-dl/issues/8227)
2910         file_handler = compat_urllib_request.FileHandler()
2911
2912         def file_open(*args, **kwargs):
2913             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
2914         file_handler.file_open = file_open
2915
2916         opener = compat_urllib_request.build_opener(
2917             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2918
2919         # Delete the default user-agent header, which would otherwise apply in
2920         # cases where our custom HTTP handler doesn't come into play
2921         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
2922         opener.addheaders = []
2923         self._opener = opener
2924
2925     def encode(self, s):
2926         if isinstance(s, bytes):
2927             return s  # Already encoded
2928
2929         try:
2930             return s.encode(self.get_encoding())
2931         except UnicodeEncodeError as err:
2932             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2933             raise
2934
2935     def get_encoding(self):
2936         encoding = self.params.get('encoding')
2937         if encoding is None:
2938             encoding = preferredencoding()
2939         return encoding
2940
2941     def _write_thumbnails(self, info_dict, filename):  # return the extensions
2942         write_all = self.params.get('write_all_thumbnails', False)
2943         thumbnails = []
2944         if write_all or self.params.get('writethumbnail', False):
2945             thumbnails = info_dict.get('thumbnails') or []
2946         multiple = write_all and len(thumbnails) > 1
2947
2948         ret = []
2949         for t in thumbnails[::1 if write_all else -1]:
2950             thumb_ext = determine_ext(t['url'], 'jpg')
2951             suffix = '%s.' % t['id'] if multiple else ''
2952             thumb_display_id = '%s ' % t['id'] if multiple else ''
2953             t['filename'] = thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))
2954
2955             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
2956                 ret.append(suffix + thumb_ext)
2957                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2958                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2959             else:
2960                 self.to_screen('[%s] %s: Downloading thumbnail %s ...' %
2961                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2962                 try:
2963                     uf = self.urlopen(t['url'])
2964                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2965                         shutil.copyfileobj(uf, thumbf)
2966                     ret.append(suffix + thumb_ext)
2967                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2968                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2969                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2970                     self.report_warning('Unable to download thumbnail "%s": %s' %
2971                                         (t['url'], error_to_compat_str(err)))
2972             if ret and not write_all:
2973                 break
2974         return ret