#!/usr/bin/env python
# coding: utf-8

from __future__ import absolute_import, unicode_literals

import collections
import contextlib
import copy
import datetime
import errno
import fileinput
import io
import itertools
import json
import locale
import operator
import os
import platform
import re
import shutil
import subprocess
import socket
import sys
import time
import tokenize
import traceback
import random

from string import ascii_letters
from zipimport import zipimporter

from .compat import (
    compat_basestring,
    compat_cookiejar,
    compat_get_terminal_size,
    compat_http_client,
    compat_kwargs,
    compat_numeric_types,
    compat_os_name,
    compat_str,
    compat_tokenize_tokenize,
    compat_urllib_error,
    compat_urllib_request,
    compat_urllib_request_DataHandler,
)
from .utils import (
    age_restricted,
    args_to_str,
    ContentTooShortError,
    date_from_str,
    DateRange,
    DEFAULT_OUTTMPL,
    OUTTMPL_TYPES,
    determine_ext,
    determine_protocol,
    DOT_DESKTOP_LINK_TEMPLATE,
    DOT_URL_LINK_TEMPLATE,
    DOT_WEBLOC_LINK_TEMPLATE,
    DownloadError,
    encode_compat_str,
    encodeFilename,
    error_to_compat_str,
    ExistingVideoReached,
    expand_path,
    ExtractorError,
    float_or_none,
    format_bytes,
    format_field,
    formatSeconds,
    GeoRestrictedError,
    int_or_none,
    iri_to_uri,
    ISO3166Utils,
    locked_file,
    make_dir,
    make_HTTPS_handler,
    MaxDownloadsReached,
    orderedSet,
    PagedList,
    parse_filesize,
    PerRequestProxyHandler,
    platform_name,
    PostProcessingError,
    preferredencoding,
    prepend_extension,
    register_socks_protocols,
    render_table,
    replace_extension,
    RejectedVideoReached,
    SameFileError,
    sanitize_filename,
    sanitize_path,
    sanitize_url,
    sanitized_Request,
    std_headers,
    str_or_none,
    strftime_or_none,
    subtitles_filename,
    to_high_limit_path,
    UnavailableVideoError,
    url_basename,
    version_tuple,
    write_json_file,
    write_string,
    YoutubeDLCookieJar,
    YoutubeDLCookieProcessor,
    YoutubeDLHandler,
    YoutubeDLRedirectHandler,
    process_communicate_or_kill,
)
from .cache import Cache
from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER, _PLUGIN_CLASSES
from .extractor.openload import PhantomJSwrapper
from .downloader import get_suitable_downloader
from .downloader.rtmp import rtmpdump_version
from .postprocessor import (
    FFmpegFixupM3u8PP,
    FFmpegFixupM4aPP,
    FFmpegFixupStretchedPP,
    FFmpegMergerPP,
    FFmpegPostProcessor,
    # FFmpegSubtitlesConvertorPP,
    get_postprocessor,
    MoveFilesAfterDownloadPP,
)
from .version import __version__

if compat_os_name == 'nt':
    import ctypes


class YoutubeDL(object):
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. Since the downloader, given a video URL, does not know how
    to extract all the needed information (that is the task of the
    InfoExtractors), it has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it over to the first InfoExtractor
    it finds that reports being able to handle it. The InfoExtractor
    extracts all the information about the video or videos the URL
    refers to, and YoutubeDL processes the extracted information,
    possibly using a File Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

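    A minimal usage sketch (the URL below is a placeholder; the option
    keys used here are documented under "Available options"):

        from yt_dlp import YoutubeDL

        with YoutubeDL({'format': 'bestvideo+bestaudio/best'}) as ydl:
            ydl.extract_info('https://example.com/some/video', download=True)
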
    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    ap_mso:            Adobe Pass multiple-system operator identifier.
    ap_username:       Multiple-system operator account username.
    ap_password:       Multiple-system operator account password.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceurl:          Force printing final URL.
    forcetitle:        Force printing title.
    forceid:           Force printing ID.
    forcethumbnail:    Force printing thumbnail URL.
    forcedescription:  Force printing description.
    forcefilename:     Force printing final filename.
    forceduration:     Force printing duration.
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    force_write_download_archive: Force writing download archive regardless
                       of 'skip_download' or 'simulate'.
    simulate:          Do not download the video files.
    format:            Video format code. See "FORMAT SELECTION" for more details.
    allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
    format_sort:       How to sort the video formats. See "Sorting Formats"
                       for more details.
    format_sort_force: Force the given format_sort. See "Sorting Formats"
                       for more details.
    allow_multiple_video_streams: Allow multiple video streams to be merged
                       into a single file
    allow_multiple_audio_streams: Allow multiple audio streams to be merged
                       into a single file
    paths:             Dictionary of output paths. The allowed keys are 'home',
                       'temp' and the keys of OUTTMPL_TYPES (in utils.py)
    outtmpl:           Dictionary of templates for output names. Allowed keys
                       are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
                       A string is also accepted for backward compatibility
    outtmpl_na_placeholder: Placeholder for unavailable meta fields.
    restrictfilenames: Do not allow "&" and spaces in file names
    trim_file_name:    Limit length of filename (extension excluded)
    windowsfilenames:  Force the filenames to be windows compatible
    ignoreerrors:      Do not stop on download errors
                       (Default True when running yt-dlp,
                       but False when directly accessing YoutubeDL class)
    force_generic_extractor: Force downloader to use the generic extractor
    overwrites:        Overwrite all video and metadata files if True,
                       overwrite only non-video files if None
                       and don't overwrite any file if False
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    playlist_items:    Specific indices of playlist to download.
    playlistreverse:   Download playlist items in reverse order.
    playlistrandom:    Download playlist items in random order.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Log messages to stderr instead of stdout.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video metadata to a .info.json file
    clean_infojson:    Remove private fields from the infojson
    writecomments:     Extract video comments. These will not be written to disk
                       unless writeinfojson is also given
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    allow_playlist_files: Whether to write playlists' description, infojson etc
                       also to disk when using the 'write*' options
    write_all_thumbnails: Write all thumbnail formats to files
    writelink:         Write an internet shortcut file, depending on the
                       current platform (.url/.webloc/.desktop)
    writeurllink:      Write a Windows internet shortcut file (.url)
    writewebloclink:   Write a macOS internet shortcut file (.webloc)
    writedesktoplink:  Write a Linux internet shortcut file (.desktop)
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatically generated subtitles to a file
    allsubtitles:      Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   The format code for subtitles
    subtitleslangs:    List of languages of the subtitles to download
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object; download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.
    break_on_existing: Stop the download process after attempting to download a
                       file that is in the archive.
    break_on_reject:   Stop the download process when encountering a video that
                       has been filtered out.
    cookiefile:        File name where cookies should be read from and dumped to
    nocheckcertificate: Do not verify SSL certificates
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       At the moment, this is only supported by YouTube.
    proxy:             URL of the proxy server to use
    geo_verification_proxy: URL of the proxy to use for IP address verification
                       on geo-restricted sites.
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic: Print out sent and received HTTP traffic
    include_ads:       Download ads as well
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Do not resolve URLs, return the immediate result.
                       Pass in 'in_playlist' to only show this behavior for
                       playlist items.
    postprocessors:    A list of dictionaries, each with an entry
                       * key: The name of the postprocessor. See
                         yt_dlp/postprocessor/__init__.py for a list.
                       * _after_move: Optional. If True, run this post_processor
                         after 'MoveFilesAfterDownload'
                       as well as any further keyword arguments for the
                       postprocessor.
    post_hooks:        A list of functions that get called as the final step
                       for each video file, after all postprocessors have been
                       called. The filename will be passed as the only argument.
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * status: One of "downloading", "error", or "finished".
                         Check this first and ignore unknown values.

                       If status is one of "downloading" or "finished", the
                       following properties may also be present:
                       * filename: The final filename (always present)
                       * tmpfilename: The filename we're currently writing to
                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * total_bytes_estimate: Guess of the eventual file size,
                         None if unavailable.
                       * elapsed: The number of seconds since download started.
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                         unknown
                       * fragment_index: The counter of the currently
                         downloaded video fragment.
                       * fragment_count: The number of fragments (= individual
                         files that will be merged)

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
                       A small example hook is sketched at the end of this
                       docstring.
    merge_output_format: Extension to use when merging formats.
    final_ext:         Expected final extension; used to detect when the file was
                       already downloaded and converted. "merge_output_format" is
                       replaced by this extension when given
    fixup:             Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                         about it, warn otherwise (default)
    source_address:    Client-side IP address to bind to.
    call_home:         Boolean, true iff we are allowed to contact the
                       yt-dlp servers for debugging. (BROKEN)
    sleep_interval_requests: Number of seconds to sleep between requests
                       during extraction
    sleep_interval:    Number of seconds to sleep before each download when
                       used alone or a lower bound of a range for randomized
                       sleep before each download (minimum possible number
                       of seconds to sleep) when used along with
                       max_sleep_interval.
    max_sleep_interval: Upper bound of a range for randomized sleep before each
                       download (maximum possible number of seconds to sleep).
                       Must only be used along with sleep_interval.
                       Actual sleep time will be a random float from range
                       [sleep_interval; max_sleep_interval].
    sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.
    match_filter:      A function that gets called with the info_dict of
                       every video.
                       If it returns a message, the video is ignored.
                       If it returns None, the video is downloaded.
                       match_filter_func in utils.py is one example for this;
                       another small sketch appears at the end of this
                       docstring.
    no_color:          Do not emit color codes in output.
    geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
                       HTTP header
    geo_bypass_country:
                       Two-letter ISO 3166-1 alpha-2 country code that will be
                       used for explicit geographic restriction bypassing via
                       faking the X-Forwarded-For HTTP header
    geo_bypass_ip_block:
                       IP range in CIDR notation that will be used similarly to
                       geo_bypass_country

    The following options determine which downloader is picked:
    external_downloader: Executable of the external downloader to call.
                       None or unset for standard (built-in) downloader.
    hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
                       if True; use ffmpeg/avconv if False; if None, use the
                       downloader suggested by the extractor.

    The following parameters are not used by YoutubeDL itself; they are used by
    the downloader (see yt_dlp/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
    noresizebuffer, retries, continuedl, noprogress, consoletitle,
    xattr_set_filesize, external_downloader_args, hls_use_mpegts,
    http_chunk_size.

    The following options are used by the post processors:
    prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
                       otherwise prefer ffmpeg. (avconv support is deprecated)
    ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
                       to the binary or its containing directory.
    postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
                       and a list of additional command-line arguments for the
                       postprocessor/executable. The dict can also have "PP+EXE" keys
                       which are used when the given exe is used by the given PP.
                       Use 'default' as the name for arguments to be passed to all PP

    The following options are used by the extractors:
    extractor_retries: Number of times to retry for known errors
    dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
    hls_split_discontinuity: Split HLS playlists to different formats at
                       discontinuities such as ad breaks (default: False)
    youtube_include_dash_manifest: If True (default), DASH manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about DASH. (only for youtube)
    youtube_include_hls_manifest: If True (default), HLS manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about HLS. (only for youtube)
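
    For example (a sketch only; both callables are hypothetical helpers,
    wired up through the documented 'progress_hooks' and 'match_filter'
    options):

        def my_hook(d):
            if d['status'] == 'finished':
                print('Finished downloading %s' % d['filename'])

        def my_filter(info_dict):
            duration = info_dict.get('duration')
            if duration and duration > 3600:
                return 'Skipping %s: longer than an hour' % info_dict.get('title')
            return None  # None means the video is downloaded

        ydl = YoutubeDL({'progress_hooks': [my_hook], 'match_filter': my_filter})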
397 """
398
399 _NUMERIC_FIELDS = set((
400 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
401 'timestamp', 'upload_year', 'upload_month', 'upload_day',
402 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
403 'average_rating', 'comment_count', 'age_limit',
404 'start_time', 'end_time',
405 'chapter_number', 'season_number', 'episode_number',
406 'track_number', 'disc_number', 'release_year',
407 'playlist_index',
408 ))
409
410 params = None
411 _ies = []
412 _pps = {'beforedl': [], 'aftermove': [], 'normal': []}
413 __prepare_filename_warned = False
414 _first_webpage_request = True
415 _download_retcode = None
416 _num_downloads = None
417 _playlist_level = 0
418 _playlist_urls = set()
419 _screen_file = None
420
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options."""
        if params is None:
            params = {}
        self._ies = []
        self._ies_instances = {}
        self._pps = {'beforedl': [], 'aftermove': [], 'normal': []}
        self.__prepare_filename_warned = False
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = {
            # Default parameters
            'nocheckcertificate': False,
        }
        self.params.update(params)
        self.cache = Cache(self)
        self.archive = set()

444 """Preload the archive, if any is specified"""
445 def preload_download_archive(self):
446 fn = self.params.get('download_archive')
447 if fn is None:
448 return False
449 try:
450 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
451 for line in archive_file:
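                        # Each stripped line is one archive entry; in practice
                        # entries are '<extractor> <video id>' pairs (what
                        # in_download_archive checks against), though any
                        # line is accepted verbatim here.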
                        self.archive.add(line.strip())
            except IOError as ioe:
                if ioe.errno != errno.ENOENT:
                    raise
                return False
            return True

        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning(
                    '%s is deprecated. Use %s instead.' % (option, suggestion))
                return True
            return False

        if self.params.get('verbose'):
            self.to_stdout('[debug] Loading archive file %r' % self.params.get('download_archive'))

        preload_download_archive(self)

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        if self.params.get('final_ext'):
            if self.params.get('merge_output_format'):
                self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
            self.params['merge_output_format'] = self.params['final_ext']

        if 'overwrites' in self.params and self.params['overwrites'] is None:
            del self.params['overwrites']

        check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N is the number of digits')
        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')

        if params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround. Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self.outtmpl_dict = self.parse_outtmpl()

        self._setup_opener()

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)
            del pp_def['key']
            if 'when' in pp_def:
                when = pp_def['when']
                del pp_def['when']
            else:
                when = 'normal'
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp, when=when)

        for ph in self.params.get('post_hooks', []):
            self.add_post_hook(ph)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)

        register_socks_protocols()

    def warn_if_short_id(self, argv):
        # short YouTube ID starting with dash?
        idxs = [
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
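        # e.g. a stray '-abcdefghij' (a placeholder 11-char ID starting
        # with '-') is collected above and moved after a '--' separator below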
        if idxs:
            correct_argv = (
                ['yt-dlp']
                + [a for i, a in enumerate(argv) if i not in idxs]
                + ['--'] + [argv[i] for i in idxs]
            )
            self.report_warning(
                'Long argument string detected. '
                'Use -- to separate parameters and URLs, like this:\n%s\n' %
                args_to_str(correct_argv))

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        self._ies.append(ie)
        if not isinstance(ie, type):
            self._ies_instances[ie.ie_key()] = ie
            ie.set_downloader(self)

    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key; it will try to get one from
        the _ies list, and if there is no instance it will create a new one and
        add it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
        return ie

    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        for ie in gen_extractor_classes():
            self.add_info_extractor(ie)

    def add_post_processor(self, pp, when='normal'):
        """Add a PostProcessor object to the end of the chain."""
        self._pps[when].append(pp)
        pp.set_downloader(self)

    def add_post_hook(self, ph):
        """Add the post hook"""
        self._post_hooks.append(ph)

    def add_progress_hook(self, ph):
        """Add the progress hook (currently only for the file downloader)"""
        self._progress_hooks.append(ph)

    def _bidi_workaround(self, message):
        if not hasattr(self, '_output_channel'):
            return message

        assert hasattr(self, '_output_process')
        assert isinstance(message, compat_str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode('utf-8'))
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode('utf-8')
                      for _ in range(line_count))
        return res[:-len('\n')]

    def to_screen(self, message, skip_eol=False):
        """Print message to stdout if not in quiet mode."""
        return self.to_stdout(message, skip_eol, check_quiet=True)

    def _write_string(self, s, out=None):
        write_string(s, out=out, encoding=self.params.get('encoding'))

    def to_stdout(self, message, skip_eol=False, check_quiet=False):
        """Print message to stdout if not in quiet mode."""
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        elif not check_quiet or not self.params.get('quiet', False):
            message = self._bidi_workaround(message)
            terminator = ['\n', ''][skip_eol]
            output = message + terminator

            self._write_string(output, self._screen_file)

    def to_stderr(self, message):
        """Print message to stderr."""
        assert isinstance(message, compat_str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        else:
            message = self._bidi_workaround(message)
            output = message + '\n'
            self._write_string(output, self._err_file)

    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
            return
        if compat_os_name == 'nt':
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            self._write_string('\033]0;%s\007' % message, self._screen_file)

    def save_console_title(self):
        if not self.params.get('consoletitle', False):
            return
        if self.params.get('simulate', False):
            return
        if compat_os_name != 'nt' and 'TERM' in os.environ:
            # Save the title on stack
            self._write_string('\033[22;0t', self._screen_file)

    def restore_console_title(self):
        if not self.params.get('consoletitle', False):
            return
        if self.params.get('simulate', False):
            return
        if compat_os_name != 'nt' and 'TERM' in os.environ:
            # Restore the title from stack
            self._write_string('\033[23;0t', self._screen_file)

    def __enter__(self):
        self.save_console_title()
        return self

    def __exit__(self, *args):
        self.restore_console_title()

        if self.params.get('cookiefile') is not None:
            self.cookiejar.save(ignore_discard=True, ignore_expires=True)

    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on whether the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    def report_warning(self, message):
        '''
        Print the message to stderr; it will be prefixed with 'WARNING:'.
        If stderr is a tty file the 'WARNING:' will be colored.
        '''
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        else:
            if self.params.get('no_warnings'):
                return
            if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
                _msg_header = '\033[0;33mWARNING:\033[0m'
            else:
                _msg_header = 'WARNING:'
            warning_message = '%s %s' % (_msg_header, message)
            self.to_stderr(warning_message)

    def report_error(self, message, tb=None):
        '''
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
            _msg_header = '\033[0;31mERROR:\033[0m'
        else:
            _msg_header = 'ERROR:'
        error_message = '%s %s' % (_msg_header, message)
        self.trouble(error_message, tb)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen('[download] The file has already been downloaded')

    def report_file_delete(self, file_name):
        """Report that existing file will be deleted."""
        try:
            self.to_screen('Deleting existing file %s' % file_name)
        except UnicodeEncodeError:
            self.to_screen('Deleting existing file')

    def parse_outtmpl(self):
        outtmpl_dict = self.params.get('outtmpl', {})
        if not isinstance(outtmpl_dict, dict):
            outtmpl_dict = {'default': outtmpl_dict}
        outtmpl_dict.update({
            k: v for k, v in DEFAULT_OUTTMPL.items()
            if not outtmpl_dict.get(k)})
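        # e.g. a plain-string outtmpl '%(title)s.%(ext)s' has been wrapped
        # into {'default': '%(title)s.%(ext)s'} above, and any keys the user
        # did not set fall back to the corresponding DEFAULT_OUTTMPL values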
        for key, val in outtmpl_dict.items():
            if isinstance(val, bytes):
                self.report_warning(
                    'Parameter outtmpl is bytes, but should be a unicode string. '
                    'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
        return outtmpl_dict

    def _prepare_filename(self, info_dict, tmpl_type='default'):
        try:
            template_dict = dict(info_dict)

            template_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
                formatSeconds(info_dict['duration'], '-')
                if info_dict.get('duration', None) is not None
                else None)

            template_dict['epoch'] = int(time.time())
            autonumber_size = self.params.get('autonumber_size')
            if autonumber_size is None:
                autonumber_size = 5
            template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
            if template_dict.get('resolution') is None:
                if template_dict.get('width') and template_dict.get('height'):
                    template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
                elif template_dict.get('height'):
                    template_dict['resolution'] = '%sp' % template_dict['height']
                elif template_dict.get('width'):
                    template_dict['resolution'] = '%dx?' % template_dict['width']

            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id' or k.endswith('_id')))
            template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
                                 for k, v in template_dict.items()
                                 if v is not None and not isinstance(v, (list, tuple, dict)))
            na = self.params.get('outtmpl_na_placeholder', 'NA')
            template_dict = collections.defaultdict(lambda: na, template_dict)

            outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
            force_ext = OUTTMPL_TYPES.get(tmpl_type)

            # For fields playlist_index and autonumber convert all occurrences
            # of %(field)s to %(field)0Nd for backward compatibility
            field_size_compat_map = {
                'playlist_index': len(str(template_dict['n_entries'])),
                'autonumber': autonumber_size,
            }
            FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
            mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
            if mobj:
                outtmpl = re.sub(
                    FIELD_SIZE_COMPAT_RE,
                    r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
                    outtmpl)
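            # e.g. a bare '%(autonumber)s' has now become '%(autonumber)05d'
            # (using the default autonumber_size of 5 set above)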

            # As of [1] format syntax is:
            #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
            # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
            FORMAT_RE = r'''(?x)
                (?<!%)
                %
                \({0}\)  # mapping key
                (?:[#0\-+ ]+)?  # conversion flags (optional)
                (?:\d+)?  # minimum field width (optional)
                (?:\.\d+)?  # precision (optional)
                [hlL]?  # length modifier (optional)
                (?P<type>[diouxXeEfFgGcrs%])  # conversion type
            '''
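            # e.g. FORMAT_RE.format('title') matches '%(title).50s' but not
            # '%%(title)s' (the lookbehind above skips escaped percent signs)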

            numeric_fields = list(self._NUMERIC_FIELDS)

            # Format date
            FORMAT_DATE_RE = FORMAT_RE.format(r'(?P<key>(?P<field>\w+)>(?P<format>.+?))')
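            # This implements the '%(field>strftime_format)s' syntax: e.g. a
            # template containing '%(upload_date>%Y-%m-%d)s' is resolved
            # through strftime_or_none below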
            for mobj in re.finditer(FORMAT_DATE_RE, outtmpl):
                conv_type, field, frmt, key = mobj.group('type', 'field', 'format', 'key')
                if key in template_dict:
                    continue
                value = strftime_or_none(template_dict.get(field), frmt, na)
                if conv_type in 'crs':  # string
                    value = sanitize(field, value)
                else:  # number
                    numeric_fields.append(key)
                    value = float_or_none(value, default=None)
                if value is not None:
                    template_dict[key] = value

            # Missing numeric fields used together with integer presentation types
            # in format specification will break the argument substitution since
            # string NA placeholder is returned for missing fields. We will patch
            # output template for missing fields to meet string presentation type.
            for numeric_field in numeric_fields:
                if numeric_field not in template_dict:
                    outtmpl = re.sub(
                        FORMAT_RE.format(re.escape(numeric_field)),
                        r'%({0})s'.format(numeric_field), outtmpl)

            # expand_path translates '%%' into '%' and '$$' into '$'
            # correspondingly that is not what we want since we need to keep
            # '%%' intact for template dict substitution step. Working around
            # with boundary-alike separator hack.
            sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
            outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

            # outtmpl should be expand_path'ed before template dict substitution
            # because meta fields may contain env variables we don't want to
            # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
            # title "Hello $PATH", we don't want `$PATH` to be expanded.
            filename = expand_path(outtmpl).replace(sep, '') % template_dict

            if force_ext is not None:
                filename = replace_extension(filename, force_ext, template_dict.get('ext'))

            # https://github.com/blackjack4494/youtube-dlc/issues/85
            trim_file_name = self.params.get('trim_file_name', False)
            if trim_file_name:
                fn_groups = filename.rsplit('.')
                ext = fn_groups[-1]
                sub_ext = ''
                if len(fn_groups) > 2:
                    sub_ext = fn_groups[-2]
                filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
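                # e.g. with trim_file_name=10, 'a very long title.en.vtt'
                # becomes 'a very lon.en.vtt' (base name truncated, the last
                # two dot-suffixes kept)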

            return filename
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None

    def prepare_filename(self, info_dict, dir_type='', warn=False):
        """Generate the output filename."""
        paths = self.params.get('paths', {})
        assert isinstance(paths, dict)
        filename = self._prepare_filename(info_dict, dir_type or 'default')

        if warn and not self.__prepare_filename_warned:
            if not paths:
                pass
            elif filename == '-':
                self.report_warning('--paths is ignored when outputting to stdout')
            elif os.path.isabs(filename):
                self.report_warning('--paths is ignored since an absolute path is given in output template')
            self.__prepare_filename_warned = True
        if filename == '-' or not filename:
            return filename

        homepath = expand_path(paths.get('home', '').strip())
        assert isinstance(homepath, compat_str)
        subdir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
        assert isinstance(subdir, compat_str)
        path = os.path.join(homepath, subdir, filename)

        # Temporary fix for #4787
        # 'Treat' all problem characters by passing filename through preferredencoding
        # to workaround encoding issues with subprocess on python2 @ Windows
        if sys.version_info < (3, 0) and sys.platform == 'win32':
            path = encodeFilename(path, True).decode(preferredencoding())
        return sanitize_path(path, force=self.params.get('windowsfilenames'))

    def _match_entry(self, info_dict, incomplete):
        """ Returns None if the file should be downloaded """

        def check_filter():
            video_title = info_dict.get('title', info_dict.get('id', 'video'))
            if 'title' in info_dict:
                # This can happen when we're just evaluating the playlist
                title = info_dict['title']
                matchtitle = self.params.get('matchtitle', False)
                if matchtitle:
                    if not re.search(matchtitle, title, re.IGNORECASE):
                        return '"' + title + '" title did not match pattern "' + matchtitle + '"'
                rejecttitle = self.params.get('rejecttitle', False)
                if rejecttitle:
                    if re.search(rejecttitle, title, re.IGNORECASE):
                        return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
            date = info_dict.get('upload_date')
            if date is not None:
                dateRange = self.params.get('daterange', DateRange())
                if date not in dateRange:
                    return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
            view_count = info_dict.get('view_count')
            if view_count is not None:
                min_views = self.params.get('min_views')
                if min_views is not None and view_count < min_views:
                    return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
                max_views = self.params.get('max_views')
                if max_views is not None and view_count > max_views:
                    return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
            if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
                return 'Skipping "%s" because it is age restricted' % video_title
            if self.in_download_archive(info_dict):
                return '%s has already been recorded in the archive' % video_title

            if not incomplete:
                match_filter = self.params.get('match_filter')
                if match_filter is not None:
                    ret = match_filter(info_dict)
                    if ret is not None:
                        return ret
            return None

        reason = check_filter()
        if reason is not None:
            self.to_screen('[download] ' + reason)
            if reason.endswith('has already been recorded in the archive') and self.params.get('break_on_existing', False):
                raise ExistingVideoReached()
            elif self.params.get('break_on_reject', False):
                raise RejectedVideoReached()
        return reason

    @staticmethod
    def add_extra_info(info_dict, extra_info):
        '''Set the keys from extra_info in info dict if they are missing'''
        for key, value in extra_info.items():
            info_dict.setdefault(key, value)

    def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info={},
                     process=True, force_generic_extractor=False):
        '''
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result
        '''

        if not ie_key and force_generic_extractor:
            ie_key = 'Generic'

        if ie_key:
            ies = [self.get_info_extractor(ie_key)]
        else:
            ies = self._ies

        for ie in ies:
            if not ie.suitable(url):
                continue

            ie_key = ie.ie_key()
            ie = self.get_info_extractor(ie_key)
            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            try:
                temp_id = str_or_none(
                    ie.extract_id(url) if callable(getattr(ie, 'extract_id', None))
                    else ie._match_id(url))
            except (AssertionError, IndexError, AttributeError):
                temp_id = None
            if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
                self.to_screen("[%s] %s: has already been recorded in archive" % (
                    ie_key, temp_id))
                break
            return self.__extract_info(url, ie, download, extra_info, process, info_dict)
        else:
            self.report_error('no suitable InfoExtractor for URL %s' % url)

    def __handle_extraction_exceptions(func):
        def wrapper(self, *args, **kwargs):
            try:
                return func(self, *args, **kwargs)
            except GeoRestrictedError as e:
                msg = e.msg
                if e.countries:
                    msg += '\nThis video is available in %s.' % ', '.join(
                        map(ISO3166Utils.short2full, e.countries))
                msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
                self.report_error(msg)
            except ExtractorError as e:  # An error we somewhat expected
                self.report_error(compat_str(e), e.format_traceback())
            except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
                raise
            except Exception as e:
                if self.params.get('ignoreerrors', False):
                    self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
                else:
                    raise
        return wrapper

    @__handle_extraction_exceptions
    def __extract_info(self, url, ie, download, extra_info, process, info_dict):
        ie_result = ie.extract(url)
        if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
            return
        if isinstance(ie_result, list):
            # Backwards compatibility: old IE result format
            ie_result = {
                '_type': 'compat_list',
                'entries': ie_result,
            }
        if info_dict:
            if info_dict.get('id'):
                ie_result['id'] = info_dict['id']
            if info_dict.get('title'):
                ie_result['title'] = info_dict['title']
        self.add_default_extra_info(ie_result, ie, url)
        if process:
            return self.process_ie_result(ie_result, download, extra_info)
        else:
            return ie_result

    def add_default_extra_info(self, ie_result, ie, url):
        self.add_extra_info(ie_result, {
            'extractor': ie.IE_NAME,
            'webpage_url': url,
            'webpage_url_basename': url_basename(url),
            'extractor_key': ie.ie_key(),
        })

    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie (may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """
        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            ie_result['url'] = sanitize_url(ie_result['url'])
            extract_flat = self.params.get('extract_flat', False)
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                    or extract_flat is True):
                self.__forced_printings(ie_result, self.prepare_filename(ie_result), incomplete=True)
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download, info_dict=ie_result,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # extract_info may return None when ignoreerrors is enabled and
            # extraction failed with an error, don't crash and return early
            # in this case
            if not info:
                return info

            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            # Extracted info may not be a video result (i.e.
            # info.get('_type', 'video') != video) but rather an url or
            # url_transparent. In such cases outer metadata (from ie_result)
            # should be propagated to inner one (info). For this to happen
            # _type of info should be overridden with url_transparent. This
            # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
            if new_result.get('_type') == 'url':
                new_result['_type'] = 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type in ('playlist', 'multi_video'):
            # Protect from infinite recursion due to recursively nested playlists
            # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
            webpage_url = ie_result['webpage_url']
            if webpage_url in self._playlist_urls:
                self.to_screen(
                    '[download] Skipping already downloaded playlist: %s'
                    % (ie_result.get('title') or ie_result.get('id')))
                return

            self._playlist_level += 1
            self._playlist_urls.add(webpage_url)
            try:
                return self.__process_playlist(ie_result, download)
            finally:
                self._playlist_level -= 1
                if not self._playlist_level:
                    self._playlist_urls.clear()
        elif result_type == 'compat_list':
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))

            def _fixup(r):
                self.add_extra_info(
                    r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    }
                )
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)

    def _ensure_dir_exists(self, path):
        return make_dir(path, self.report_error)

    def __process_playlist(self, ie_result, download):
        # We process each entry in the playlist
        playlist = ie_result.get('title') or ie_result.get('id')
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        if self.params.get('allow_playlist_files', True):
            ie_copy = {
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'playlist_index': 0
            }
            ie_copy.update(dict(ie_result))

            if self.params.get('writeinfojson', False):
                infofn = self.prepare_filename(ie_copy, 'pl_infojson')
                if not self._ensure_dir_exists(encodeFilename(infofn)):
                    return
                if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
                    self.to_screen('[info] Playlist metadata is already present')
                else:
                    playlist_info = dict(ie_result)
                    # playlist_info['entries'] = list(playlist_info['entries'])  # Entries is a generator which should not be resolved here
                    self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
                    try:
                        write_json_file(self.filter_requested_info(playlist_info, self.params.get('clean_infojson', True)), infofn)
                    except (OSError, IOError):
                        self.report_error('Cannot write playlist metadata to JSON file ' + infofn)

            if self.params.get('writedescription', False):
                descfn = self.prepare_filename(ie_copy, 'pl_description')
                if not self._ensure_dir_exists(encodeFilename(descfn)):
                    return
                if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
                    self.to_screen('[info] Playlist description is already present')
                elif ie_result.get('description') is None:
                    self.report_warning('There\'s no playlist description to write.')
                else:
                    try:
                        self.to_screen('[info] Writing playlist description to: ' + descfn)
                        with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                            descfile.write(ie_result['description'])
                    except (OSError, IOError):
                        self.report_error('Cannot write playlist description file ' + descfn)
                        return

        playlist_results = []

        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items')
        playlistitems = None
        if playlistitems_str is not None:
            def iter_playlistitems(format):
                for string_segment in format.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
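            # e.g. iter_playlistitems('1-3,7') yields 1, 2, 3, 7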
            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

        ie_entries = ie_result['entries']

        def make_playlistitems_entries(list_ie_entries):
            num_entries = len(list_ie_entries)
            return [
                list_ie_entries[i - 1] for i in playlistitems
                if -num_entries <= i - 1 < num_entries]

        def report_download(num_entries):
            self.to_screen(
                '[%s] playlist %s: Downloading %d videos' %
                (ie_result['extractor'], playlist, num_entries))

        if isinstance(ie_entries, list):
            n_all_entries = len(ie_entries)
            if playlistitems:
                entries = make_playlistitems_entries(ie_entries)
            else:
                entries = ie_entries[playliststart:playlistend]
            n_entries = len(entries)
            self.to_screen(
                '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
        elif isinstance(ie_entries, PagedList):
            if playlistitems:
                entries = []
                for item in playlistitems:
                    entries.extend(ie_entries.getslice(
                        item - 1, item
                    ))
            else:
                entries = ie_entries.getslice(
                    playliststart, playlistend)
            n_entries = len(entries)
            report_download(n_entries)
        else:  # iterable
            if playlistitems:
                entries = make_playlistitems_entries(list(itertools.islice(
                    ie_entries, 0, max(playlistitems))))
            else:
                entries = list(itertools.islice(
                    ie_entries, playliststart, playlistend))
            n_entries = len(entries)
            report_download(n_entries)

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]

        if self.params.get('playlistrandom', False):
            random.shuffle(entries)

        x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

        for i, entry in enumerate(entries, 1):
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            # This __x_forwarded_for_ip thing is a bit ugly but requires
            # minimal changes
            if x_forwarded_for:
                entry['__x_forwarded_for_ip'] = x_forwarded_for
            extra = {
                'n_entries': n_entries,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            if self._match_entry(entry, incomplete=True) is not None:
                continue

            entry_result = self.__process_iterable_entry(entry, download, extra)
            # TODO: skip failed (empty) entries?
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        self.to_screen('[download] Finished downloading playlist: %s' % playlist)
        return ie_result

    @__handle_extraction_exceptions
    def __process_iterable_entry(self, entry, download, extra_info):
        return self.process_ie_result(
            entry, download=download, extra_info=extra_info)

    def _build_format_filter(self, filter_spec):
        " Returns a function to filter the formats according to the filter_spec "

        OPERATORS = {
            '<': operator.lt,
            '<=': operator.le,
            '>': operator.gt,
            '>=': operator.ge,
            '=': operator.eq,
            '!=': operator.ne,
        }
        operator_rex = re.compile(r'''(?x)\s*
            (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
            \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
            $
            ''' % '|'.join(map(re.escape, OPERATORS.keys())))
        m = operator_rex.search(filter_spec)
        if m:
            try:
                comparison_value = int(m.group('value'))
            except ValueError:
                comparison_value = parse_filesize(m.group('value'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('value') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid value %r in format specification %r' % (
                            m.group('value'), filter_spec))
            op = OPERATORS[m.group('op')]

        if not m:
            STR_OPERATORS = {
                '=': operator.eq,
                '^=': lambda attr, value: attr.startswith(value),
                '$=': lambda attr, value: attr.endswith(value),
                '*=': lambda attr, value: value in attr,
            }
            str_operator_rex = re.compile(r'''(?x)
                \s*(?P<key>[a-zA-Z0-9._-]+)
                \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
                \s*(?P<value>[a-zA-Z0-9._-]+)
                \s*$
                ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
            m = str_operator_rex.search(filter_spec)
            if m:
                comparison_value = m.group('value')
                str_op = STR_OPERATORS[m.group('op')]
                if m.group('negation'):
                    op = lambda attr, value: not str_op(attr, value)
                else:
                    op = str_op

        if not m:
            raise ValueError('Invalid filter specification %r' % filter_spec)

        def _filter(f):
            actual_value = f.get(m.group('key'))
            if actual_value is None:
                return m.group('none_inclusive')
            return op(actual_value, comparison_value)
        return _filter

    def _default_format_spec(self, info_dict, download=True):

        def can_merge():
            merger = FFmpegMergerPP(self)
            return merger.available and merger.can_merge()

        prefer_best = (
            not self.params.get('simulate', False)
            and download
            and (
                not can_merge()
                or info_dict.get('is_live', False)
                or self.outtmpl_dict['default'] == '-'))

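        # i.e. fall back to a pre-merged 'best' when merging is impossible
        # (no working ffmpeg, a live stream, or streaming to stdout)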
        return (
            'best/bestvideo+bestaudio'
            if prefer_best
            else 'bestvideo*+bestaudio/best'
            if not self.params.get('allow_multiple_audio_streams', False)
            else 'bestvideo+bestaudio/best')

    def build_format_selector(self, format_spec):
        def syntax_error(note, start):
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                                  'video': self.params.get('allow_multiple_video_streams', False)}

        def _parse_filter(tokens):
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

1474 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1475 selectors = []
1476 current_selector = None
1477 for type, string, start, _, _ in tokens:
1478 # ENCODING is only defined in python 3.x
1479 if type == getattr(tokenize, 'ENCODING', None):
1480 continue
1481 elif type in [tokenize.NAME, tokenize.NUMBER]:
1482 current_selector = FormatSelector(SINGLE, string, [])
1483 elif type == tokenize.OP:
1484 if string == ')':
1485 if not inside_group:
1486 # ')' will be handled by the parentheses group
1487 tokens.restore_last_token()
1488 break
1489 elif inside_merge and string in ['/', ',']:
1490 tokens.restore_last_token()
1491 break
1492 elif inside_choice and string == ',':
1493 tokens.restore_last_token()
1494 break
1495 elif string == ',':
1496 if not current_selector:
1497 raise syntax_error('"," must follow a format selector', start)
1498 selectors.append(current_selector)
1499 current_selector = None
1500 elif string == '/':
1501 if not current_selector:
1502 raise syntax_error('"/" must follow a format selector', start)
1503 first_choice = current_selector
1504 second_choice = _parse_format_selection(tokens, inside_choice=True)
1505 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1506 elif string == '[':
1507 if not current_selector:
1508 current_selector = FormatSelector(SINGLE, 'best', [])
1509 format_filter = _parse_filter(tokens)
1510 current_selector.filters.append(format_filter)
1511 elif string == '(':
1512 if current_selector:
1513 raise syntax_error('Unexpected "("', start)
1514 group = _parse_format_selection(tokens, inside_group=True)
1515 current_selector = FormatSelector(GROUP, group, [])
1516 elif string == '+':
1517 if not current_selector:
1518 raise syntax_error('Unexpected "+"', start)
1519 selector_1 = current_selector
1520 selector_2 = _parse_format_selection(tokens, inside_merge=True)
1521 if not selector_2:
1522 raise syntax_error('Expected a selector', start)
1523 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1524 else:
1525 raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1526 elif type == tokenize.ENDMARKER:
1527 break
1528 if current_selector:
1529 selectors.append(current_selector)
1530 return selectors
1531
1532 def _build_selector_function(selector):
1533 if isinstance(selector, list): # ,
1534 fs = [_build_selector_function(s) for s in selector]
1535
1536 def selector_function(ctx):
1537 for f in fs:
1538 for format in f(ctx):
1539 yield format
1540 return selector_function
1541
1542 elif selector.type == GROUP: # ()
1543 selector_function = _build_selector_function(selector.selector)
1544
1545 elif selector.type == PICKFIRST: # /
1546 fs = [_build_selector_function(s) for s in selector.selector]
1547
1548 def selector_function(ctx):
1549 for f in fs:
1550 picked_formats = list(f(ctx))
1551 if picked_formats:
1552 return picked_formats
1553 return []
1554
1555 elif selector.type == SINGLE: # atom
1556 format_spec = selector.selector if selector.selector is not None else 'best'
1557
1558 if format_spec == 'all':
1559 def selector_function(ctx):
1560 formats = list(ctx['formats'])
1561 if formats:
1562 for f in formats:
1563 yield f
1564
1565 else:
1566 format_fallback = False
1567 format_spec_obj = re.match(r'(best|worst|b|w)(video|audio|v|a)?(\*)?$', format_spec)
1568 if format_spec_obj is not None:
1569 format_idx = 0 if format_spec_obj.group(1)[0] == 'w' else -1
1570 format_type = format_spec_obj.group(2)[0] if format_spec_obj.group(2) else False
1571 not_format_type = 'v' if format_type == 'a' else 'a'
1572 format_modified = format_spec_obj.group(3) is not None
1573
1574 format_fallback = not format_type and not format_modified # for b, w
1575 filter_f = ((lambda f: f.get(format_type + 'codec') != 'none')
1576 if format_type and format_modified # bv*, ba*, wv*, wa*
1577 else (lambda f: f.get(not_format_type + 'codec') == 'none')
1578 if format_type # bv, ba, wv, wa
1579 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
1580 if not format_modified # b, w
1581 else None) # b*, w*
1582 else:
1583 format_idx = -1
1584 filter_f = ((lambda f: f.get('ext') == format_spec)
1585 if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav'] # extension
1586 else (lambda f: f.get('format_id') == format_spec)) # id
1587
1588 def selector_function(ctx):
1589 formats = list(ctx['formats'])
1590 if not formats:
1591 return
1592 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
1593 if matches:
1594 yield matches[format_idx]
1595 elif format_fallback == 'force' or (format_fallback and ctx['incomplete_formats']):
                            # For extractors with incomplete formats (audio-only extractors
                            # like soundcloud, or video-only ones like imgur), best/worst
                            # falls back to the best/worst {video,audio}-only format
1599 yield formats[format_idx]
1600
1601 elif selector.type == MERGE: # +
1602 def _merge(formats_pair):
1603 format_1, format_2 = formats_pair
1604
1605 formats_info = []
1606 formats_info.extend(format_1.get('requested_formats', (format_1,)))
1607 formats_info.extend(format_2.get('requested_formats', (format_2,)))
1608
                    if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
                        get_no_more = {'video': False, 'audio': False}
                        remove_idx = set()
                        for (i, fmt_info) in enumerate(formats_info):
                            for aud_vid in ['audio', 'video']:
                                if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                                    if get_no_more[aud_vid]:
                                        remove_idx.add(i)
                                    get_no_more[aud_vid] = True
                        # Collect indices first and pop in reverse: popping while
                        # enumerating would skip elements and shift later indices
                        for i in sorted(remove_idx, reverse=True):
                            formats_info.pop(i)
1617
1618 if len(formats_info) == 1:
1619 return formats_info[0]
1620
1621 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1622 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1623
1624 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1625 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1626
1627 output_ext = self.params.get('merge_output_format')
1628 if not output_ext:
1629 if the_only_video:
1630 output_ext = the_only_video['ext']
1631 elif the_only_audio and not video_fmts:
1632 output_ext = the_only_audio['ext']
1633 else:
1634 output_ext = 'mkv'
1635
1636 new_dict = {
1637 'requested_formats': formats_info,
1638 'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
1639 'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
1640 'ext': output_ext,
1641 }
1642
1643 if the_only_video:
1644 new_dict.update({
1645 'width': the_only_video.get('width'),
1646 'height': the_only_video.get('height'),
1647 'resolution': the_only_video.get('resolution'),
1648 'fps': the_only_video.get('fps'),
1649 'vcodec': the_only_video.get('vcodec'),
1650 'vbr': the_only_video.get('vbr'),
1651 'stretched_ratio': the_only_video.get('stretched_ratio'),
1652 })
1653
1654 if the_only_audio:
1655 new_dict.update({
1656 'acodec': the_only_audio.get('acodec'),
1657 'abr': the_only_audio.get('abr'),
1658 })
1659
1660 return new_dict
1661
1662 selector_1, selector_2 = map(_build_selector_function, selector.selector)
1663
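                # Yield one merged format for every pair produced by the two
                # sub-selectors; ctx is deep-copied so each side can filter
                # its format list independently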
1664 def selector_function(ctx):
1665 for pair in itertools.product(
1666 selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
1667 yield _merge(pair)
1668
1669 filters = [self._build_format_filter(f) for f in selector.filters]
1670
1671 def final_selector(ctx):
1672 ctx_copy = copy.deepcopy(ctx)
1673 for _filter in filters:
1674 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1675 return selector_function(ctx_copy)
1676 return final_selector
1677
1678 stream = io.BytesIO(format_spec.encode('utf-8'))
1679 try:
1680 tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1681 except tokenize.TokenError:
1682 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1683
1684 class TokenIterator(object):
1685 def __init__(self, tokens):
1686 self.tokens = tokens
1687 self.counter = 0
1688
1689 def __iter__(self):
1690 return self
1691
1692 def __next__(self):
1693 if self.counter >= len(self.tokens):
1694 raise StopIteration()
1695 value = self.tokens[self.counter]
1696 self.counter += 1
1697 return value
1698
1699 next = __next__
1700
1701 def restore_last_token(self):
1702 self.counter -= 1
1703
1704 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1705 return _build_selector_function(parsed_selector)
1706
1707 def _calc_headers(self, info_dict):
1708 res = std_headers.copy()
1709
1710 add_headers = info_dict.get('http_headers')
1711 if add_headers:
1712 res.update(add_headers)
1713
1714 cookies = self._calc_cookies(info_dict)
1715 if cookies:
1716 res['Cookie'] = cookies
1717
1718 if 'X-Forwarded-For' not in res:
1719 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1720 if x_forwarded_for_ip:
1721 res['X-Forwarded-For'] = x_forwarded_for_ip
1722
1723 return res
1724
1725 def _calc_cookies(self, info_dict):
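        # Build a throwaway request for the format URL and let the cookiejar
        # attach its Cookie header, then read the computed header back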
1726 pr = sanitized_Request(info_dict['url'])
1727 self.cookiejar.add_cookie_header(pr)
1728 return pr.get_header('Cookie')
1729
1730 def process_video_result(self, info_dict, download=True):
1731 assert info_dict.get('_type', 'video') == 'video'
1732
1733 if 'id' not in info_dict:
1734 raise ExtractorError('Missing "id" field in extractor result')
1735 if 'title' not in info_dict:
1736 raise ExtractorError('Missing "title" field in extractor result')
1737
1738 def report_force_conversion(field, field_not, conversion):
1739 self.report_warning(
1740 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
1741 % (field, field_not, conversion))
1742
1743 def sanitize_string_field(info, string_field):
1744 field = info.get(string_field)
1745 if field is None or isinstance(field, compat_str):
1746 return
1747 report_force_conversion(string_field, 'a string', 'string')
1748 info[string_field] = compat_str(field)
1749
1750 def sanitize_numeric_fields(info):
1751 for numeric_field in self._NUMERIC_FIELDS:
1752 field = info.get(numeric_field)
1753 if field is None or isinstance(field, compat_numeric_types):
1754 continue
1755 report_force_conversion(numeric_field, 'numeric', 'int')
1756 info[numeric_field] = int_or_none(field)
1757
1758 sanitize_string_field(info_dict, 'id')
1759 sanitize_numeric_fields(info_dict)
1760
1761 if 'playlist' not in info_dict:
1762 # It isn't part of a playlist
1763 info_dict['playlist'] = None
1764 info_dict['playlist_index'] = None
1765
1766 thumbnails = info_dict.get('thumbnails')
1767 if thumbnails is None:
1768 thumbnail = info_dict.get('thumbnail')
1769 if thumbnail:
1770 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1771 if thumbnails:
1772 thumbnails.sort(key=lambda t: (
1773 t.get('preference') if t.get('preference') is not None else -1,
1774 t.get('width') if t.get('width') is not None else -1,
1775 t.get('height') if t.get('height') is not None else -1,
1776 t.get('id') if t.get('id') is not None else '', t.get('url')))
1777 for i, t in enumerate(thumbnails):
1778 t['url'] = sanitize_url(t['url'])
1779 if t.get('width') and t.get('height'):
1780 t['resolution'] = '%dx%d' % (t['width'], t['height'])
1781 if t.get('id') is None:
1782 t['id'] = '%d' % i
1783
1784 if self.params.get('list_thumbnails'):
1785 self.list_thumbnails(info_dict)
1786 return
1787
1788 thumbnail = info_dict.get('thumbnail')
1789 if thumbnail:
1790 info_dict['thumbnail'] = sanitize_url(thumbnail)
1791 elif thumbnails:
1792 info_dict['thumbnail'] = thumbnails[-1]['url']
1793
1794 if 'display_id' not in info_dict and 'id' in info_dict:
1795 info_dict['display_id'] = info_dict['id']
1796
1797 for ts_key, date_key in (
1798 ('timestamp', 'upload_date'),
1799 ('release_timestamp', 'release_date'),
1800 ):
1801 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
1802 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1803 # see http://bugs.python.org/issue1646728)
1804 try:
1805 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
1806 info_dict[date_key] = upload_date.strftime('%Y%m%d')
1807 except (ValueError, OverflowError, OSError):
1808 pass
1809
1810 # Auto generate title fields corresponding to the *_number fields when missing
1811 # in order to always have clean titles. This is very common for TV series.
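        # (e.g. episode_number=3 with no episode set yields episode='Episode 3')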
1812 for field in ('chapter', 'season', 'episode'):
1813 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1814 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
1815
1816 for cc_kind in ('subtitles', 'automatic_captions'):
1817 cc = info_dict.get(cc_kind)
1818 if cc:
1819 for _, subtitle in cc.items():
1820 for subtitle_format in subtitle:
1821 if subtitle_format.get('url'):
1822 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
1823 if subtitle_format.get('ext') is None:
1824 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1825
1826 automatic_captions = info_dict.get('automatic_captions')
1827 subtitles = info_dict.get('subtitles')
1828
1829 if self.params.get('listsubtitles', False):
1830 if 'automatic_captions' in info_dict:
1831 self.list_subtitles(
1832 info_dict['id'], automatic_captions, 'automatic captions')
1833 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1834 return
1835
1836 info_dict['requested_subtitles'] = self.process_subtitles(
1837 info_dict['id'], subtitles, automatic_captions)
1838
1839 # We now pick which formats have to be downloaded
1840 if info_dict.get('formats') is None:
1841 # There's only one format available
1842 formats = [info_dict]
1843 else:
1844 formats = info_dict['formats']
1845
1846 if not formats:
1847 raise ExtractorError('No video formats found!')
1848
1849 def is_wellformed(f):
1850 url = f.get('url')
1851 if not url:
1852 self.report_warning(
1853 '"url" field is missing or empty - skipping format, '
1854 'there is an error in extractor')
1855 return False
1856 if isinstance(url, bytes):
1857 sanitize_string_field(f, 'url')
1858 return True
1859
1860 # Filter out malformed formats for better extraction robustness
1861 formats = list(filter(is_wellformed, formats))
1862
1863 formats_dict = {}
1864
1865 # We check that all the formats have the format and format_id fields
1866 for i, format in enumerate(formats):
1867 sanitize_string_field(format, 'format_id')
1868 sanitize_numeric_fields(format)
1869 format['url'] = sanitize_url(format['url'])
1870 if not format.get('format_id'):
1871 format['format_id'] = compat_str(i)
1872 else:
1873 # Sanitize format_id from characters used in format selector expression
1874 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
1875 format_id = format['format_id']
1876 if format_id not in formats_dict:
1877 formats_dict[format_id] = []
1878 formats_dict[format_id].append(format)
1879
1880 # Make sure all formats have unique format_id
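        # (e.g. two formats both reporting id 'hls' become 'hls-0' and 'hls-1')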
1881 for format_id, ambiguous_formats in formats_dict.items():
1882 if len(ambiguous_formats) > 1:
1883 for i, format in enumerate(ambiguous_formats):
1884 format['format_id'] = '%s-%d' % (format_id, i)
1885
1886 for i, format in enumerate(formats):
1887 if format.get('format') is None:
1888 format['format'] = '{id} - {res}{note}'.format(
1889 id=format['format_id'],
1890 res=self.format_resolution(format),
1891 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1892 )
1893 # Automatically determine file extension if missing
1894 if format.get('ext') is None:
1895 format['ext'] = determine_ext(format['url']).lower()
1896 # Automatically determine protocol if missing (useful for format
1897 # selection purposes)
1898 if format.get('protocol') is None:
1899 format['protocol'] = determine_protocol(format)
1900 # Add HTTP headers, so that external programs can use them from the
1901 # json output
1902 full_format_info = info_dict.copy()
1903 full_format_info.update(format)
1904 format['http_headers'] = self._calc_headers(full_format_info)
1905 # Remove private housekeeping stuff
1906 if '__x_forwarded_for_ip' in info_dict:
1907 del info_dict['__x_forwarded_for_ip']
1908
1909 # TODO Central sorting goes here
1910
1911 if formats[0] is not info_dict:
            # Only set the 'formats' field if the original info_dict lists them;
            # otherwise we would end up with a circular reference: the first (and
            # only) element of info_dict['formats'] would be info_dict itself,
            # which can't be exported to json
1916 info_dict['formats'] = formats
1917 if self.params.get('listformats'):
1918 self.list_formats(info_dict)
1919 return
1920
1921 req_format = self.params.get('format')
1922 if req_format is None:
1923 req_format = self._default_format_spec(info_dict, download=download)
1924 if self.params.get('verbose'):
1925 self.to_screen('[debug] Default format spec: %s' % req_format)
1926
1927 format_selector = self.build_format_selector(req_format)
1928
        # During format selection we may need access to the original format set
        # in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether the formats provided by the
        # extractor are incomplete (i.e. whether the extractor provides only
        # video-only or audio-only formats) so that selection works properly for
        # extractors with such incomplete formats (see
        # https://github.com/ytdl-org/youtube-dl/pull/5556).
        # Since formats may be filtered during selection and may no longer match
        # the original set, the results may be incorrect. Thus the original formats
        # (or pre-calculated metrics) should be passed to the selection routines as
        # well. We therefore pass a context object containing all the necessary
        # additional data instead of just the formats.
        # This fixes the incorrect format selection issue (see
        # https://github.com/ytdl-org/youtube-dl/issues/10083).
1944 incomplete_formats = (
1945 # All formats are video-only or
1946 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
1947 # all formats are audio-only
1948 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
1949
1950 ctx = {
1951 'formats': formats,
1952 'incomplete_formats': incomplete_formats,
1953 }
1954
1955 formats_to_download = list(format_selector(ctx))
1956 if not formats_to_download:
1957 raise ExtractorError('requested format not available',
1958 expected=True)
1959
1960 if download:
1961 self.to_screen('[info] Downloading format(s) %s' % ", ".join([f['format_id'] for f in formats_to_download]))
1962 if len(formats_to_download) > 1:
1963 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1964 for format in formats_to_download:
1965 new_info = dict(info_dict)
1966 new_info.update(format)
1967 self.process_info(new_info)
1968 # We update the info dict with the best quality format (backwards compatibility)
1969 info_dict.update(formats_to_download[-1])
1970 return info_dict
1971
1972 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1973 """Select the requested subtitles and their format"""
1974 available_subs = {}
1975 if normal_subtitles and self.params.get('writesubtitles'):
1976 available_subs.update(normal_subtitles)
1977 if automatic_captions and self.params.get('writeautomaticsub'):
1978 for lang, cap_info in automatic_captions.items():
1979 if lang not in available_subs:
1980 available_subs[lang] = cap_info
1981
        if (not self.params.get('writesubtitles')
                and not self.params.get('writeautomaticsub')
                or not available_subs):
1985 return None
1986
1987 if self.params.get('allsubtitles', False):
1988 requested_langs = available_subs.keys()
1989 else:
1990 if self.params.get('subtitleslangs', False):
1991 requested_langs = self.params.get('subtitleslangs')
1992 elif 'en' in available_subs:
1993 requested_langs = ['en']
1994 else:
1995 requested_langs = [list(available_subs.keys())[0]]
1996
1997 formats_query = self.params.get('subtitlesformat', 'best')
1998 formats_preference = formats_query.split('/') if formats_query else []
1999 subs = {}
2000 for lang in requested_langs:
2001 formats = available_subs.get(lang)
2002 if formats is None:
2003 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2004 continue
2005 for ext in formats_preference:
2006 if ext == 'best':
2007 f = formats[-1]
2008 break
2009 matches = list(filter(lambda f: f['ext'] == ext, formats))
2010 if matches:
2011 f = matches[-1]
2012 break
2013 else:
2014 f = formats[-1]
2015 self.report_warning(
2016 'No subtitle format found matching "%s" for language %s, '
2017 'using %s' % (formats_query, lang, f['ext']))
2018 subs[lang] = f
2019 return subs
2020
2021 def __forced_printings(self, info_dict, filename, incomplete):
2022 def print_mandatory(field):
2023 if (self.params.get('force%s' % field, False)
2024 and (not incomplete or info_dict.get(field) is not None)):
2025 self.to_stdout(info_dict[field])
2026
2027 def print_optional(field):
2028 if (self.params.get('force%s' % field, False)
2029 and info_dict.get(field) is not None):
2030 self.to_stdout(info_dict[field])
2031
2032 print_mandatory('title')
2033 print_mandatory('id')
2034 if self.params.get('forceurl', False) and not incomplete:
2035 if info_dict.get('requested_formats') is not None:
2036 for f in info_dict['requested_formats']:
2037 self.to_stdout(f['url'] + f.get('play_path', ''))
2038 else:
2039 # For RTMP URLs, also include the playpath
2040 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
2041 print_optional('thumbnail')
2042 print_optional('description')
2043 if self.params.get('forcefilename', False) and filename is not None:
2044 self.to_stdout(filename)
2045 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
2046 self.to_stdout(formatSeconds(info_dict['duration']))
2047 print_mandatory('format')
2048 if self.params.get('forcejson', False):
2049 self.post_extract(info_dict)
2050 self.to_stdout(json.dumps(info_dict, default=repr))
2051
2052 def process_info(self, info_dict):
2053 """Process a single resolved IE result."""
2054
2055 assert info_dict.get('_type', 'video') == 'video'
2056
2057 info_dict.setdefault('__postprocessors', [])
2058
2059 max_downloads = self.params.get('max_downloads')
2060 if max_downloads is not None:
2061 if self._num_downloads >= int(max_downloads):
2062 raise MaxDownloadsReached()
2063
2064 # TODO: backward compatibility, to be removed
2065 info_dict['fulltitle'] = info_dict['title']
2066
2067 if 'format' not in info_dict:
2068 info_dict['format'] = info_dict['ext']
2069
2070 if self._match_entry(info_dict, incomplete=False) is not None:
2071 return
2072
2073 self.post_extract(info_dict)
2074 self._num_downloads += 1
2075
2076 info_dict = self.pre_process(info_dict)
2077
2078 # info_dict['_filename'] needs to be set for backward compatibility
2079 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2080 temp_filename = self.prepare_filename(info_dict, 'temp')
2081 files_to_move = {}
2082 skip_dl = self.params.get('skip_download', False)
2083
2084 # Forced printings
2085 self.__forced_printings(info_dict, full_filename, incomplete=False)
2086
2087 if self.params.get('simulate', False):
2088 if self.params.get('force_write_download_archive', False):
2089 self.record_download_archive(info_dict)
2090
2091 # Do nothing else if in simulate mode
2092 return
2093
2094 if full_filename is None:
2095 return
2096
2097 if not self._ensure_dir_exists(encodeFilename(full_filename)):
2098 return
2099 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2100 return
2101
2102 if self.params.get('writedescription', False):
2103 descfn = self.prepare_filename(info_dict, 'description')
2104 if not self._ensure_dir_exists(encodeFilename(descfn)):
2105 return
2106 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
2107 self.to_screen('[info] Video description is already present')
2108 elif info_dict.get('description') is None:
2109 self.report_warning('There\'s no description to write.')
2110 else:
2111 try:
2112 self.to_screen('[info] Writing video description to: ' + descfn)
2113 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
2114 descfile.write(info_dict['description'])
2115 except (OSError, IOError):
2116 self.report_error('Cannot write description file ' + descfn)
2117 return
2118
2119 if self.params.get('writeannotations', False):
2120 annofn = self.prepare_filename(info_dict, 'annotation')
2121 if not self._ensure_dir_exists(encodeFilename(annofn)):
2122 return
2123 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2124 self.to_screen('[info] Video annotations are already present')
2125 elif not info_dict.get('annotations'):
2126 self.report_warning('There are no annotations to write.')
2127 else:
2128 try:
2129 self.to_screen('[info] Writing video annotations to: ' + annofn)
2130 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2131 annofile.write(info_dict['annotations'])
2132 except (KeyError, TypeError):
2133 self.report_warning('There are no annotations to write.')
2134 except (OSError, IOError):
2135 self.report_error('Cannot write annotations file: ' + annofn)
2136 return
2137
2138 def dl(name, info, subtitle=False):
2139 fd = get_suitable_downloader(info, self.params)(self, self.params)
2140 for ph in self._progress_hooks:
2141 fd.add_progress_hook(ph)
2142 if self.params.get('verbose'):
2143 self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
2144 return fd.download(name, info, subtitle)
2145
2146 subtitles_are_requested = any([self.params.get('writesubtitles', False),
2147 self.params.get('writeautomaticsub')])
2148
2149 if subtitles_are_requested and info_dict.get('requested_subtitles'):
            # Subtitle download errors are already handled by the relevant IE,
            # so downloading silently carries on when an IE lacks subtitle support
2152 subtitles = info_dict['requested_subtitles']
2153 # ie = self.get_info_extractor(info_dict['extractor_key'])
2154 for sub_lang, sub_info in subtitles.items():
2155 sub_format = sub_info['ext']
2156 sub_fn = self.prepare_filename(info_dict, 'subtitle')
2157 sub_filename = subtitles_filename(
2158 temp_filename if not skip_dl else sub_fn,
2159 sub_lang, sub_format, info_dict.get('ext'))
2160 sub_filename_final = subtitles_filename(sub_fn, sub_lang, sub_format, info_dict.get('ext'))
2161 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
2162 self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
2163 sub_info['filepath'] = sub_filename
2164 files_to_move[sub_filename] = sub_filename_final
2165 else:
2166 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
2167 if sub_info.get('data') is not None:
2168 try:
2169 # Use newline='' to prevent conversion of newline characters
2170 # See https://github.com/ytdl-org/youtube-dl/issues/10268
2171 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
2172 subfile.write(sub_info['data'])
2173 sub_info['filepath'] = sub_filename
2174 files_to_move[sub_filename] = sub_filename_final
2175 except (OSError, IOError):
2176 self.report_error('Cannot write subtitles file ' + sub_filename)
2177 return
2178 else:
2179 try:
2180 dl(sub_filename, sub_info.copy(), subtitle=True)
2181 sub_info['filepath'] = sub_filename
2182 files_to_move[sub_filename] = sub_filename_final
2183 except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2184 self.report_warning('Unable to download subtitle for "%s": %s' %
2185 (sub_lang, error_to_compat_str(err)))
2186 continue
2187
2188 if skip_dl:
2189 if self.params.get('convertsubtitles', False):
2190 # subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
2191 filename_real_ext = os.path.splitext(full_filename)[1][1:]
2192 filename_wo_ext = (
2193 os.path.splitext(full_filename)[0]
2194 if filename_real_ext == info_dict['ext']
2195 else full_filename)
2196 afilename = '%s.%s' % (filename_wo_ext, self.params.get('convertsubtitles'))
2197 # if subconv.available:
2198 # info_dict['__postprocessors'].append(subconv)
2199 if os.path.exists(encodeFilename(afilename)):
2200 self.to_screen(
2201 '[download] %s has already been downloaded and '
2202 'converted' % afilename)
2203 else:
2204 try:
2205 self.post_process(full_filename, info_dict, files_to_move)
2206 except PostProcessingError as err:
2207 self.report_error('Postprocessing: %s' % str(err))
2208 return
2209
2210 if self.params.get('writeinfojson', False):
2211 infofn = self.prepare_filename(info_dict, 'infojson')
2212 if not self._ensure_dir_exists(encodeFilename(infofn)):
2213 return
2214 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
2215 self.to_screen('[info] Video metadata is already present')
2216 else:
2217 self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
2218 try:
2219 write_json_file(self.filter_requested_info(info_dict, self.params.get('clean_infojson', True)), infofn)
2220 except (OSError, IOError):
2221 self.report_error('Cannot write video metadata to JSON file ' + infofn)
2222 return
2223 info_dict['__infojson_filename'] = infofn
2224
2225 thumbfn = self.prepare_filename(info_dict, 'thumbnail')
2226 thumb_fn_temp = temp_filename if not skip_dl else thumbfn
2227 for thumb_ext in self._write_thumbnails(info_dict, thumb_fn_temp):
2228 thumb_filename_temp = replace_extension(thumb_fn_temp, thumb_ext, info_dict.get('ext'))
2229 thumb_filename = replace_extension(thumbfn, thumb_ext, info_dict.get('ext'))
2230 files_to_move[thumb_filename_temp] = thumb_filename
2231
2232 # Write internet shortcut files
2233 url_link = webloc_link = desktop_link = False
2234 if self.params.get('writelink', False):
2235 if sys.platform == "darwin": # macOS.
2236 webloc_link = True
2237 elif sys.platform.startswith("linux"):
2238 desktop_link = True
2239 else: # if sys.platform in ['win32', 'cygwin']:
2240 url_link = True
2241 if self.params.get('writeurllink', False):
2242 url_link = True
2243 if self.params.get('writewebloclink', False):
2244 webloc_link = True
2245 if self.params.get('writedesktoplink', False):
2246 desktop_link = True
2247
2248 if url_link or webloc_link or desktop_link:
2249 if 'webpage_url' not in info_dict:
2250 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2251 return
2252 ascii_url = iri_to_uri(info_dict['webpage_url'])
2253
2254 def _write_link_file(extension, template, newline, embed_filename):
2255 linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2257 self.to_screen('[info] Internet shortcut is already present')
2258 else:
2259 try:
2260 self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2261 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2262 template_vars = {'url': ascii_url}
2263 if embed_filename:
2264 template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2265 linkfile.write(template % template_vars)
2266 except (OSError, IOError):
2267 self.report_error('Cannot write internet shortcut ' + linkfn)
2268 return False
2269 return True
2270
2271 if url_link:
2272 if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2273 return
2274 if webloc_link:
2275 if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2276 return
2277 if desktop_link:
2278 if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2279 return
2280
2281 # Download
2282 must_record_download_archive = False
2283 if not skip_dl:
2284 try:
2285
2286 def existing_file(*filepaths):
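                    # Reuse an already-downloaded (possibly already converted)
                    # file if one exists; otherwise, or when overwrites are
                    # enabled, delete leftovers and return None to force a
                    # fresh download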
2287 ext = info_dict.get('ext')
2288 final_ext = self.params.get('final_ext', ext)
2289 existing_files = []
2290 for file in orderedSet(filepaths):
2291 if final_ext != ext:
2292 converted = replace_extension(file, final_ext, ext)
2293 if os.path.exists(encodeFilename(converted)):
2294 existing_files.append(converted)
2295 if os.path.exists(encodeFilename(file)):
2296 existing_files.append(file)
2297
2298 if not existing_files or self.params.get('overwrites', False):
2299 for file in orderedSet(existing_files):
2300 self.report_file_delete(file)
2301 os.remove(encodeFilename(file))
2302 return None
2303
2304 self.report_file_already_downloaded(existing_files[0])
2305 info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2306 return existing_files[0]
2307
2308 success = True
2309 if info_dict.get('requested_formats') is not None:
2310 downloaded = []
2311 merger = FFmpegMergerPP(self)
2312 if self.params.get('allow_unplayable_formats'):
2313 self.report_warning(
2314 'You have requested merging of multiple formats '
2315 'while also allowing unplayable formats to be downloaded. '
2316 'The formats won\'t be merged to prevent data corruption.')
2317 elif not merger.available:
2318 self.report_warning(
2319 'You have requested merging of multiple formats but ffmpeg is not installed. '
2320 'The formats won\'t be merged.')
2321
2322 def compatible_formats(formats):
2323 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
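                        # (e.g. mp4 video + m4a audio merge as-is, while mp4
                        #  video + webm audio get remuxed into mkv by the caller)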
2324 video_formats = [format for format in formats if format.get('vcodec') != 'none']
2325 audio_formats = [format for format in formats if format.get('acodec') != 'none']
2326 if len(video_formats) > 2 or len(audio_formats) > 2:
2327 return False
2328
2329 # Check extension
2330 exts = set(format.get('ext') for format in formats)
2331 COMPATIBLE_EXTS = (
2332 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2333 set(('webm',)),
2334 )
2335 for ext_sets in COMPATIBLE_EXTS:
2336 if ext_sets.issuperset(exts):
2337 return True
2338 # TODO: Check acodec/vcodec
2339 return False
2340
2341 requested_formats = info_dict['requested_formats']
2342 old_ext = info_dict['ext']
2343 if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
2344 info_dict['ext'] = 'mkv'
2345 self.report_warning(
2346 'Requested formats are incompatible for merge and will be merged into mkv.')
2347
2348 def correct_ext(filename):
2349 filename_real_ext = os.path.splitext(filename)[1][1:]
2350 filename_wo_ext = (
2351 os.path.splitext(filename)[0]
2352 if filename_real_ext == old_ext
2353 else filename)
2354 return '%s.%s' % (filename_wo_ext, info_dict['ext'])
2355
2356 # Ensure filename always has a correct extension for successful merge
2357 full_filename = correct_ext(full_filename)
2358 temp_filename = correct_ext(temp_filename)
2359 dl_filename = existing_file(full_filename, temp_filename)
2360 info_dict['__real_download'] = False
2361 if dl_filename is None:
2362 for f in requested_formats:
2363 new_info = dict(info_dict)
2364 new_info.update(f)
2365 fname = prepend_extension(
2366 self.prepare_filename(new_info, 'temp'),
2367 'f%s' % f['format_id'], new_info['ext'])
2368 if not self._ensure_dir_exists(fname):
2369 return
2370 downloaded.append(fname)
2371 partial_success, real_download = dl(fname, new_info)
2372 info_dict['__real_download'] = info_dict['__real_download'] or real_download
2373 success = success and partial_success
2374 if merger.available and not self.params.get('allow_unplayable_formats'):
2375 info_dict['__postprocessors'].append(merger)
2376 info_dict['__files_to_merge'] = downloaded
                            # Even if nothing new was downloaded, the merge itself happens only now
2378 info_dict['__real_download'] = True
2379 else:
2380 for file in downloaded:
2381 files_to_move[file] = None
2382 else:
2383 # Just a single file
2384 dl_filename = existing_file(full_filename, temp_filename)
2385 if dl_filename is None:
2386 success, real_download = dl(temp_filename, info_dict)
2387 info_dict['__real_download'] = real_download
2388
2389 dl_filename = dl_filename or temp_filename
2390 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2391
2392 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2393 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2394 return
2395 except (OSError, IOError) as err:
2396 raise UnavailableVideoError(err)
2397 except (ContentTooShortError, ) as err:
2398 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2399 return
2400
2401 if success and full_filename != '-':
2402 # Fixup content
2403 fixup_policy = self.params.get('fixup')
2404 if fixup_policy is None:
2405 fixup_policy = 'detect_or_warn'
2406
2407 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg to fix this automatically.'
2408
2409 stretched_ratio = info_dict.get('stretched_ratio')
2410 if stretched_ratio is not None and stretched_ratio != 1:
2411 if fixup_policy == 'warn':
2412 self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2413 info_dict['id'], stretched_ratio))
2414 elif fixup_policy == 'detect_or_warn':
2415 stretched_pp = FFmpegFixupStretchedPP(self)
2416 if stretched_pp.available:
2417 info_dict['__postprocessors'].append(stretched_pp)
2418 else:
2419 self.report_warning(
2420 '%s: Non-uniform pixel ratio (%s). %s'
2421 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
2422 else:
2423 assert fixup_policy in ('ignore', 'never')
2424
2425 if (info_dict.get('requested_formats') is None
2426 and info_dict.get('container') == 'm4a_dash'
2427 and info_dict.get('ext') == 'm4a'):
2428 if fixup_policy == 'warn':
2429 self.report_warning(
2430 '%s: writing DASH m4a. '
2431 'Only some players support this container.'
2432 % info_dict['id'])
2433 elif fixup_policy == 'detect_or_warn':
2434 fixup_pp = FFmpegFixupM4aPP(self)
2435 if fixup_pp.available:
2436 info_dict['__postprocessors'].append(fixup_pp)
2437 else:
2438 self.report_warning(
2439 '%s: writing DASH m4a. '
2440 'Only some players support this container. %s'
2441 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2442 else:
2443 assert fixup_policy in ('ignore', 'never')
2444
2445 if ('protocol' in info_dict
2446 and get_suitable_downloader(info_dict, self.params).__name__ == 'HlsFD'):
2447 if fixup_policy == 'warn':
2448 self.report_warning('%s: malformed AAC bitstream detected.' % (
2449 info_dict['id']))
2450 elif fixup_policy == 'detect_or_warn':
2451 fixup_pp = FFmpegFixupM3u8PP(self)
2452 if fixup_pp.available:
2453 info_dict['__postprocessors'].append(fixup_pp)
2454 else:
2455 self.report_warning(
2456 '%s: malformed AAC bitstream detected. %s'
2457 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2458 else:
2459 assert fixup_policy in ('ignore', 'never')
2460
2461 try:
2462 self.post_process(dl_filename, info_dict, files_to_move)
2463 except PostProcessingError as err:
2464 self.report_error('Postprocessing: %s' % str(err))
2465 return
2466 try:
2467 for ph in self._post_hooks:
2468 ph(full_filename)
2469 except Exception as err:
2470 self.report_error('post hooks: %s' % str(err))
2471 return
2472 must_record_download_archive = True
2473
2474 if must_record_download_archive or self.params.get('force_write_download_archive', False):
2475 self.record_download_archive(info_dict)
2476 max_downloads = self.params.get('max_downloads')
2477 if max_downloads is not None and self._num_downloads >= int(max_downloads):
2478 raise MaxDownloadsReached()
2479
2480 def download(self, url_list):
2481 """Download a given list of URLs."""
2482 outtmpl = self.outtmpl_dict['default']
2483 if (len(url_list) > 1
2484 and outtmpl != '-'
2485 and '%' not in outtmpl
2486 and self.params.get('max_downloads') != 1):
2487 raise SameFileError(outtmpl)
2488
2489 for url in url_list:
2490 try:
2491 # It also downloads the videos
2492 res = self.extract_info(
2493 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2494 except UnavailableVideoError:
2495 self.report_error('unable to download video')
2496 except MaxDownloadsReached:
2497 self.to_screen('[info] Maximum number of downloaded files reached')
2498 raise
2499 except ExistingVideoReached:
2500 self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
2501 raise
2502 except RejectedVideoReached:
2503 self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
2504 raise
2505 else:
2506 if self.params.get('dump_single_json', False):
2507 self.post_extract(res)
2508 self.to_stdout(json.dumps(res, default=repr))
2509
2510 return self._download_retcode
2511
2512 def download_with_info_file(self, info_filename):
2513 with contextlib.closing(fileinput.FileInput(
2514 [info_filename], mode='r',
2515 openhook=fileinput.hook_encoded('utf-8'))) as f:
            # FileInput doesn't have a read method, so we can't call json.load
2517 info = self.filter_requested_info(json.loads('\n'.join(f)))
2518 try:
2519 self.process_ie_result(info, download=True)
2520 except DownloadError:
2521 webpage_url = info.get('webpage_url')
2522 if webpage_url is not None:
2523 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2524 return self.download([webpage_url])
2525 else:
2526 raise
2527 return self._download_retcode
2528
2529 @staticmethod
2530 def filter_requested_info(info_dict, actually_filter=True):
2531 if not actually_filter:
2532 return info_dict
2533 exceptions = {
2534 'remove': ['requested_formats', 'requested_subtitles', 'filepath', 'entries'],
2535 'keep': ['_type'],
2536 }
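        # Recursively drop internal keys at every nesting level: anything
        # starting with '_' (except '_type') plus the explicit 'remove' list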
2537 keep_key = lambda k: k in exceptions['keep'] or not (k.startswith('_') or k in exceptions['remove'])
2538 filter_fn = lambda obj: (
2539 list(map(filter_fn, obj)) if isinstance(obj, (list, tuple))
2540 else obj if not isinstance(obj, dict)
2541 else dict((k, filter_fn(v)) for k, v in obj.items() if keep_key(k)))
2542 return filter_fn(info_dict)
2543
2544 def run_pp(self, pp, infodict):
        if '__files_to_move' not in infodict:
            infodict['__files_to_move'] = {}
2548 files_to_delete, infodict = pp.run(infodict)
2549 if not files_to_delete:
2550 return infodict
2551
2552 if self.params.get('keepvideo', False):
2553 for f in files_to_delete:
2554 infodict['__files_to_move'].setdefault(f, '')
2555 else:
2556 for old_filename in set(files_to_delete):
2557 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2558 try:
2559 os.remove(encodeFilename(old_filename))
2560 except (IOError, OSError):
2561 self.report_warning('Unable to remove downloaded original file')
2562 if old_filename in infodict['__files_to_move']:
2563 del infodict['__files_to_move'][old_filename]
2564 return infodict
2565
2566 @staticmethod
2567 def post_extract(info_dict):
2568 def actual_post_extract(info_dict):
2569 if info_dict.get('_type') in ('playlist', 'multi_video'):
                for video_dict in info_dict.get('entries') or []:
2571 actual_post_extract(video_dict)
2572 return
2573
2574 if '__post_extractor' not in info_dict:
2575 return
2576 post_extractor = info_dict['__post_extractor']
2577 if post_extractor:
2578 info_dict.update(post_extractor().items())
2579 del info_dict['__post_extractor']
2580 return
2581
2582 actual_post_extract(info_dict)
2583
2584 def pre_process(self, ie_info):
2585 info = dict(ie_info)
2586 for pp in self._pps['beforedl']:
2587 info = self.run_pp(pp, info)
2588 return info
2589
2590 def post_process(self, filename, ie_info, files_to_move=None):
2591 """Run all the postprocessors on the given file."""
2592 info = dict(ie_info)
2593 info['filepath'] = filename
2594 info['__files_to_move'] = files_to_move or {}
2595
2596 for pp in ie_info.get('__postprocessors', []) + self._pps['normal']:
2597 info = self.run_pp(pp, info)
2598 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
2599 del info['__files_to_move']
2600 for pp in self._pps['aftermove']:
2601 info = self.run_pp(pp, info)
2602
2603 def _make_archive_id(self, info_dict):
2604 video_id = info_dict.get('id')
2605 if not video_id:
2606 return
        # Use lowercase for future-proofing against any change in case
        # and for backwards compatibility with prior versions
2609 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
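        # (an archive line then looks like, e.g., 'youtube dQw4w9WgXcQ')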
2610 if extractor is None:
2611 url = str_or_none(info_dict.get('url'))
2612 if not url:
2613 return
2614 # Try to find matching extractor for the URL and take its ie_key
2615 for ie in self._ies:
2616 if ie.suitable(url):
2617 extractor = ie.ie_key()
2618 break
2619 else:
2620 return
2621 return '%s %s' % (extractor.lower(), video_id)
2622
2623 def in_download_archive(self, info_dict):
2624 fn = self.params.get('download_archive')
2625 if fn is None:
2626 return False
2627
2628 vid_id = self._make_archive_id(info_dict)
2629 if not vid_id:
2630 return False # Incomplete video information
2631
2632 return vid_id in self.archive
2633
2634 def record_download_archive(self, info_dict):
2635 fn = self.params.get('download_archive')
2636 if fn is None:
2637 return
2638 vid_id = self._make_archive_id(info_dict)
2639 assert vid_id
2640 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2641 archive_file.write(vid_id + '\n')
2642 self.archive.add(vid_id)
2643
2644 @staticmethod
2645 def format_resolution(format, default='unknown'):
2646 if format.get('vcodec') == 'none':
2647 return 'audio only'
2648 if format.get('resolution') is not None:
2649 return format['resolution']
2650 if format.get('height') is not None:
2651 if format.get('width') is not None:
2652 res = '%sx%s' % (format['width'], format['height'])
2653 else:
2654 res = '%sp' % format['height']
2655 elif format.get('width') is not None:
2656 res = '%dx?' % format['width']
2657 else:
2658 res = default
2659 return res
2660
2661 def _format_note(self, fdict):
2662 res = ''
2663 if fdict.get('ext') in ['f4f', 'f4m']:
2664 res += '(unsupported) '
2665 if fdict.get('language'):
2666 if res:
2667 res += ' '
2668 res += '[%s] ' % fdict['language']
2669 if fdict.get('format_note') is not None:
2670 res += fdict['format_note'] + ' '
2671 if fdict.get('tbr') is not None:
2672 res += '%4dk ' % fdict['tbr']
2673 if fdict.get('container') is not None:
2674 if res:
2675 res += ', '
2676 res += '%s container' % fdict['container']
2677 if (fdict.get('vcodec') is not None
2678 and fdict.get('vcodec') != 'none'):
2679 if res:
2680 res += ', '
2681 res += fdict['vcodec']
2682 if fdict.get('vbr') is not None:
2683 res += '@'
2684 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2685 res += 'video@'
2686 if fdict.get('vbr') is not None:
2687 res += '%4dk' % fdict['vbr']
2688 if fdict.get('fps') is not None:
2689 if res:
2690 res += ', '
2691 res += '%sfps' % fdict['fps']
2692 if fdict.get('acodec') is not None:
2693 if res:
2694 res += ', '
2695 if fdict['acodec'] == 'none':
2696 res += 'video only'
2697 else:
2698 res += '%-5s' % fdict['acodec']
2699 elif fdict.get('abr') is not None:
2700 if res:
2701 res += ', '
2702 res += 'audio'
2703 if fdict.get('abr') is not None:
2704 res += '@%3dk' % fdict['abr']
2705 if fdict.get('asr') is not None:
2706 res += ' (%5dHz)' % fdict['asr']
2707 if fdict.get('filesize') is not None:
2708 if res:
2709 res += ', '
2710 res += format_bytes(fdict['filesize'])
2711 elif fdict.get('filesize_approx') is not None:
2712 if res:
2713 res += ', '
2714 res += '~' + format_bytes(fdict['filesize_approx'])
2715 return res
2716
2717 def _format_note_table(self, f):
2718 def join_fields(*vargs):
2719 return ', '.join((val for val in vargs if val != ''))
2720
2721 return join_fields(
2722 'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
2723 format_field(f, 'language', '[%s]'),
2724 format_field(f, 'format_note'),
2725 format_field(f, 'container', ignore=(None, f.get('ext'))),
2726 format_field(f, 'asr', '%5dHz'))
2727
2728 def list_formats(self, info_dict):
2729 formats = info_dict.get('formats', [info_dict])
2730 new_format = self.params.get('listformats_table', False)
2731 if new_format:
2732 table = [
2733 [
2734 format_field(f, 'format_id'),
2735 format_field(f, 'ext'),
2736 self.format_resolution(f),
2737 format_field(f, 'fps', '%d'),
2738 '|',
2739 format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
2740 format_field(f, 'tbr', '%4dk'),
2741 f.get('protocol').replace('http_dash_segments', 'dash').replace("native", "n").replace('niconico_', ''),
2742 '|',
2743 format_field(f, 'vcodec', default='unknown').replace('none', ''),
2744 format_field(f, 'vbr', '%4dk'),
2745 format_field(f, 'acodec', default='unknown').replace('none', ''),
2746 format_field(f, 'abr', '%3dk'),
2747 format_field(f, 'asr', '%5dHz'),
2748 self._format_note_table(f)]
2749 for f in formats
2750 if f.get('preference') is None or f['preference'] >= -1000]
2751 header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
2752 '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
2753 else:
2754 table = [
2755 [
2756 format_field(f, 'format_id'),
2757 format_field(f, 'ext'),
2758 self.format_resolution(f),
2759 self._format_note(f)]
2760 for f in formats
2761 if f.get('preference') is None or f['preference'] >= -1000]
2762 header_line = ['format code', 'extension', 'resolution', 'note']
2763
2764 self.to_screen(
2765 '[info] Available formats for %s:\n%s' % (info_dict['id'], render_table(
2766 header_line,
2767 table,
2768 delim=new_format,
2769 extraGap=(0 if new_format else 1),
2770 hideEmpty=new_format)))
2771
2772 def list_thumbnails(self, info_dict):
2773 thumbnails = info_dict.get('thumbnails')
2774 if not thumbnails:
2775 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
2776 return
2777
2778 self.to_screen(
2779 '[info] Thumbnails for %s:' % info_dict['id'])
2780 self.to_screen(render_table(
2781 ['ID', 'width', 'height', 'URL'],
2782 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
2783
2784 def list_subtitles(self, video_id, subtitles, name='subtitles'):
2785 if not subtitles:
2786 self.to_screen('%s has no %s' % (video_id, name))
2787 return
2788 self.to_screen(
2789 'Available %s for %s:' % (name, video_id))
2790 self.to_screen(render_table(
2791 ['Language', 'formats'],
2792 [[lang, ', '.join(f['ext'] for f in reversed(formats))]
2793 for lang, formats in subtitles.items()]))
2794
2795 def urlopen(self, req):
2796 """ Start an HTTP download """
2797 if isinstance(req, compat_basestring):
2798 req = sanitized_Request(req)
2799 return self._opener.open(req, timeout=self._socket_timeout)
2800
2801 def print_debug_header(self):
2802 if not self.params.get('verbose'):
2803 return
2804
2805 if type('') is not compat_str:
2806 # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
2807 self.report_warning(
2808 'Your Python is broken! Update to a newer and supported version')
2809
2810 stdout_encoding = getattr(
2811 sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
2812 encoding_str = (
2813 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
2814 locale.getpreferredencoding(),
2815 sys.getfilesystemencoding(),
2816 stdout_encoding,
2817 self.get_encoding()))
2818 write_string(encoding_str, encoding=None)
2819
2820 source = (
2821 '(exe)' if hasattr(sys, 'frozen')
2822 else '(zip)' if isinstance(globals().get('__loader__'), zipimporter)
2823 else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py'
2824 else '')
2825 self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))
2826 if _LAZY_LOADER:
2827 self._write_string('[debug] Lazy loading extractors enabled\n')
2828 if _PLUGIN_CLASSES:
2829 self._write_string(
2830 '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
2831 try:
2832 sp = subprocess.Popen(
2833 ['git', 'rev-parse', '--short', 'HEAD'],
2834 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
2835 cwd=os.path.dirname(os.path.abspath(__file__)))
2836 out, err = process_communicate_or_kill(sp)
2837 out = out.decode().strip()
2838 if re.match('[0-9a-f]+', out):
2839 self._write_string('[debug] Git HEAD: %s\n' % out)
2840 except Exception:
2841 try:
2842 sys.exc_clear()
2843 except Exception:
2844 pass
2845
2846 def python_implementation():
2847 impl_name = platform.python_implementation()
2848 if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
2849 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
2850 return impl_name
2851
2852 self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
2853 platform.python_version(),
2854 python_implementation(),
2855 platform.architecture()[0],
2856 platform_name()))
2857
2858 exe_versions = FFmpegPostProcessor.get_versions(self)
2859 exe_versions['rtmpdump'] = rtmpdump_version()
2860 exe_versions['phantomjs'] = PhantomJSwrapper._version()
2861 exe_str = ', '.join(
2862 '%s %s' % (exe, v)
2863 for exe, v in sorted(exe_versions.items())
2864 if v
2865 )
2866 if not exe_str:
2867 exe_str = 'none'
2868 self._write_string('[debug] exe versions: %s\n' % exe_str)
2869
2870 proxy_map = {}
2871 for handler in self._opener.handlers:
2872 if hasattr(handler, 'proxies'):
2873 proxy_map.update(handler.proxies)
2874 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
2875
2876 if self.params.get('call_home', False):
2877 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
2878 self._write_string('[debug] Public IP address: %s\n' % ipaddr)
2879 return
2880 latest_version = self.urlopen(
2881 'https://yt-dl.org/latest/version').read().decode('utf-8')
2882 if version_tuple(latest_version) > version_tuple(__version__):
2883 self.report_warning(
2884 'You are using an outdated version (newest version: %s)! '
2885 'See https://yt-dl.org/update if you need help updating.' %
2886 latest_version)
2887
2888 def _setup_opener(self):
2889 timeout_val = self.params.get('socket_timeout')
2890 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
2891
2892 opts_cookiefile = self.params.get('cookiefile')
2893 opts_proxy = self.params.get('proxy')
2894
2895 if opts_cookiefile is None:
2896 self.cookiejar = compat_cookiejar.CookieJar()
2897 else:
2898 opts_cookiefile = expand_path(opts_cookiefile)
2899 self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
2900 if os.access(opts_cookiefile, os.R_OK):
2901 self.cookiejar.load(ignore_discard=True, ignore_expires=True)
2902
2903 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
2904 if opts_proxy is not None:
2905 if opts_proxy == '':
2906 proxies = {}
2907 else:
2908 proxies = {'http': opts_proxy, 'https': opts_proxy}
2909 else:
2910 proxies = compat_urllib_request.getproxies()
2911 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
2912 if 'http' in proxies and 'https' not in proxies:
2913 proxies['https'] = proxies['http']
2914 proxy_handler = PerRequestProxyHandler(proxies)
2915
2916 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
2917 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
2918 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
2919 redirect_handler = YoutubeDLRedirectHandler()
2920 data_handler = compat_urllib_request_DataHandler()
2921
2922 # When passing our own FileHandler instance, build_opener won't add the
2923 # default FileHandler and allows us to disable the file protocol, which
2924 # can be used for malicious purposes (see
2925 # https://github.com/ytdl-org/youtube-dl/issues/8227)
2926 file_handler = compat_urllib_request.FileHandler()
2927
2928 def file_open(*args, **kwargs):
2929 raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
2930 file_handler.file_open = file_open
2931
2932 opener = compat_urllib_request.build_opener(
2933 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2934
2935 # Delete the default user-agent header, which would otherwise apply in
2936 # cases where our custom HTTP handler doesn't come into play
2937 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
2938 opener.addheaders = []
2939 self._opener = opener
2940
2941 def encode(self, s):
2942 if isinstance(s, bytes):
2943 return s # Already encoded
2944
2945 try:
2946 return s.encode(self.get_encoding())
2947 except UnicodeEncodeError as err:
2948 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2949 raise
2950
2951 def get_encoding(self):
2952 encoding = self.params.get('encoding')
2953 if encoding is None:
2954 encoding = preferredencoding()
2955 return encoding
2956
2957 def _write_thumbnails(self, info_dict, filename): # return the extensions
2958 write_all = self.params.get('write_all_thumbnails', False)
2959 thumbnails = []
2960 if write_all or self.params.get('writethumbnail', False):
2961 thumbnails = info_dict.get('thumbnails') or []
2962 multiple = write_all and len(thumbnails) > 1
2963
2964 ret = []
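        # Walk thumbnails from the best (last) one unless writing all of them;
        # when writing just one, stop after the first success (see break below)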
2965 for t in thumbnails[::1 if write_all else -1]:
2966 thumb_ext = determine_ext(t['url'], 'jpg')
2967 suffix = '%s.' % t['id'] if multiple else ''
2968 thumb_display_id = '%s ' % t['id'] if multiple else ''
2969 t['filepath'] = thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))
2970
2971 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
2972 ret.append(suffix + thumb_ext)
2973 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2974 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2975 else:
2976 self.to_screen('[%s] %s: Downloading thumbnail %s ...' %
2977 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2978 try:
2979 uf = self.urlopen(t['url'])
2980 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2981 shutil.copyfileobj(uf, thumbf)
2982 ret.append(suffix + thumb_ext)
2983 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2984 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2985 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2986 self.report_warning('Unable to download thumbnail "%s": %s' %
2987 (t['url'], error_to_compat_str(err)))
2988 if ret and not write_all:
2989 break
2990 return ret