youtube_dlc/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import copy
   9 import datetime
  10 import errno
  11 import fileinput
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import socket
  23 import sys
  24 import time
  25 import tokenize
  26 import traceback
  27 import random
  28
  29 from string import ascii_letters
  30
  31 from .compat import (
  32     compat_basestring,
  33     compat_cookiejar,
  34     compat_get_terminal_size,
  35     compat_http_client,
  36     compat_kwargs,
  37     compat_numeric_types,
  38     compat_os_name,
  39     compat_str,
  40     compat_tokenize_tokenize,
  41     compat_urllib_error,
  42     compat_urllib_request,
  43     compat_urllib_request_DataHandler,
  44 )
  45 from .utils import (
  46     age_restricted,
  47     args_to_str,
  48     ContentTooShortError,
  49     date_from_str,
  50     DateRange,
  51     DEFAULT_OUTTMPL,
  52     determine_ext,
  53     determine_protocol,
  54     DownloadError,
  55     encode_compat_str,
  56     encodeFilename,
  57     error_to_compat_str,
  58     expand_path,
  59     ExtractorError,
  60     format_bytes,
  61     formatSeconds,
  62     GeoRestrictedError,
  63     int_or_none,
  64     ISO3166Utils,
  65     locked_file,
  66     make_HTTPS_handler,
  67     MaxDownloadsReached,
  68     orderedSet,
  69     PagedList,
  70     parse_filesize,
  71     PerRequestProxyHandler,
  72     platform_name,
  73     PostProcessingError,
  74     preferredencoding,
  75     prepend_extension,
  76     register_socks_protocols,
  77     render_table,
  78     replace_extension,
  79     SameFileError,
  80     sanitize_filename,
  81     sanitize_path,
  82     sanitize_url,
  83     sanitized_Request,
  84     std_headers,
  85     str_or_none,
  86     subtitles_filename,
  87     UnavailableVideoError,
  88     url_basename,
  89     version_tuple,
  90     write_json_file,
  91     write_string,
  92     YoutubeDLCookieJar,
  93     YoutubeDLCookieProcessor,
  94     YoutubeDLHandler,
  95     YoutubeDLRedirectHandler,
  96 )
  97 from .cache import Cache
  98 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
  99 from .extractor.openload import PhantomJSwrapper
 100 from .downloader import get_suitable_downloader
 101 from .downloader.rtmp import rtmpdump_version
 102 from .postprocessor import (
 103     FFmpegFixupM3u8PP,
 104     FFmpegFixupM4aPP,
 105     FFmpegFixupStretchedPP,
 106     FFmpegMergerPP,
 107     FFmpegPostProcessor,
 108     FFmpegSubtitlesConvertorPP,
 109     get_postprocessor,
 110 )
 111 from .version import __version__
 112
 113 if compat_os_name == 'nt':
 114     import ctypes
 115
 116
 117 class YoutubeDL(object):
 118     """YoutubeDL class.
 119
 120     YoutubeDL objects are the ones responsible of downloading the
 121     actual video file and writing it to disk if the user has requested
 122     it, among some other tasks. In most cases there should be one per
 123     program. As, given a video URL, the downloader doesn't know how to
 124     extract all the needed information, task that InfoExtractors do, it
 125     has to pass the URL to one of them.
 126
 127     For this, YoutubeDL objects have a method that allows
 128     InfoExtractors to be registered in a given order. When it is passed
 129     a URL, the YoutubeDL object handles it to the first InfoExtractor it
 130     finds that reports being able to handle it. The InfoExtractor extracts
 131     all the information about the video or videos the URL refers to, and
 132     YoutubeDL process the extracted information, possibly using a File
 133     Downloader to download the video.
 134
 135     YoutubeDL objects accept a lot of parameters. In order not to saturate
 136     the object constructor with arguments, it receives a dictionary of
 137     options instead. These options are available through the params
 138     attribute for the InfoExtractors to use. The YoutubeDL also
 139     registers itself as the downloader in charge for the InfoExtractors
 140     that are added to it, so this is a "mutual registration".
 141
 142     Available options:
 143
 144     username:          Username for authentication purposes.
 145     password:          Password for authentication purposes.
 146     videopassword:     Password for accessing a video.
 147     ap_mso:            Adobe Pass multiple-system operator identifier.
 148     ap_username:       Multiple-system operator account username.
 149     ap_password:       Multiple-system operator account password.
 150     usenetrc:          Use netrc for authentication instead.
 151     verbose:           Print additional info to stdout.
 152     quiet:             Do not print messages to stdout.
 153     no_warnings:       Do not print out anything for warnings.
 154     forceurl:          Force printing final URL.
 155     forcetitle:        Force printing title.
 156     forceid:           Force printing ID.
 157     forcethumbnail:    Force printing thumbnail URL.
 158     forcedescription:  Force printing description.
 159     forcefilename:     Force printing final filename.
 160     forceduration:     Force printing duration.
 161     forcejson:         Force printing info_dict as JSON.
 162     dump_single_json:  Force printing the info_dict of the whole playlist
 163                        (or video) as a single JSON line.
 164     simulate:          Do not download the video files.
 165     format:            Video format code. See options.py for more information.
 166     outtmpl:           Template for output names.
 167     restrictfilenames: Do not allow "&" and spaces in file names.
 168     trim_file_name:    Limit length of filename (extension excluded).
 169     ignoreerrors:      Do not stop on download errors.
 170     force_generic_extractor: Force downloader to use the generic extractor
 171     nooverwrites:      Prevent overwriting files.
 172     playliststart:     Playlist item to start at.
 173     playlistend:       Playlist item to end at.
 174     playlist_items:    Specific indices of playlist to download.
 175     playlistreverse:   Download playlist items in reverse order.
 176     playlistrandom:    Download playlist items in random order.
 177     matchtitle:        Download only matching titles.
 178     rejecttitle:       Reject downloads for matching titles.
 179     logger:            Log messages to a logging.Logger instance.
 180     logtostderr:       Log messages to stderr instead of stdout.
 181     writedescription:  Write the video description to a .description file
 182     writeinfojson:     Write the video description to a .info.json file
 183     writeannotations:  Write the video annotations to a .annotations.xml file
 184     writethumbnail:    Write the thumbnail image to a file
 185     write_all_thumbnails:  Write all thumbnail formats to files
 186     writesubtitles:    Write the video subtitles to a file
 187     writeautomaticsub: Write the automatically generated subtitles to a file
 188     allsubtitles:      Downloads all the subtitles of the video
 189                        (requires writesubtitles or writeautomaticsub)
 190     listsubtitles:     Lists all available subtitles for the video
 191     subtitlesformat:   The format code for subtitles
 192     subtitleslangs:    List of languages of the subtitles to download
 193     keepvideo:         Keep the video file after post-processing
 194     daterange:         A DateRange object, download only if the upload_date is in the range.
 195     skip_download:     Skip the actual download of the video file
 196     cachedir:          Location of the cache files in the filesystem.
 197                        False to disable filesystem cache.
 198     noplaylist:        Download single video instead of a playlist if in doubt.
 199     age_limit:         An integer representing the user's age in years.
 200                        Unsuitable videos for the given age are skipped.
 201     min_views:         An integer representing the minimum view count the video
 202                        must have in order to not be skipped.
 203                        Videos without view count information are always
 204                        downloaded. None for no limit.
 205     max_views:         An integer representing the maximum view count.
 206                        Videos that are more popular than that are not
 207                        downloaded.
 208                        Videos without view count information are always
 209                        downloaded. None for no limit.
 210     download_archive:  File name of a file where all downloads are recorded.
 211                        Videos already present in the file are not downloaded
 212                        again.
 213     cookiefile:        File name where cookies should be read from and dumped to.
 214     nocheckcertificate:Do not verify SSL certificates
 215     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 216                        At the moment, this is only supported by YouTube.
 217     proxy:             URL of the proxy server to use
 218     geo_verification_proxy:  URL of the proxy to use for IP address verification
 219                        on geo-restricted sites.
 220     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 221     bidi_workaround:   Work around buggy terminals without bidirectional text
 222                        support, using fridibi
 223     debug_printtraffic:Print out sent and received HTTP traffic
 224     include_ads:       Download ads as well
 225     default_search:    Prepend this string if an input url is not valid.
 226                        'auto' for elaborate guessing
 227     encoding:          Use this encoding instead of the system-specified.
 228     extract_flat:      Do not resolve URLs, return the immediate result.
 229                        Pass in 'in_playlist' to only show this behavior for
 230                        playlist items.
 231     postprocessors:    A list of dictionaries, each with an entry
 232                        * key:  The name of the postprocessor. See
 233                                youtube_dlc/postprocessor/__init__.py for a list.
 234                        as well as any further keyword arguments for the
 235                        postprocessor.
 236     progress_hooks:    A list of functions that get called on download
 237                        progress, with a dictionary with the entries
 238                        * status: One of "downloading", "error", or "finished".
 239                                  Check this first and ignore unknown values.
 240
 241                        If status is one of "downloading", or "finished", the
 242                        following properties may also be present:
 243                        * filename: The final filename (always present)
 244                        * tmpfilename: The filename we're currently writing to
 245                        * downloaded_bytes: Bytes on disk
 246                        * total_bytes: Size of the whole file, None if unknown
 247                        * total_bytes_estimate: Guess of the eventual file size,
 248                                                None if unavailable.
 249                        * elapsed: The number of seconds since download started.
 250                        * eta: The estimated time in seconds, None if unknown
 251                        * speed: The download speed in bytes/second, None if
 252                                 unknown
 253                        * fragment_index: The counter of the currently
 254                                          downloaded video fragment.
 255                        * fragment_count: The number of fragments (= individual
 256                                          files that will be merged)
 257
 258                        Progress hooks are guaranteed to be called at least once
 259                        (with status "finished") if the download is successful.
 260     merge_output_format: Extension to use when merging formats.
 261     fixup:             Automatically correct known faults of the file.
 262                        One of:
 263                        - "never": do nothing
 264                        - "warn": only emit a warning
 265                        - "detect_or_warn": check whether we can do anything
 266                                            about it, warn otherwise (default)
 267     source_address:    Client-side IP address to bind to.
 268     call_home:         Boolean, true iff we are allowed to contact the
 269                        youtube-dlc servers for debugging.
 270     sleep_interval:    Number of seconds to sleep before each download when
 271                        used alone or a lower bound of a range for randomized
 272                        sleep before each download (minimum possible number
 273                        of seconds to sleep) when used along with
 274                        max_sleep_interval.
 275     max_sleep_interval:Upper bound of a range for randomized sleep before each
 276                        download (maximum possible number of seconds to sleep).
 277                        Must only be used along with sleep_interval.
 278                        Actual sleep time will be a random float from range
 279                        [sleep_interval; max_sleep_interval].
 280     listformats:       Print an overview of available video formats and exit.
 281     list_thumbnails:   Print a table of all thumbnails and exit.
 282     match_filter:      A function that gets called with the info_dict of
 283                        every video.
 284                        If it returns a message, the video is ignored.
 285                        If it returns None, the video is downloaded.
 286                        match_filter_func in utils.py is one example for this.
 287     no_color:          Do not emit color codes in output.
 288     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 289                        HTTP header
 290     geo_bypass_country:
 291                        Two-letter ISO 3166-2 country code that will be used for
 292                        explicit geographic restriction bypassing via faking
 293                        X-Forwarded-For HTTP header
 294     geo_bypass_ip_block:
 295                        IP range in CIDR notation that will be used similarly to
 296                        geo_bypass_country
 297
 298     The following options determine which downloader is picked:
 299     external_downloader: Executable of the external downloader to call.
 300                        None or unset for standard (built-in) downloader.
 301     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
 302                        if True, otherwise use ffmpeg/avconv if False, otherwise
 303                        use downloader suggested by extractor if None.
 304
 305     The following parameters are not used by YoutubeDL itself, they are used by
 306     the downloader (see youtube_dlc/downloader/common.py):
 307     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 308     noresizebuffer, retries, continuedl, noprogress, consoletitle,
 309     xattr_set_filesize, external_downloader_args, hls_use_mpegts,
 310     http_chunk_size.
 311
 312     The following options are used by the post processors:
 313     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 314                        otherwise prefer ffmpeg.
 315     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 316                        to the binary or its containing directory.
 317     postprocessor_args: A list of additional command-line arguments for the
 318                         postprocessor.
 319
 320     The following options are used by the Youtube extractor:
 321     youtube_include_dash_manifest: If True (default), DASH manifests and related
 322                         data will be downloaded and processed by extractor.
 323                         You can reduce network I/O by disabling it if you don't
 324                         care about DASH.
 325     """
 326
 327     _NUMERIC_FIELDS = set((
 328         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 329         'timestamp', 'upload_year', 'upload_month', 'upload_day',
 330         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 331         'average_rating', 'comment_count', 'age_limit',
 332         'start_time', 'end_time',
 333         'chapter_number', 'season_number', 'episode_number',
 334         'track_number', 'disc_number', 'release_year',
 335         'playlist_index',
 336     ))
 337
 338     params = None
 339     _ies = []
 340     _pps = []
 341     _download_retcode = None
 342     _num_downloads = None
 343     _screen_file = None
 344
 345     def __init__(self, params=None, auto_init=True):
 346         """Create a FileDownloader object with the given options."""
 347         if params is None:
 348             params = {}
 349         self._ies = []
 350         self._ies_instances = {}
 351         self._pps = []
 352         self._progress_hooks = []
 353         self._download_retcode = 0
 354         self._num_downloads = 0
 355         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 356         self._err_file = sys.stderr
 357         self.params = {
 358             # Default parameters
 359             'nocheckcertificate': False,
 360         }
 361         self.params.update(params)
 362         self.cache = Cache(self)
 363         self.archive = set()
 364
 365         """Preload the archive, if any is specified"""
 366         def preload_download_archive(self):
 367             fn = self.params.get('download_archive')
 368             if fn is None:
 369                 return False
 370             try:
 371                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 372                     for line in archive_file:
 373                         self.archive.add(line.strip())
 374             except IOError as ioe:
 375                 if ioe.errno != errno.ENOENT:
 376                     raise
 377                 return False
 378             return True
 379
 380         def check_deprecated(param, option, suggestion):
 381             if self.params.get(param) is not None:
 382                 self.report_warning(
 383                     '%s is deprecated. Use %s instead.' % (option, suggestion))
 384                 return True
 385             return False
 386
 387         if self.params.get('verbose'):
 388             self.to_stdout('[debug] Loading archive file %r' % self.params.get('download_archive'))
 389
 390         preload_download_archive(self)
 391
 392         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 393             if self.params.get('geo_verification_proxy') is None:
 394                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 395
 396         check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
 397         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 398         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 399
 400         if params.get('bidi_workaround', False):
 401             try:
 402                 import pty
 403                 master, slave = pty.openpty()
 404                 width = compat_get_terminal_size().columns
 405                 if width is None:
 406                     width_args = []
 407                 else:
 408                     width_args = ['-w', str(width)]
 409                 sp_kwargs = dict(
 410                     stdin=subprocess.PIPE,
 411                     stdout=slave,
 412                     stderr=self._err_file)
 413                 try:
 414                     self._output_process = subprocess.Popen(
 415                         ['bidiv'] + width_args, **sp_kwargs
 416                     )
 417                 except OSError:
 418                     self._output_process = subprocess.Popen(
 419                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 420                 self._output_channel = os.fdopen(master, 'rb')
 421             except OSError as ose:
 422                 if ose.errno == errno.ENOENT:
 423                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 424                 else:
 425                     raise
 426
 427         if (sys.platform != 'win32'
 428                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 429                 and not params.get('restrictfilenames', False)):
 430             # Unicode filesystem API will throw errors (#1474, #13027)
 431             self.report_warning(
 432                 'Assuming --restrict-filenames since file system encoding '
 433                 'cannot encode all characters. '
 434                 'Set the LC_ALL environment variable to fix this.')
 435             self.params['restrictfilenames'] = True
 436
 437         if isinstance(params.get('outtmpl'), bytes):
 438             self.report_warning(
 439                 'Parameter outtmpl is bytes, but should be a unicode string. '
 440                 'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 441
 442         self._setup_opener()
 443
 444         if auto_init:
 445             self.print_debug_header()
 446             self.add_default_info_extractors()
 447
 448         for pp_def_raw in self.params.get('postprocessors', []):
 449             pp_class = get_postprocessor(pp_def_raw['key'])
 450             pp_def = dict(pp_def_raw)
 451             del pp_def['key']
 452             pp = pp_class(self, **compat_kwargs(pp_def))
 453             self.add_post_processor(pp)
 454
 455         for ph in self.params.get('progress_hooks', []):
 456             self.add_progress_hook(ph)
 457
 458         register_socks_protocols()
 459
 460     def warn_if_short_id(self, argv):
 461         # short YouTube ID starting with dash?
 462         idxs = [
 463             i for i, a in enumerate(argv)
 464             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 465         if idxs:
 466             correct_argv = (
 467                 ['youtube-dlc']
 468                 + [a for i, a in enumerate(argv) if i not in idxs]
 469                 + ['--'] + [argv[i] for i in idxs]
 470             )
 471             self.report_warning(
 472                 'Long argument string detected. '
 473                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 474                 args_to_str(correct_argv))
 475
 476     def add_info_extractor(self, ie):
 477         """Add an InfoExtractor object to the end of the list."""
 478         self._ies.append(ie)
 479         if not isinstance(ie, type):
 480             self._ies_instances[ie.ie_key()] = ie
 481             ie.set_downloader(self)
 482
 483     def get_info_extractor(self, ie_key):
 484         """
 485         Get an instance of an IE with name ie_key, it will try to get one from
 486         the _ies list, if there's no instance it will create a new one and add
 487         it to the extractor list.
 488         """
 489         ie = self._ies_instances.get(ie_key)
 490         if ie is None:
 491             ie = get_info_extractor(ie_key)()
 492             self.add_info_extractor(ie)
 493         return ie
 494
 495     def add_default_info_extractors(self):
 496         """
 497         Add the InfoExtractors returned by gen_extractors to the end of the list
 498         """
 499         for ie in gen_extractor_classes():
 500             self.add_info_extractor(ie)
 501
 502     def add_post_processor(self, pp):
 503         """Add a PostProcessor object to the end of the chain."""
 504         self._pps.append(pp)
 505         pp.set_downloader(self)
 506
 507     def add_progress_hook(self, ph):
 508         """Add the progress hook (currently only for the file downloader)"""
 509         self._progress_hooks.append(ph)
 510
 511     def _bidi_workaround(self, message):
 512         if not hasattr(self, '_output_channel'):
 513             return message
 514
 515         assert hasattr(self, '_output_process')
 516         assert isinstance(message, compat_str)
 517         line_count = message.count('\n') + 1
 518         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 519         self._output_process.stdin.flush()
 520         res = ''.join(self._output_channel.readline().decode('utf-8')
 521                       for _ in range(line_count))
 522         return res[:-len('\n')]
 523
 524     def to_screen(self, message, skip_eol=False):
 525         """Print message to stdout if not in quiet mode."""
 526         return self.to_stdout(message, skip_eol, check_quiet=True)
 527
 528     def _write_string(self, s, out=None):
 529         write_string(s, out=out, encoding=self.params.get('encoding'))
 530
 531     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 532         """Print message to stdout if not in quiet mode."""
 533         if self.params.get('logger'):
 534             self.params['logger'].debug(message)
 535         elif not check_quiet or not self.params.get('quiet', False):
 536             message = self._bidi_workaround(message)
 537             terminator = ['\n', ''][skip_eol]
 538             output = message + terminator
 539
 540             self._write_string(output, self._screen_file)
 541
 542     def to_stderr(self, message):
 543         """Print message to stderr."""
 544         assert isinstance(message, compat_str)
 545         if self.params.get('logger'):
 546             self.params['logger'].error(message)
 547         else:
 548             message = self._bidi_workaround(message)
 549             output = message + '\n'
 550             self._write_string(output, self._err_file)
 551
 552     def to_console_title(self, message):
 553         if not self.params.get('consoletitle', False):
 554             return
 555         if compat_os_name == 'nt':
 556             if ctypes.windll.kernel32.GetConsoleWindow():
 557                 # c_wchar_p() might not be necessary if `message` is
 558                 # already of type unicode()
 559                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 560         elif 'TERM' in os.environ:
 561             self._write_string('\033]0;%s\007' % message, self._screen_file)
 562
 563     def save_console_title(self):
 564         if not self.params.get('consoletitle', False):
 565             return
 566         if self.params.get('simulate', False):
 567             return
 568         if compat_os_name != 'nt' and 'TERM' in os.environ:
 569             # Save the title on stack
 570             self._write_string('\033[22;0t', self._screen_file)
 571
 572     def restore_console_title(self):
 573         if not self.params.get('consoletitle', False):
 574             return
 575         if self.params.get('simulate', False):
 576             return
 577         if compat_os_name != 'nt' and 'TERM' in os.environ:
 578             # Restore the title from stack
 579             self._write_string('\033[23;0t', self._screen_file)
 580
 581     def __enter__(self):
 582         self.save_console_title()
 583         return self
 584
 585     def __exit__(self, *args):
 586         self.restore_console_title()
 587
 588         if self.params.get('cookiefile') is not None:
 589             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 590
 591     def trouble(self, message=None, tb=None):
 592         """Determine action to take when a download problem appears.
 593
 594         Depending on if the downloader has been configured to ignore
 595         download errors or not, this method may throw an exception or
 596         not when errors are found, after printing the message.
 597
 598         tb, if given, is additional traceback information.
 599         """
 600         if message is not None:
 601             self.to_stderr(message)
 602         if self.params.get('verbose'):
 603             if tb is None:
 604                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 605                     tb = ''
 606                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 607                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 608                     tb += encode_compat_str(traceback.format_exc())
 609                 else:
 610                     tb_data = traceback.format_list(traceback.extract_stack())
 611                     tb = ''.join(tb_data)
 612             self.to_stderr(tb)
 613         if not self.params.get('ignoreerrors', False):
 614             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 615                 exc_info = sys.exc_info()[1].exc_info
 616             else:
 617                 exc_info = sys.exc_info()
 618             raise DownloadError(message, exc_info)
 619         self._download_retcode = 1
 620
 621     def report_warning(self, message):
 622         '''
 623         Print the message to stderr, it will be prefixed with 'WARNING:'
 624         If stderr is a tty file the 'WARNING:' will be colored
 625         '''
 626         if self.params.get('logger') is not None:
 627             self.params['logger'].warning(message)
 628         else:
 629             if self.params.get('no_warnings'):
 630                 return
 631             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 632                 _msg_header = '\033[0;33mWARNING:\033[0m'
 633             else:
 634                 _msg_header = 'WARNING:'
 635             warning_message = '%s %s' % (_msg_header, message)
 636             self.to_stderr(warning_message)
 637
 638     def report_error(self, message, tb=None):
 639         '''
 640         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 641         in red if stderr is a tty file.
 642         '''
 643         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 644             _msg_header = '\033[0;31mERROR:\033[0m'
 645         else:
 646             _msg_header = 'ERROR:'
 647         error_message = '%s %s' % (_msg_header, message)
 648         self.trouble(error_message, tb)
 649
 650     def report_file_already_downloaded(self, file_name):
 651         """Report file has already been fully downloaded."""
 652         try:
 653             self.to_screen('[download] %s has already been downloaded' % file_name)
 654         except UnicodeEncodeError:
 655             self.to_screen('[download] The file has already been downloaded')
 656
 657     def prepare_filename(self, info_dict):
 658         """Generate the output filename."""
 659         try:
 660             template_dict = dict(info_dict)
 661
 662             template_dict['epoch'] = int(time.time())
 663             autonumber_size = self.params.get('autonumber_size')
 664             if autonumber_size is None:
 665                 autonumber_size = 5
 666             template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
 667             if template_dict.get('resolution') is None:
 668                 if template_dict.get('width') and template_dict.get('height'):
 669                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 670                 elif template_dict.get('height'):
 671                     template_dict['resolution'] = '%sp' % template_dict['height']
 672                 elif template_dict.get('width'):
 673                     template_dict['resolution'] = '%dx?' % template_dict['width']
 674
 675             sanitize = lambda k, v: sanitize_filename(
 676                 compat_str(v),
 677                 restricted=self.params.get('restrictfilenames'),
 678                 is_id=(k == 'id' or k.endswith('_id')))
 679             template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
 680                                  for k, v in template_dict.items()
 681                                  if v is not None and not isinstance(v, (list, tuple, dict)))
 682             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 683
 684             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
 685
 686             # For fields playlist_index and autonumber convert all occurrences
 687             # of %(field)s to %(field)0Nd for backward compatibility
 688             field_size_compat_map = {
 689                 'playlist_index': len(str(template_dict['n_entries'])),
 690                 'autonumber': autonumber_size,
 691             }
 692             FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
 693             mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
 694             if mobj:
 695                 outtmpl = re.sub(
 696                     FIELD_SIZE_COMPAT_RE,
 697                     r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
 698                     outtmpl)
 699
 700             # Missing numeric fields used together with integer presentation types
 701             # in format specification will break the argument substitution since
 702             # string 'NA' is returned for missing fields. We will patch output
 703             # template for missing fields to meet string presentation type.
 704             for numeric_field in self._NUMERIC_FIELDS:
 705                 if numeric_field not in template_dict:
 706                     # As of [1] format syntax is:
 707                     #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
 708                     # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
 709                     FORMAT_RE = r'''(?x)
 710                         (?<!%)
 711                         %
 712                         \({0}\)  # mapping key
 713                         (?:[#0\-+ ]+)?  # conversion flags (optional)
 714                         (?:\d+)?  # minimum field width (optional)
 715                         (?:\.\d+)?  # precision (optional)
 716                         [hlL]?  # length modifier (optional)
 717                         [diouxXeEfFgGcrs%]  # conversion type
 718                     '''
 719                     outtmpl = re.sub(
 720                         FORMAT_RE.format(numeric_field),
 721                         r'%({0})s'.format(numeric_field), outtmpl)
 722
 723             # expand_path translates '%%' into '%' and '$$' into '$'
 724             # correspondingly that is not what we want since we need to keep
 725             # '%%' intact for template dict substitution step. Working around
 726             # with boundary-alike separator hack.
 727             sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 728             outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 729
 730             # outtmpl should be expand_path'ed before template dict substitution
 731             # because meta fields may contain env variables we don't want to
 732             # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
 733             # title "Hello $PATH", we don't want `$PATH` to be expanded.
 734             filename = expand_path(outtmpl).replace(sep, '') % template_dict
 735
 736             # https://github.com/blackjack4494/youtube-dlc/issues/85
 737             trim_file_name = self.params.get('trim_file_name', False)
 738             if trim_file_name:
 739                 fn_groups = filename.rsplit('.')
 740                 ext = fn_groups[-1]
 741                 sub_ext = ''
 742                 if len(fn_groups) > 2:
 743                     sub_ext = fn_groups[-2]
 744                 filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
 745
 746             # Temporary fix for #4787
 747             # 'Treat' all problem characters by passing filename through preferredencoding
 748             # to workaround encoding issues with subprocess on python2 @ Windows
 749             if sys.version_info < (3, 0) and sys.platform == 'win32':
 750                 filename = encodeFilename(filename, True).decode(preferredencoding())
 751             return sanitize_path(filename)
 752         except ValueError as err:
 753             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 754             return None
 755
 756     def _match_entry(self, info_dict, incomplete):
 757         """ Returns None if the file should be downloaded """
 758
 759         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 760         if 'title' in info_dict:
 761             # This can happen when we're just evaluating the playlist
 762             title = info_dict['title']
 763             matchtitle = self.params.get('matchtitle', False)
 764             if matchtitle:
 765                 if not re.search(matchtitle, title, re.IGNORECASE):
 766                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 767             rejecttitle = self.params.get('rejecttitle', False)
 768             if rejecttitle:
 769                 if re.search(rejecttitle, title, re.IGNORECASE):
 770                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 771         date = info_dict.get('upload_date')
 772         if date is not None:
 773             dateRange = self.params.get('daterange', DateRange())
 774             if date not in dateRange:
 775                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 776         view_count = info_dict.get('view_count')
 777         if view_count is not None:
 778             min_views = self.params.get('min_views')
 779             if min_views is not None and view_count < min_views:
 780                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 781             max_views = self.params.get('max_views')
 782             if max_views is not None and view_count > max_views:
 783                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 784         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
 785             return 'Skipping "%s" because it is age restricted' % video_title
 786         if self.in_download_archive(info_dict):
 787             return '%s has already been recorded in archive' % video_title
 788
 789         if not incomplete:
 790             match_filter = self.params.get('match_filter')
 791             if match_filter is not None:
 792                 ret = match_filter(info_dict)
 793                 if ret is not None:
 794                     return ret
 795
 796         return None
 797
 798     @staticmethod
 799     def add_extra_info(info_dict, extra_info):
 800         '''Set the keys from extra_info in info dict if they are missing'''
 801         for key, value in extra_info.items():
 802             info_dict.setdefault(key, value)
 803
 804     def extract_info(self, url, download=True, ie_key=None, extra_info={},
 805                      process=True, force_generic_extractor=False):
 806         '''
 807         Returns a list with a dictionary for each video we find.
 808         If 'download', also downloads the videos.
 809         extra_info is a dict containing the extra values to add to each result
 810         '''
 811
 812         if not ie_key and force_generic_extractor:
 813             ie_key = 'Generic'
 814
 815         if ie_key:
 816             ies = [self.get_info_extractor(ie_key)]
 817         else:
 818             ies = self._ies
 819
 820         for ie in ies:
 821             if not ie.suitable(url):
 822                 continue
 823
 824             ie = self.get_info_extractor(ie.ie_key())
 825             if not ie.working():
 826                 self.report_warning('The program functionality for this site has been marked as broken, '
 827                                     'and will probably not work.')
 828
 829             try:
 830                 ie_result = ie.extract(url)
 831                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
 832                     break
 833                 if isinstance(ie_result, list):
 834                     # Backwards compatibility: old IE result format
 835                     ie_result = {
 836                         '_type': 'compat_list',
 837                         'entries': ie_result,
 838                     }
 839                 self.add_default_extra_info(ie_result, ie, url)
 840                 if process:
 841                     return self.process_ie_result(ie_result, download, extra_info)
 842                 else:
 843                     return ie_result
 844             except GeoRestrictedError as e:
 845                 msg = e.msg
 846                 if e.countries:
 847                     msg += '\nThis video is available in %s.' % ', '.join(
 848                         map(ISO3166Utils.short2full, e.countries))
 849                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
 850                 self.report_error(msg)
 851                 break
 852             except ExtractorError as e:  # An error we somewhat expected
 853                 self.report_error(compat_str(e), e.format_traceback())
 854                 break
 855             except MaxDownloadsReached:
 856                 raise
 857             except Exception as e:
 858                 if self.params.get('ignoreerrors', False):
 859                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
 860                     break
 861                 else:
 862                     raise
 863         else:
 864             self.report_error('no suitable InfoExtractor for URL %s' % url)
 865
 866     def add_default_extra_info(self, ie_result, ie, url):
 867         self.add_extra_info(ie_result, {
 868             'extractor': ie.IE_NAME,
 869             'webpage_url': url,
 870             'webpage_url_basename': url_basename(url),
 871             'extractor_key': ie.ie_key(),
 872         })
 873
 874     def process_ie_result(self, ie_result, download=True, extra_info={}):
 875         """
 876         Take the result of the ie(may be modified) and resolve all unresolved
 877         references (URLs, playlist items).
 878
 879         It will also download the videos if 'download'.
 880         Returns the resolved ie_result.
 881         """
 882         result_type = ie_result.get('_type', 'video')
 883
 884         if result_type in ('url', 'url_transparent'):
 885             ie_result['url'] = sanitize_url(ie_result['url'])
 886             extract_flat = self.params.get('extract_flat', False)
 887             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
 888                     or extract_flat is True):
 889                 self.__forced_printings(
 890                     ie_result, self.prepare_filename(ie_result),
 891                     incomplete=True)
 892                 return ie_result
 893
 894         if result_type == 'video':
 895             self.add_extra_info(ie_result, extra_info)
 896             return self.process_video_result(ie_result, download=download)
 897         elif result_type == 'url':
 898             # We have to add extra_info to the results because it may be
 899             # contained in a playlist
 900             return self.extract_info(ie_result['url'],
 901                                      download,
 902                                      ie_key=ie_result.get('ie_key'),
 903                                      extra_info=extra_info)
 904         elif result_type == 'url_transparent':
 905             # Use the information from the embedding page
 906             info = self.extract_info(
 907                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 908                 extra_info=extra_info, download=False, process=False)
 909
 910             # extract_info may return None when ignoreerrors is enabled and
 911             # extraction failed with an error, don't crash and return early
 912             # in this case
 913             if not info:
 914                 return info
 915
 916             force_properties = dict(
 917                 (k, v) for k, v in ie_result.items() if v is not None)
 918             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
 919                 if f in force_properties:
 920                     del force_properties[f]
 921             new_result = info.copy()
 922             new_result.update(force_properties)
 923
 924             # Extracted info may not be a video result (i.e.
 925             # info.get('_type', 'video') != video) but rather an url or
 926             # url_transparent. In such cases outer metadata (from ie_result)
 927             # should be propagated to inner one (info). For this to happen
 928             # _type of info should be overridden with url_transparent. This
 929             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
 930             if new_result.get('_type') == 'url':
 931                 new_result['_type'] = 'url_transparent'
 932
 933             return self.process_ie_result(
 934                 new_result, download=download, extra_info=extra_info)
 935         elif result_type in ('playlist', 'multi_video'):
 936             # We process each entry in the playlist
 937             playlist = ie_result.get('title') or ie_result.get('id')
 938             self.to_screen('[download] Downloading playlist: %s' % playlist)
 939
 940             playlist_results = []
 941
 942             playliststart = self.params.get('playliststart', 1) - 1
 943             playlistend = self.params.get('playlistend')
 944             # For backwards compatibility, interpret -1 as whole list
 945             if playlistend == -1:
 946                 playlistend = None
 947
 948             playlistitems_str = self.params.get('playlist_items')
 949             playlistitems = None
 950             if playlistitems_str is not None:
 951                 def iter_playlistitems(format):
 952                     for string_segment in format.split(','):
 953                         if '-' in string_segment:
 954                             start, end = string_segment.split('-')
 955                             for item in range(int(start), int(end) + 1):
 956                                 yield int(item)
 957                         else:
 958                             yield int(string_segment)
 959                 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
 960
 961             ie_entries = ie_result['entries']
 962
 963             def make_playlistitems_entries(list_ie_entries):
 964                 num_entries = len(list_ie_entries)
 965                 return [
 966                     list_ie_entries[i - 1] for i in playlistitems
 967                     if -num_entries <= i - 1 < num_entries]
 968
 969             def report_download(num_entries):
 970                 self.to_screen(
 971                     '[%s] playlist %s: Downloading %d videos' %
 972                     (ie_result['extractor'], playlist, num_entries))
 973
 974             if isinstance(ie_entries, list):
 975                 n_all_entries = len(ie_entries)
 976                 if playlistitems:
 977                     entries = make_playlistitems_entries(ie_entries)
 978                 else:
 979                     entries = ie_entries[playliststart:playlistend]
 980                 n_entries = len(entries)
 981                 self.to_screen(
 982                     '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
 983                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
 984             elif isinstance(ie_entries, PagedList):
 985                 if playlistitems:
 986                     entries = []
 987                     for item in playlistitems:
 988                         entries.extend(ie_entries.getslice(
 989                             item - 1, item
 990                         ))
 991                 else:
 992                     entries = ie_entries.getslice(
 993                         playliststart, playlistend)
 994                 n_entries = len(entries)
 995                 report_download(n_entries)
 996             else:  # iterable
 997                 if playlistitems:
 998                     entries = make_playlistitems_entries(list(itertools.islice(
 999                         ie_entries, 0, max(playlistitems))))
1000                 else:
1001                     entries = list(itertools.islice(
1002                         ie_entries, playliststart, playlistend))
1003                 n_entries = len(entries)
1004                 report_download(n_entries)
1005
1006             if self.params.get('playlistreverse', False):
1007                 entries = entries[::-1]
1008
1009             if self.params.get('playlistrandom', False):
1010                 random.shuffle(entries)
1011
1012             x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1013
1014             for i, entry in enumerate(entries, 1):
1015                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1016                 # This __x_forwarded_for_ip thing is a bit ugly but requires
1017                 # minimal changes
1018                 if x_forwarded_for:
1019                     entry['__x_forwarded_for_ip'] = x_forwarded_for
1020                 extra = {
1021                     'n_entries': n_entries,
1022                     'playlist': playlist,
1023                     'playlist_id': ie_result.get('id'),
1024                     'playlist_title': ie_result.get('title'),
1025                     'playlist_uploader': ie_result.get('uploader'),
1026                     'playlist_uploader_id': ie_result.get('uploader_id'),
1027                     'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
1028                     'extractor': ie_result['extractor'],
1029                     'webpage_url': ie_result['webpage_url'],
1030                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1031                     'extractor_key': ie_result['extractor_key'],
1032                 }
1033
1034                 reason = self._match_entry(entry, incomplete=True)
1035                 if reason is not None:
1036                     self.to_screen('[download] ' + reason)
1037                     continue
1038
1039                 entry_result = self.process_ie_result(entry,
1040                                                       download=download,
1041                                                       extra_info=extra)
1042                 playlist_results.append(entry_result)
1043             ie_result['entries'] = playlist_results
1044             self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1045             return ie_result
1046         elif result_type == 'compat_list':
1047             self.report_warning(
1048                 'Extractor %s returned a compat_list result. '
1049                 'It needs to be updated.' % ie_result.get('extractor'))
1050
1051             def _fixup(r):
1052                 self.add_extra_info(
1053                     r,
1054                     {
1055                         'extractor': ie_result['extractor'],
1056                         'webpage_url': ie_result['webpage_url'],
1057                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
1058                         'extractor_key': ie_result['extractor_key'],
1059                     }
1060                 )
1061                 return r
1062             ie_result['entries'] = [
1063                 self.process_ie_result(_fixup(r), download, extra_info)
1064                 for r in ie_result['entries']
1065             ]
1066             return ie_result
1067         else:
1068             raise Exception('Invalid result type: %s' % result_type)
1069
1070     def _build_format_filter(self, filter_spec):
1071         " Returns a function to filter the formats according to the filter_spec "
1072
1073         OPERATORS = {
1074             '<': operator.lt,
1075             '<=': operator.le,
1076             '>': operator.gt,
1077             '>=': operator.ge,
1078             '=': operator.eq,
1079             '!=': operator.ne,
1080         }
1081         operator_rex = re.compile(r'''(?x)\s*
1082             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
1083             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1084             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
1085             $
1086             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1087         m = operator_rex.search(filter_spec)
1088         if m:
1089             try:
1090                 comparison_value = int(m.group('value'))
1091             except ValueError:
1092                 comparison_value = parse_filesize(m.group('value'))
1093                 if comparison_value is None:
1094                     comparison_value = parse_filesize(m.group('value') + 'B')
1095                 if comparison_value is None:
1096                     raise ValueError(
1097                         'Invalid value %r in format specification %r' % (
1098                             m.group('value'), filter_spec))
1099             op = OPERATORS[m.group('op')]
1100
1101         if not m:
1102             STR_OPERATORS = {
1103                 '=': operator.eq,
1104                 '^=': lambda attr, value: attr.startswith(value),
1105                 '$=': lambda attr, value: attr.endswith(value),
1106                 '*=': lambda attr, value: value in attr,
1107             }
1108             str_operator_rex = re.compile(r'''(?x)
1109                 \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
1110                 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
1111                 \s*(?P<value>[a-zA-Z0-9._-]+)
1112                 \s*$
1113                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1114             m = str_operator_rex.search(filter_spec)
1115             if m:
1116                 comparison_value = m.group('value')
1117                 str_op = STR_OPERATORS[m.group('op')]
1118                 if m.group('negation'):
1119                     op = lambda attr, value: not str_op(attr, value)
1120                 else:
1121                     op = str_op
1122
1123         if not m:
1124             raise ValueError('Invalid filter specification %r' % filter_spec)
1125
1126         def _filter(f):
1127             actual_value = f.get(m.group('key'))
1128             if actual_value is None:
1129                 return m.group('none_inclusive')
1130             return op(actual_value, comparison_value)
1131         return _filter
1132
1133     def _default_format_spec(self, info_dict, download=True):
1134
1135         def can_merge():
1136             merger = FFmpegMergerPP(self)
1137             return merger.available and merger.can_merge()
1138
1139         def prefer_best():
1140             if self.params.get('simulate', False):
1141                 return False
1142             if not download:
1143                 return False
1144             if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
1145                 return True
1146             if info_dict.get('is_live'):
1147                 return True
1148             if not can_merge():
1149                 return True
1150             return False
1151
1152         req_format_list = ['bestvideo+bestaudio', 'best']
1153         if prefer_best():
1154             req_format_list.reverse()
1155         return '/'.join(req_format_list)
1156
1157     def build_format_selector(self, format_spec):
1158         def syntax_error(note, start):
1159             message = (
1160                 'Invalid format specification: '
1161                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1162             return SyntaxError(message)
1163
1164         PICKFIRST = 'PICKFIRST'
1165         MERGE = 'MERGE'
1166         SINGLE = 'SINGLE'
1167         GROUP = 'GROUP'
1168         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1169
1170         def _parse_filter(tokens):
1171             filter_parts = []
1172             for type, string, start, _, _ in tokens:
1173                 if type == tokenize.OP and string == ']':
1174                     return ''.join(filter_parts)
1175                 else:
1176                     filter_parts.append(string)
1177
1178         def _remove_unused_ops(tokens):
1179             # Remove operators that we don't use and join them with the surrounding strings
1180             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1181             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1182             last_string, last_start, last_end, last_line = None, None, None, None
1183             for type, string, start, end, line in tokens:
1184                 if type == tokenize.OP and string == '[':
1185                     if last_string:
1186                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1187                         last_string = None
1188                     yield type, string, start, end, line
1189                     # everything inside brackets will be handled by _parse_filter
1190                     for type, string, start, end, line in tokens:
1191                         yield type, string, start, end, line
1192                         if type == tokenize.OP and string == ']':
1193                             break
1194                 elif type == tokenize.OP and string in ALLOWED_OPS:
1195                     if last_string:
1196                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1197                         last_string = None
1198                     yield type, string, start, end, line
1199                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1200                     if not last_string:
1201                         last_string = string
1202                         last_start = start
1203                         last_end = end
1204                     else:
1205                         last_string += string
1206             if last_string:
1207                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1208
1209         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1210             selectors = []
1211             current_selector = None
1212             for type, string, start, _, _ in tokens:
1213                 # ENCODING is only defined in python 3.x
1214                 if type == getattr(tokenize, 'ENCODING', None):
1215                     continue
1216                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1217                     current_selector = FormatSelector(SINGLE, string, [])
1218                 elif type == tokenize.OP:
1219                     if string == ')':
1220                         if not inside_group:
1221                             # ')' will be handled by the parentheses group
1222                             tokens.restore_last_token()
1223                         break
1224                     elif inside_merge and string in ['/', ',']:
1225                         tokens.restore_last_token()
1226                         break
1227                     elif inside_choice and string == ',':
1228                         tokens.restore_last_token()
1229                         break
1230                     elif string == ',':
1231                         if not current_selector:
1232                             raise syntax_error('"," must follow a format selector', start)
1233                         selectors.append(current_selector)
1234                         current_selector = None
1235                     elif string == '/':
1236                         if not current_selector:
1237                             raise syntax_error('"/" must follow a format selector', start)
1238                         first_choice = current_selector
1239                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1240                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1241                     elif string == '[':
1242                         if not current_selector:
1243                             current_selector = FormatSelector(SINGLE, 'best', [])
1244                         format_filter = _parse_filter(tokens)
1245                         current_selector.filters.append(format_filter)
1246                     elif string == '(':
1247                         if current_selector:
1248                             raise syntax_error('Unexpected "("', start)
1249                         group = _parse_format_selection(tokens, inside_group=True)
1250                         current_selector = FormatSelector(GROUP, group, [])
1251                     elif string == '+':
1252                         if not current_selector:
1253                             raise syntax_error('Unexpected "+"', start)
1254                         selector_1 = current_selector
1255                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
1256                         if not selector_2:
1257                             raise syntax_error('Expected a selector', start)
1258                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1259                     else:
1260                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1261                 elif type == tokenize.ENDMARKER:
1262                     break
1263             if current_selector:
1264                 selectors.append(current_selector)
1265             return selectors
1266
1267         def _build_selector_function(selector):
1268             if isinstance(selector, list):
1269                 fs = [_build_selector_function(s) for s in selector]
1270
1271                 def selector_function(ctx):
1272                     for f in fs:
1273                         for format in f(ctx):
1274                             yield format
1275                 return selector_function
1276             elif selector.type == GROUP:
1277                 selector_function = _build_selector_function(selector.selector)
1278             elif selector.type == PICKFIRST:
1279                 fs = [_build_selector_function(s) for s in selector.selector]
1280
1281                 def selector_function(ctx):
1282                     for f in fs:
1283                         picked_formats = list(f(ctx))
1284                         if picked_formats:
1285                             return picked_formats
1286                     return []
1287             elif selector.type == SINGLE:
1288                 format_spec = selector.selector
1289
1290                 def selector_function(ctx):
1291                     formats = list(ctx['formats'])
1292                     if not formats:
1293                         return
1294                     if format_spec == 'all':
1295                         for f in formats:
1296                             yield f
1297                     elif format_spec in ['best', 'worst', None]:
1298                         format_idx = 0 if format_spec == 'worst' else -1
1299                         audiovideo_formats = [
1300                             f for f in formats
1301                             if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
1302                         if audiovideo_formats:
1303                             yield audiovideo_formats[format_idx]
1304                         # for extractors with incomplete formats (audio only (soundcloud)
1305                         # or video only (imgur)) we will fallback to best/worst
1306                         # {video,audio}-only format
1307                         elif ctx['incomplete_formats']:
1308                             yield formats[format_idx]
1309                     elif format_spec == 'bestaudio':
1310                         audio_formats = [
1311                             f for f in formats
1312                             if f.get('vcodec') == 'none']
1313                         if audio_formats:
1314                             yield audio_formats[-1]
1315                     elif format_spec == 'worstaudio':
1316                         audio_formats = [
1317                             f for f in formats
1318                             if f.get('vcodec') == 'none']
1319                         if audio_formats:
1320                             yield audio_formats[0]
1321                     elif format_spec == 'bestvideo':
1322                         video_formats = [
1323                             f for f in formats
1324                             if f.get('acodec') == 'none']
1325                         if video_formats:
1326                             yield video_formats[-1]
1327                     elif format_spec == 'worstvideo':
1328                         video_formats = [
1329                             f for f in formats
1330                             if f.get('acodec') == 'none']
1331                         if video_formats:
1332                             yield video_formats[0]
1333                     else:
1334                         extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
1335                         if format_spec in extensions:
1336                             filter_f = lambda f: f['ext'] == format_spec
1337                         else:
1338                             filter_f = lambda f: f['format_id'] == format_spec
1339                         matches = list(filter(filter_f, formats))
1340                         if matches:
1341                             yield matches[-1]
1342             elif selector.type == MERGE:
1343                 def _merge(formats_pair):
1344                     format_1, format_2 = formats_pair
1345
1346                     formats_info = []
1347                     formats_info.extend(format_1.get('requested_formats', (format_1,)))
1348                     formats_info.extend(format_2.get('requested_formats', (format_2,)))
1349
1350                     video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1351                     audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1352
1353                     the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1354                     the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1355
1356                     output_ext = self.params.get('merge_output_format')
1357                     if not output_ext:
1358                         if the_only_video:
1359                             output_ext = the_only_video['ext']
1360                         elif the_only_audio and not video_fmts:
1361                             output_ext = the_only_audio['ext']
1362                         else:
1363                             output_ext = 'mkv'
1364
1365                     new_dict = {
1366                         'requested_formats': formats_info,
1367                         'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
1368                         'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
1369                         'ext': output_ext,
1370                     }
1371
1372                     if the_only_video:
1373                         new_dict.update({
1374                             'width': the_only_video.get('width'),
1375                             'height': the_only_video.get('height'),
1376                             'resolution': the_only_video.get('resolution'),
1377                             'fps': the_only_video.get('fps'),
1378                             'vcodec': the_only_video.get('vcodec'),
1379                             'vbr': the_only_video.get('vbr'),
1380                             'stretched_ratio': the_only_video.get('stretched_ratio'),
1381                         })
1382
1383                     if the_only_audio:
1384                         new_dict.update({
1385                             'acodec': the_only_audio.get('acodec'),
1386                             'abr': the_only_audio.get('abr'),
1387                         })
1388
1389                     return new_dict
1390
1391                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
1392
1393                 def selector_function(ctx):
1394                     for pair in itertools.product(
1395                             selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
1396                         yield _merge(pair)
1397
1398             filters = [self._build_format_filter(f) for f in selector.filters]
1399
1400             def final_selector(ctx):
1401                 ctx_copy = copy.deepcopy(ctx)
1402                 for _filter in filters:
1403                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1404                 return selector_function(ctx_copy)
1405             return final_selector
1406
1407         stream = io.BytesIO(format_spec.encode('utf-8'))
1408         try:
1409             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1410         except tokenize.TokenError:
1411             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1412
1413         class TokenIterator(object):
1414             def __init__(self, tokens):
1415                 self.tokens = tokens
1416                 self.counter = 0
1417
1418             def __iter__(self):
1419                 return self
1420
1421             def __next__(self):
1422                 if self.counter >= len(self.tokens):
1423                     raise StopIteration()
1424                 value = self.tokens[self.counter]
1425                 self.counter += 1
1426                 return value
1427
1428             next = __next__
1429
1430             def restore_last_token(self):
1431                 self.counter -= 1
1432
1433         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1434         return _build_selector_function(parsed_selector)
1435
1436     def _calc_headers(self, info_dict):
1437         res = std_headers.copy()
1438
1439         add_headers = info_dict.get('http_headers')
1440         if add_headers:
1441             res.update(add_headers)
1442
1443         cookies = self._calc_cookies(info_dict)
1444         if cookies:
1445             res['Cookie'] = cookies
1446
1447         if 'X-Forwarded-For' not in res:
1448             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1449             if x_forwarded_for_ip:
1450                 res['X-Forwarded-For'] = x_forwarded_for_ip
1451
1452         return res
1453
1454     def _calc_cookies(self, info_dict):
1455         pr = sanitized_Request(info_dict['url'])
1456         self.cookiejar.add_cookie_header(pr)
1457         return pr.get_header('Cookie')
1458
1459     def process_video_result(self, info_dict, download=True):
1460         assert info_dict.get('_type', 'video') == 'video'
1461
1462         if 'id' not in info_dict:
1463             raise ExtractorError('Missing "id" field in extractor result')
1464         if 'title' not in info_dict:
1465             raise ExtractorError('Missing "title" field in extractor result')
1466
1467         def report_force_conversion(field, field_not, conversion):
1468             self.report_warning(
1469                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
1470                 % (field, field_not, conversion))
1471
1472         def sanitize_string_field(info, string_field):
1473             field = info.get(string_field)
1474             if field is None or isinstance(field, compat_str):
1475                 return
1476             report_force_conversion(string_field, 'a string', 'string')
1477             info[string_field] = compat_str(field)
1478
1479         def sanitize_numeric_fields(info):
1480             for numeric_field in self._NUMERIC_FIELDS:
1481                 field = info.get(numeric_field)
1482                 if field is None or isinstance(field, compat_numeric_types):
1483                     continue
1484                 report_force_conversion(numeric_field, 'numeric', 'int')
1485                 info[numeric_field] = int_or_none(field)
1486
1487         sanitize_string_field(info_dict, 'id')
1488         sanitize_numeric_fields(info_dict)
1489
1490         if 'playlist' not in info_dict:
1491             # It isn't part of a playlist
1492             info_dict['playlist'] = None
1493             info_dict['playlist_index'] = None
1494
1495         thumbnails = info_dict.get('thumbnails')
1496         if thumbnails is None:
1497             thumbnail = info_dict.get('thumbnail')
1498             if thumbnail:
1499                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1500         if thumbnails:
1501             thumbnails.sort(key=lambda t: (
1502                 t.get('preference') if t.get('preference') is not None else -1,
1503                 t.get('width') if t.get('width') is not None else -1,
1504                 t.get('height') if t.get('height') is not None else -1,
1505                 t.get('id') if t.get('id') is not None else '', t.get('url')))
1506             for i, t in enumerate(thumbnails):
1507                 t['url'] = sanitize_url(t['url'])
1508                 if t.get('width') and t.get('height'):
1509                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1510                 if t.get('id') is None:
1511                     t['id'] = '%d' % i
1512
1513         if self.params.get('list_thumbnails'):
1514             self.list_thumbnails(info_dict)
1515             return
1516
1517         thumbnail = info_dict.get('thumbnail')
1518         if thumbnail:
1519             info_dict['thumbnail'] = sanitize_url(thumbnail)
1520         elif thumbnails:
1521             info_dict['thumbnail'] = thumbnails[-1]['url']
1522
1523         if 'display_id' not in info_dict and 'id' in info_dict:
1524             info_dict['display_id'] = info_dict['id']
1525
1526         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1527             # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1528             # see http://bugs.python.org/issue1646728)
1529             try:
1530                 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1531                 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1532             except (ValueError, OverflowError, OSError):
1533                 pass
1534
1535         # Auto generate title fields corresponding to the *_number fields when missing
1536         # in order to always have clean titles. This is very common for TV series.
1537         for field in ('chapter', 'season', 'episode'):
1538             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1539                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
1540
1541         for cc_kind in ('subtitles', 'automatic_captions'):
1542             cc = info_dict.get(cc_kind)
1543             if cc:
1544                 for _, subtitle in cc.items():
1545                     for subtitle_format in subtitle:
1546                         if subtitle_format.get('url'):
1547                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
1548                         if subtitle_format.get('ext') is None:
1549                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1550
1551         automatic_captions = info_dict.get('automatic_captions')
1552         subtitles = info_dict.get('subtitles')
1553
1554         if self.params.get('listsubtitles', False):
1555             if 'automatic_captions' in info_dict:
1556                 self.list_subtitles(
1557                     info_dict['id'], automatic_captions, 'automatic captions')
1558             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1559             return
1560
1561         info_dict['requested_subtitles'] = self.process_subtitles(
1562             info_dict['id'], subtitles, automatic_captions)
1563
1564         # We now pick which formats have to be downloaded
1565         if info_dict.get('formats') is None:
1566             # There's only one format available
1567             formats = [info_dict]
1568         else:
1569             formats = info_dict['formats']
1570
1571         if not formats:
1572             raise ExtractorError('No video formats found!')
1573
1574         def is_wellformed(f):
1575             url = f.get('url')
1576             if not url:
1577                 self.report_warning(
1578                     '"url" field is missing or empty - skipping format, '
1579                     'there is an error in extractor')
1580                 return False
1581             if isinstance(url, bytes):
1582                 sanitize_string_field(f, 'url')
1583             return True
1584
1585         # Filter out malformed formats for better extraction robustness
1586         formats = list(filter(is_wellformed, formats))
1587
1588         formats_dict = {}
1589
1590         # We check that all the formats have the format and format_id fields
1591         for i, format in enumerate(formats):
1592             sanitize_string_field(format, 'format_id')
1593             sanitize_numeric_fields(format)
1594             format['url'] = sanitize_url(format['url'])
1595             if not format.get('format_id'):
1596                 format['format_id'] = compat_str(i)
1597             else:
1598                 # Sanitize format_id from characters used in format selector expression
1599                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
1600             format_id = format['format_id']
1601             if format_id not in formats_dict:
1602                 formats_dict[format_id] = []
1603             formats_dict[format_id].append(format)
1604
1605         # Make sure all formats have unique format_id
1606         for format_id, ambiguous_formats in formats_dict.items():
1607             if len(ambiguous_formats) > 1:
1608                 for i, format in enumerate(ambiguous_formats):
1609                     format['format_id'] = '%s-%d' % (format_id, i)
1610
1611         for i, format in enumerate(formats):
1612             if format.get('format') is None:
1613                 format['format'] = '{id} - {res}{note}'.format(
1614                     id=format['format_id'],
1615                     res=self.format_resolution(format),
1616                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1617                 )
1618             # Automatically determine file extension if missing
1619             if format.get('ext') is None:
1620                 format['ext'] = determine_ext(format['url']).lower()
1621             # Automatically determine protocol if missing (useful for format
1622             # selection purposes)
1623             if format.get('protocol') is None:
1624                 format['protocol'] = determine_protocol(format)
1625             # Add HTTP headers, so that external programs can use them from the
1626             # json output
1627             full_format_info = info_dict.copy()
1628             full_format_info.update(format)
1629             format['http_headers'] = self._calc_headers(full_format_info)
1630         # Remove private housekeeping stuff
1631         if '__x_forwarded_for_ip' in info_dict:
1632             del info_dict['__x_forwarded_for_ip']
1633
1634         # TODO Central sorting goes here
1635
1636         if formats[0] is not info_dict:
1637             # only set the 'formats' fields if the original info_dict list them
1638             # otherwise we end up with a circular reference, the first (and unique)
1639             # element in the 'formats' field in info_dict is info_dict itself,
1640             # which can't be exported to json
1641             info_dict['formats'] = formats
1642         if self.params.get('listformats'):
1643             self.list_formats(info_dict)
1644             return
1645
1646         req_format = self.params.get('format')
1647         if req_format is None:
1648             req_format = self._default_format_spec(info_dict, download=download)
1649             if self.params.get('verbose'):
1650                 self.to_stdout('[debug] Default format spec: %s' % req_format)
1651
1652         format_selector = self.build_format_selector(req_format)
1653
1654         # While in format selection we may need to have an access to the original
1655         # format set in order to calculate some metrics or do some processing.
1656         # For now we need to be able to guess whether original formats provided
1657         # by extractor are incomplete or not (i.e. whether extractor provides only
1658         # video-only or audio-only formats) for proper formats selection for
1659         # extractors with such incomplete formats (see
1660         # https://github.com/ytdl-org/youtube-dl/pull/5556).
1661         # Since formats may be filtered during format selection and may not match
1662         # the original formats the results may be incorrect. Thus original formats
1663         # or pre-calculated metrics should be passed to format selection routines
1664         # as well.
1665         # We will pass a context object containing all necessary additional data
1666         # instead of just formats.
1667         # This fixes incorrect format selection issue (see
1668         # https://github.com/ytdl-org/youtube-dl/issues/10083).
1669         incomplete_formats = (
1670             # All formats are video-only or
1671             all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
1672             # all formats are audio-only
1673             or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
1674
1675         ctx = {
1676             'formats': formats,
1677             'incomplete_formats': incomplete_formats,
1678         }
1679
1680         formats_to_download = list(format_selector(ctx))
1681         if not formats_to_download:
1682             raise ExtractorError('requested format not available',
1683                                  expected=True)
1684
1685         if download:
1686             if len(formats_to_download) > 1:
1687                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1688             for format in formats_to_download:
1689                 new_info = dict(info_dict)
1690                 new_info.update(format)
1691                 self.process_info(new_info)
1692         # We update the info dict with the best quality format (backwards compatibility)
1693         info_dict.update(formats_to_download[-1])
1694         return info_dict
1695
1696     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1697         """Select the requested subtitles and their format"""
1698         available_subs = {}
1699         if normal_subtitles and self.params.get('writesubtitles'):
1700             available_subs.update(normal_subtitles)
1701         if automatic_captions and self.params.get('writeautomaticsub'):
1702             for lang, cap_info in automatic_captions.items():
1703                 if lang not in available_subs:
1704                     available_subs[lang] = cap_info
1705
1706         if (not self.params.get('writesubtitles') and not
1707                 self.params.get('writeautomaticsub') or not
1708                 available_subs):
1709             return None
1710
1711         if self.params.get('allsubtitles', False):
1712             requested_langs = available_subs.keys()
1713         else:
1714             if self.params.get('subtitleslangs', False):
1715                 requested_langs = self.params.get('subtitleslangs')
1716             elif 'en' in available_subs:
1717                 requested_langs = ['en']
1718             else:
1719                 requested_langs = [list(available_subs.keys())[0]]
1720
1721         formats_query = self.params.get('subtitlesformat', 'best')
1722         formats_preference = formats_query.split('/') if formats_query else []
1723         subs = {}
1724         for lang in requested_langs:
1725             formats = available_subs.get(lang)
1726             if formats is None:
1727                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1728                 continue
1729             for ext in formats_preference:
1730                 if ext == 'best':
1731                     f = formats[-1]
1732                     break
1733                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1734                 if matches:
1735                     f = matches[-1]
1736                     break
1737             else:
1738                 f = formats[-1]
1739                 self.report_warning(
1740                     'No subtitle format found matching "%s" for language %s, '
1741                     'using %s' % (formats_query, lang, f['ext']))
1742             subs[lang] = f
1743         return subs
1744
1745     def __forced_printings(self, info_dict, filename, incomplete):
1746         def print_mandatory(field):
1747             if (self.params.get('force%s' % field, False)
1748                     and (not incomplete or info_dict.get(field) is not None)):
1749                 self.to_stdout(info_dict[field])
1750
1751         def print_optional(field):
1752             if (self.params.get('force%s' % field, False)
1753                     and info_dict.get(field) is not None):
1754                 self.to_stdout(info_dict[field])
1755
1756         print_mandatory('title')
1757         print_mandatory('id')
1758         if self.params.get('forceurl', False) and not incomplete:
1759             if info_dict.get('requested_formats') is not None:
1760                 for f in info_dict['requested_formats']:
1761                     self.to_stdout(f['url'] + f.get('play_path', ''))
1762             else:
1763                 # For RTMP URLs, also include the playpath
1764                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1765         print_optional('thumbnail')
1766         print_optional('description')
1767         if self.params.get('forcefilename', False) and filename is not None:
1768             self.to_stdout(filename)
1769         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1770             self.to_stdout(formatSeconds(info_dict['duration']))
1771         print_mandatory('format')
1772         if self.params.get('forcejson', False):
1773             self.to_stdout(json.dumps(info_dict))
1774
1775     def process_info(self, info_dict):
1776         """Process a single resolved IE result."""
1777
1778         assert info_dict.get('_type', 'video') == 'video'
1779
1780         max_downloads = self.params.get('max_downloads')
1781         if max_downloads is not None:
1782             if self._num_downloads >= int(max_downloads):
1783                 raise MaxDownloadsReached()
1784
1785         # TODO: backward compatibility, to be removed
1786         info_dict['fulltitle'] = info_dict['title']
1787
1788         if 'format' not in info_dict:
1789             info_dict['format'] = info_dict['ext']
1790
1791         reason = self._match_entry(info_dict, incomplete=False)
1792         if reason is not None:
1793             self.to_screen('[download] ' + reason)
1794             return
1795
1796         self._num_downloads += 1
1797
1798         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1799
1800         # Forced printings
1801         self.__forced_printings(info_dict, filename, incomplete=False)
1802
1803         # Do nothing else if in simulate mode
1804         if self.params.get('simulate', False):
1805             return
1806
1807         if filename is None:
1808             return
1809
1810         def ensure_dir_exists(path):
1811             try:
1812                 dn = os.path.dirname(path)
1813                 if dn and not os.path.exists(dn):
1814                     os.makedirs(dn)
1815                 return True
1816             except (OSError, IOError) as err:
1817                 self.report_error('unable to create directory ' + error_to_compat_str(err))
1818                 return False
1819
1820         if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
1821             return
1822
1823         if self.params.get('writedescription', False):
1824             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1825             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1826                 self.to_screen('[info] Video description is already present')
1827             elif info_dict.get('description') is None:
1828                 self.report_warning('There\'s no description to write.')
1829             else:
1830                 try:
1831                     self.to_screen('[info] Writing video description to: ' + descfn)
1832                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1833                         descfile.write(info_dict['description'])
1834                 except (OSError, IOError):
1835                     self.report_error('Cannot write description file ' + descfn)
1836                     return
1837
1838         if self.params.get('writeannotations', False):
1839             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1840             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1841                 self.to_screen('[info] Video annotations are already present')
1842             elif not info_dict.get('annotations'):
1843                 self.report_warning('There are no annotations to write.')
1844             else:
1845                 try:
1846                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1847                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1848                         annofile.write(info_dict['annotations'])
1849                 except (KeyError, TypeError):
1850                     self.report_warning('There are no annotations to write.')
1851                 except (OSError, IOError):
1852                     self.report_error('Cannot write annotations file: ' + annofn)
1853                     return
1854
1855         def dl(name, info):
1856             fd = get_suitable_downloader(info, self.params)(self, self.params)
1857             for ph in self._progress_hooks:
1858                 fd.add_progress_hook(ph)
1859             if self.params.get('verbose'):
1860                 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1861             return fd.download(name, info)
1862
1863         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1864                                        self.params.get('writeautomaticsub')])
1865
1866         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1867             # subtitles download errors are already managed as troubles in relevant IE
1868             # that way it will silently go on when used with unsupporting IE
1869             subtitles = info_dict['requested_subtitles']
1870             ie = self.get_info_extractor(info_dict['extractor_key'])
1871             for sub_lang, sub_info in subtitles.items():
1872                 sub_format = sub_info['ext']
1873                 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
1874                 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1875                     self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
1876                 else:
1877                     self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1878                     if sub_info.get('data') is not None:
1879                         try:
1880                             # Use newline='' to prevent conversion of newline characters
1881                             # See https://github.com/ytdl-org/youtube-dl/issues/10268
1882                             with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
1883                                 subfile.write(sub_info['data'])
1884                         except (OSError, IOError):
1885                             self.report_error('Cannot write subtitles file ' + sub_filename)
1886                             return
1887                     else:
1888                         try:
1889                             if self.params.get('sleep_interval_subtitles', False):
1890                                 dl(sub_filename, sub_info)
1891                             else:
1892                                 sub_data = ie._request_webpage(
1893                                     sub_info['url'], info_dict['id'], note=False).read()
1894                                 with io.open(encodeFilename(sub_filename), 'wb') as subfile:
1895                                     subfile.write(sub_data)
1896                         except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1897                             self.report_warning('Unable to download subtitle for "%s": %s' %
1898                                                 (sub_lang, error_to_compat_str(err)))
1899                             continue
1900
1901         if self.params.get('skip_download', False):
1902             if self.params.get('convertsubtitles', False):
1903                 subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
1904                 filename_real_ext = os.path.splitext(filename)[1][1:]
1905                 filename_wo_ext = (
1906                     os.path.splitext(filename)[0]
1907                     if filename_real_ext == info_dict['ext']
1908                     else filename)
1909                 afilename = '%s.%s' % (filename_wo_ext, self.params.get('convertsubtitles'))
1910                 if subconv.available:
1911                     info_dict.setdefault('__postprocessors', [])
1912                     # info_dict['__postprocessors'].append(subconv)
1913                 if os.path.exists(encodeFilename(afilename)):
1914                     self.to_screen(
1915                         '[download] %s has already been downloaded and '
1916                         'converted' % afilename)
1917                 else:
1918                     try:
1919                         self.post_process(filename, info_dict)
1920                     except (PostProcessingError) as err:
1921                         self.report_error('postprocessing: %s' % str(err))
1922                         return
1923
1924         if self.params.get('writeinfojson', False):
1925             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1926             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1927                 self.to_screen('[info] Video description metadata is already present')
1928             else:
1929                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1930                 try:
1931                     write_json_file(self.filter_requested_info(info_dict), infofn)
1932                 except (OSError, IOError):
1933                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1934                     return
1935
1936         self._write_thumbnails(info_dict, filename)
1937
1938         if not self.params.get('skip_download', False):
1939             try:
1940                 if info_dict.get('requested_formats') is not None:
1941                     downloaded = []
1942                     success = True
1943                     merger = FFmpegMergerPP(self)
1944                     if not merger.available:
1945                         postprocessors = []
1946                         self.report_warning('You have requested multiple '
1947                                             'formats but ffmpeg or avconv are not installed.'
1948                                             ' The formats won\'t be merged.')
1949                     else:
1950                         postprocessors = [merger]
1951
1952                     def compatible_formats(formats):
1953                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
1954                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
1955                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
1956                         if len(video_formats) > 2 or len(audio_formats) > 2:
1957                             return False
1958
1959                         # Check extension
1960                         exts = set(format.get('ext') for format in formats)
1961                         COMPATIBLE_EXTS = (
1962                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
1963                             set(('webm',)),
1964                         )
1965                         for ext_sets in COMPATIBLE_EXTS:
1966                             if ext_sets.issuperset(exts):
1967                                 return True
1968                         # TODO: Check acodec/vcodec
1969                         return False
1970
1971                     filename_real_ext = os.path.splitext(filename)[1][1:]
1972                     filename_wo_ext = (
1973                         os.path.splitext(filename)[0]
1974                         if filename_real_ext == info_dict['ext']
1975                         else filename)
1976                     requested_formats = info_dict['requested_formats']
1977                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1978                         info_dict['ext'] = 'mkv'
1979                         self.report_warning(
1980                             'Requested formats are incompatible for merge and will be merged into mkv.')
1981                     # Ensure filename always has a correct extension for successful merge
1982                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1983                     if os.path.exists(encodeFilename(filename)):
1984                         self.to_screen(
1985                             '[download] %s has already been downloaded and '
1986                             'merged' % filename)
1987                     else:
1988                         for f in requested_formats:
1989                             new_info = dict(info_dict)
1990                             new_info.update(f)
1991                             fname = prepend_extension(
1992                                 self.prepare_filename(new_info),
1993                                 'f%s' % f['format_id'], new_info['ext'])
1994                             if not ensure_dir_exists(fname):
1995                                 return
1996                             downloaded.append(fname)
1997                             partial_success = dl(fname, new_info)
1998                             success = success and partial_success
1999                         info_dict['__postprocessors'] = postprocessors
2000                         info_dict['__files_to_merge'] = downloaded
2001                 else:
2002                     # Just a single file
2003                     success = dl(filename, info_dict)
2004             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2005                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2006                 return
2007             except (OSError, IOError) as err:
2008                 raise UnavailableVideoError(err)
2009             except (ContentTooShortError, ) as err:
2010                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2011                 return
2012
2013             if success and filename != '-':
2014                 # Fixup content
2015                 fixup_policy = self.params.get('fixup')
2016                 if fixup_policy is None:
2017                     fixup_policy = 'detect_or_warn'
2018
2019                 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
2020
2021                 stretched_ratio = info_dict.get('stretched_ratio')
2022                 if stretched_ratio is not None and stretched_ratio != 1:
2023                     if fixup_policy == 'warn':
2024                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2025                             info_dict['id'], stretched_ratio))
2026                     elif fixup_policy == 'detect_or_warn':
2027                         stretched_pp = FFmpegFixupStretchedPP(self)
2028                         if stretched_pp.available:
2029                             info_dict.setdefault('__postprocessors', [])
2030                             info_dict['__postprocessors'].append(stretched_pp)
2031                         else:
2032                             self.report_warning(
2033                                 '%s: Non-uniform pixel ratio (%s). %s'
2034                                 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
2035                     else:
2036                         assert fixup_policy in ('ignore', 'never')
2037
2038                 if (info_dict.get('requested_formats') is None
2039                         and info_dict.get('container') == 'm4a_dash'):
2040                     if fixup_policy == 'warn':
2041                         self.report_warning(
2042                             '%s: writing DASH m4a. '
2043                             'Only some players support this container.'
2044                             % info_dict['id'])
2045                     elif fixup_policy == 'detect_or_warn':
2046                         fixup_pp = FFmpegFixupM4aPP(self)
2047                         if fixup_pp.available:
2048                             info_dict.setdefault('__postprocessors', [])
2049                             info_dict['__postprocessors'].append(fixup_pp)
2050                         else:
2051                             self.report_warning(
2052                                 '%s: writing DASH m4a. '
2053                                 'Only some players support this container. %s'
2054                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2055                     else:
2056                         assert fixup_policy in ('ignore', 'never')
2057
2058                 if (info_dict.get('protocol') == 'm3u8_native'
2059                         or info_dict.get('protocol') == 'm3u8'
2060                         and self.params.get('hls_prefer_native')):
2061                     if fixup_policy == 'warn':
2062                         self.report_warning('%s: malformed AAC bitstream detected.' % (
2063                             info_dict['id']))
2064                     elif fixup_policy == 'detect_or_warn':
2065                         fixup_pp = FFmpegFixupM3u8PP(self)
2066                         if fixup_pp.available:
2067                             info_dict.setdefault('__postprocessors', [])
2068                             info_dict['__postprocessors'].append(fixup_pp)
2069                         else:
2070                             self.report_warning(
2071                                 '%s: malformed AAC bitstream detected. %s'
2072                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2073                     else:
2074                         assert fixup_policy in ('ignore', 'never')
2075
2076                 try:
2077                     self.post_process(filename, info_dict)
2078                 except (PostProcessingError) as err:
2079                     self.report_error('postprocessing: %s' % str(err))
2080                     return
2081                 self.record_download_archive(info_dict)
2082
2083     def download(self, url_list):
2084         """Download a given list of URLs."""
2085         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
2086         if (len(url_list) > 1
2087                 and outtmpl != '-'
2088                 and '%' not in outtmpl
2089                 and self.params.get('max_downloads') != 1):
2090             raise SameFileError(outtmpl)
2091
2092         for url in url_list:
2093             try:
2094                 # It also downloads the videos
2095                 res = self.extract_info(
2096                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2097             except UnavailableVideoError:
2098                 self.report_error('unable to download video')
2099             except MaxDownloadsReached:
2100                 self.to_screen('[info] Maximum number of downloaded files reached.')
2101                 raise
2102             else:
2103                 if self.params.get('dump_single_json', False):
2104                     self.to_stdout(json.dumps(res))
2105
2106         return self._download_retcode
2107
2108     def download_with_info_file(self, info_filename):
2109         with contextlib.closing(fileinput.FileInput(
2110                 [info_filename], mode='r',
2111                 openhook=fileinput.hook_encoded('utf-8'))) as f:
2112             # FileInput doesn't have a read method, we can't call json.load
2113             info = self.filter_requested_info(json.loads('\n'.join(f)))
2114         try:
2115             self.process_ie_result(info, download=True)
2116         except DownloadError:
2117             webpage_url = info.get('webpage_url')
2118             if webpage_url is not None:
2119                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2120                 return self.download([webpage_url])
2121             else:
2122                 raise
2123         return self._download_retcode
2124
2125     @staticmethod
2126     def filter_requested_info(info_dict):
2127         return dict(
2128             (k, v) for k, v in info_dict.items()
2129             if k not in ['requested_formats', 'requested_subtitles'])
2130
2131     def post_process(self, filename, ie_info):
2132         """Run all the postprocessors on the given file."""
2133         info = dict(ie_info)
2134         info['filepath'] = filename
2135         pps_chain = []
2136         if ie_info.get('__postprocessors') is not None:
2137             pps_chain.extend(ie_info['__postprocessors'])
2138         pps_chain.extend(self._pps)
2139         for pp in pps_chain:
2140             files_to_delete = []
2141             try:
2142                 files_to_delete, info = pp.run(info)
2143             except PostProcessingError as e:
2144                 self.report_error(e.msg)
2145             if files_to_delete and not self.params.get('keepvideo', False):
2146                 for old_filename in set(files_to_delete):
2147                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2148                     try:
2149                         os.remove(encodeFilename(old_filename))
2150                     except (IOError, OSError):
2151                         self.report_warning('Unable to remove downloaded original file')
2152
2153     def _make_archive_id(self, info_dict):
2154         video_id = info_dict.get('id')
2155         if not video_id:
2156             return
2157         # Future-proof against any change in case
2158         # and backwards compatibility with prior versions
2159         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
2160         if extractor is None:
2161             url = str_or_none(info_dict.get('url'))
2162             if not url:
2163                 return
2164             # Try to find matching extractor for the URL and take its ie_key
2165             for ie in self._ies:
2166                 if ie.suitable(url):
2167                     extractor = ie.ie_key()
2168                     break
2169             else:
2170                 return
2171         return extractor.lower() + ' ' + video_id
2172
2173     def in_download_archive(self, info_dict):
2174         fn = self.params.get('download_archive')
2175         if fn is None:
2176             return False
2177
2178         vid_id = self._make_archive_id(info_dict)
2179         if not vid_id:
2180             return False  # Incomplete video information
2181
2182         return vid_id in self.archive
2183
2184     def record_download_archive(self, info_dict):
2185         fn = self.params.get('download_archive')
2186         if fn is None:
2187             return
2188         vid_id = self._make_archive_id(info_dict)
2189         assert vid_id
2190         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2191             archive_file.write(vid_id + '\n')
2192         self.archive.add(vid_id)
2193
2194     @staticmethod
2195     def format_resolution(format, default='unknown'):
2196         if format.get('vcodec') == 'none':
2197             return 'audio only'
2198         if format.get('resolution') is not None:
2199             return format['resolution']
2200         if format.get('height') is not None:
2201             if format.get('width') is not None:
2202                 res = '%sx%s' % (format['width'], format['height'])
2203             else:
2204                 res = '%sp' % format['height']
2205         elif format.get('width') is not None:
2206             res = '%dx?' % format['width']
2207         else:
2208             res = default
2209         return res
2210
2211     def _format_note(self, fdict):
2212         res = ''
2213         if fdict.get('ext') in ['f4f', 'f4m']:
2214             res += '(unsupported) '
2215         if fdict.get('language'):
2216             if res:
2217                 res += ' '
2218             res += '[%s] ' % fdict['language']
2219         if fdict.get('format_note') is not None:
2220             res += fdict['format_note'] + ' '
2221         if fdict.get('tbr') is not None:
2222             res += '%4dk ' % fdict['tbr']
2223         if fdict.get('container') is not None:
2224             if res:
2225                 res += ', '
2226             res += '%s container' % fdict['container']
2227         if (fdict.get('vcodec') is not None
2228                 and fdict.get('vcodec') != 'none'):
2229             if res:
2230                 res += ', '
2231             res += fdict['vcodec']
2232             if fdict.get('vbr') is not None:
2233                 res += '@'
2234         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2235             res += 'video@'
2236         if fdict.get('vbr') is not None:
2237             res += '%4dk' % fdict['vbr']
2238         if fdict.get('fps') is not None:
2239             if res:
2240                 res += ', '
2241             res += '%sfps' % fdict['fps']
2242         if fdict.get('acodec') is not None:
2243             if res:
2244                 res += ', '
2245             if fdict['acodec'] == 'none':
2246                 res += 'video only'
2247             else:
2248                 res += '%-5s' % fdict['acodec']
2249         elif fdict.get('abr') is not None:
2250             if res:
2251                 res += ', '
2252             res += 'audio'
2253         if fdict.get('abr') is not None:
2254             res += '@%3dk' % fdict['abr']
2255         if fdict.get('asr') is not None:
2256             res += ' (%5dHz)' % fdict['asr']
2257         if fdict.get('filesize') is not None:
2258             if res:
2259                 res += ', '
2260             res += format_bytes(fdict['filesize'])
2261         elif fdict.get('filesize_approx') is not None:
2262             if res:
2263                 res += ', '
2264             res += '~' + format_bytes(fdict['filesize_approx'])
2265         return res
2266
2267     def list_formats(self, info_dict):
2268         formats = info_dict.get('formats', [info_dict])
2269         table = [
2270             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
2271             for f in formats
2272             if f.get('preference') is None or f['preference'] >= -1000]
2273         if len(formats) > 1:
2274             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
2275
2276         header_line = ['format code', 'extension', 'resolution', 'note']
2277         self.to_screen(
2278             '[info] Available formats for %s:\n%s' %
2279             (info_dict['id'], render_table(header_line, table)))
2280
2281     def list_thumbnails(self, info_dict):
2282         thumbnails = info_dict.get('thumbnails')
2283         if not thumbnails:
2284             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
2285             return
2286
2287         self.to_screen(
2288             '[info] Thumbnails for %s:' % info_dict['id'])
2289         self.to_screen(render_table(
2290             ['ID', 'width', 'height', 'URL'],
2291             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
2292
2293     def list_subtitles(self, video_id, subtitles, name='subtitles'):
2294         if not subtitles:
2295             self.to_screen('%s has no %s' % (video_id, name))
2296             return
2297         self.to_screen(
2298             'Available %s for %s:' % (name, video_id))
2299         self.to_screen(render_table(
2300             ['Language', 'formats'],
2301             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
2302                 for lang, formats in subtitles.items()]))
2303
2304     def urlopen(self, req):
2305         """ Start an HTTP download """
2306         if isinstance(req, compat_basestring):
2307             req = sanitized_Request(req)
2308         return self._opener.open(req, timeout=self._socket_timeout)
2309
2310     def print_debug_header(self):
2311         if not self.params.get('verbose'):
2312             return
2313
2314         if type('') is not compat_str:
2315             # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
2316             self.report_warning(
2317                 'Your Python is broken! Update to a newer and supported version')
2318
2319         stdout_encoding = getattr(
2320             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
2321         encoding_str = (
2322             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
2323                 locale.getpreferredencoding(),
2324                 sys.getfilesystemencoding(),
2325                 stdout_encoding,
2326                 self.get_encoding()))
2327         write_string(encoding_str, encoding=None)
2328
2329         self._write_string('[debug] youtube-dlc version ' + __version__ + '\n')
2330         if _LAZY_LOADER:
2331             self._write_string('[debug] Lazy loading extractors enabled' + '\n')
2332         try:
2333             sp = subprocess.Popen(
2334                 ['git', 'rev-parse', '--short', 'HEAD'],
2335                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
2336                 cwd=os.path.dirname(os.path.abspath(__file__)))
2337             out, err = sp.communicate()
2338             out = out.decode().strip()
2339             if re.match('[0-9a-f]+', out):
2340                 self._write_string('[debug] Git HEAD: ' + out + '\n')
2341         except Exception:
2342             try:
2343                 sys.exc_clear()
2344             except Exception:
2345                 pass
2346
2347         def python_implementation():
2348             impl_name = platform.python_implementation()
2349             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
2350                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
2351             return impl_name
2352
2353         self._write_string('[debug] Python version %s (%s) - %s\n' % (
2354             platform.python_version(), python_implementation(),
2355             platform_name()))
2356
2357         exe_versions = FFmpegPostProcessor.get_versions(self)
2358         exe_versions['rtmpdump'] = rtmpdump_version()
2359         exe_versions['phantomjs'] = PhantomJSwrapper._version()
2360         exe_str = ', '.join(
2361             '%s %s' % (exe, v)
2362             for exe, v in sorted(exe_versions.items())
2363             if v
2364         )
2365         if not exe_str:
2366             exe_str = 'none'
2367         self._write_string('[debug] exe versions: %s\n' % exe_str)
2368
2369         proxy_map = {}
2370         for handler in self._opener.handlers:
2371             if hasattr(handler, 'proxies'):
2372                 proxy_map.update(handler.proxies)
2373         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
2374
2375         if self.params.get('call_home', False):
2376             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
2377             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
2378             latest_version = self.urlopen(
2379                 'https://yt-dl.org/latest/version').read().decode('utf-8')
2380             if version_tuple(latest_version) > version_tuple(__version__):
2381                 self.report_warning(
2382                     'You are using an outdated version (newest version: %s)! '
2383                     'See https://yt-dl.org/update if you need help updating.' %
2384                     latest_version)
2385
2386     def _setup_opener(self):
2387         timeout_val = self.params.get('socket_timeout')
2388         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
2389
2390         opts_cookiefile = self.params.get('cookiefile')
2391         opts_proxy = self.params.get('proxy')
2392
2393         if opts_cookiefile is None:
2394             self.cookiejar = compat_cookiejar.CookieJar()
2395         else:
2396             opts_cookiefile = expand_path(opts_cookiefile)
2397             self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
2398             if os.access(opts_cookiefile, os.R_OK):
2399                 self.cookiejar.load(ignore_discard=True, ignore_expires=True)
2400
2401         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
2402         if opts_proxy is not None:
2403             if opts_proxy == '':
2404                 proxies = {}
2405             else:
2406                 proxies = {'http': opts_proxy, 'https': opts_proxy}
2407         else:
2408             proxies = compat_urllib_request.getproxies()
2409             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
2410             if 'http' in proxies and 'https' not in proxies:
2411                 proxies['https'] = proxies['http']
2412         proxy_handler = PerRequestProxyHandler(proxies)
2413
2414         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
2415         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
2416         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
2417         redirect_handler = YoutubeDLRedirectHandler()
2418         data_handler = compat_urllib_request_DataHandler()
2419
2420         # When passing our own FileHandler instance, build_opener won't add the
2421         # default FileHandler and allows us to disable the file protocol, which
2422         # can be used for malicious purposes (see
2423         # https://github.com/ytdl-org/youtube-dl/issues/8227)
2424         file_handler = compat_urllib_request.FileHandler()
2425
2426         def file_open(*args, **kwargs):
2427             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dlc for security reasons')
2428         file_handler.file_open = file_open
2429
2430         opener = compat_urllib_request.build_opener(
2431             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2432
2433         # Delete the default user-agent header, which would otherwise apply in
2434         # cases where our custom HTTP handler doesn't come into play
2435         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
2436         opener.addheaders = []
2437         self._opener = opener
2438
2439     def encode(self, s):
2440         if isinstance(s, bytes):
2441             return s  # Already encoded
2442
2443         try:
2444             return s.encode(self.get_encoding())
2445         except UnicodeEncodeError as err:
2446             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2447             raise
2448
2449     def get_encoding(self):
2450         encoding = self.params.get('encoding')
2451         if encoding is None:
2452             encoding = preferredencoding()
2453         return encoding
2454
2455     def _write_thumbnails(self, info_dict, filename):
2456         if self.params.get('writethumbnail', False):
2457             thumbnails = info_dict.get('thumbnails')
2458             if thumbnails:
2459                 thumbnails = [thumbnails[-1]]
2460         elif self.params.get('write_all_thumbnails', False):
2461             thumbnails = info_dict.get('thumbnails')
2462         else:
2463             return
2464
2465         if not thumbnails:
2466             # No thumbnails present, so return immediately
2467             return
2468
2469         for t in thumbnails:
2470             thumb_ext = determine_ext(t['url'], 'jpg')
2471             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2472             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2473             t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
2474
2475             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2476                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2477                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2478             else:
2479                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2480                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2481                 try:
2482                     uf = self.urlopen(t['url'])
2483                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2484                         shutil.copyfileobj(uf, thumbf)
2485                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2486                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2487                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2488                     self.report_warning('Unable to download thumbnail "%s": %s' %
2489                                         (t['url'], error_to_compat_str(err)))