youtube_dlc/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import copy
   9 import datetime
  10 import errno
  11 import fileinput
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import socket
  23 import sys
  24 import time
  25 import tokenize
  26 import traceback
  27 import random
  28
  29 from string import ascii_letters
  30
  31 from .compat import (
  32     compat_basestring,
  33     compat_cookiejar,
  34     compat_get_terminal_size,
  35     compat_http_client,
  36     compat_kwargs,
  37     compat_numeric_types,
  38     compat_os_name,
  39     compat_str,
  40     compat_tokenize_tokenize,
  41     compat_urllib_error,
  42     compat_urllib_request,
  43     compat_urllib_request_DataHandler,
  44 )
  45 from .utils import (
  46     age_restricted,
  47     args_to_str,
  48     ContentTooShortError,
  49     date_from_str,
  50     DateRange,
  51     DEFAULT_OUTTMPL,
  52     determine_ext,
  53     determine_protocol,
  54     DownloadError,
  55     encode_compat_str,
  56     encodeFilename,
  57     error_to_compat_str,
  58     expand_path,
  59     ExtractorError,
  60     format_bytes,
  61     formatSeconds,
  62     GeoRestrictedError,
  63     int_or_none,
  64     ISO3166Utils,
  65     locked_file,
  66     make_HTTPS_handler,
  67     MaxDownloadsReached,
  68     orderedSet,
  69     PagedList,
  70     parse_filesize,
  71     PerRequestProxyHandler,
  72     platform_name,
  73     PostProcessingError,
  74     preferredencoding,
  75     prepend_extension,
  76     register_socks_protocols,
  77     render_table,
  78     replace_extension,
  79     SameFileError,
  80     sanitize_filename,
  81     sanitize_path,
  82     sanitize_url,
  83     sanitized_Request,
  84     std_headers,
  85     str_or_none,
  86     subtitles_filename,
  87     UnavailableVideoError,
  88     url_basename,
  89     version_tuple,
  90     write_json_file,
  91     write_string,
  92     YoutubeDLCookieJar,
  93     YoutubeDLCookieProcessor,
  94     YoutubeDLHandler,
  95     YoutubeDLRedirectHandler,
  96 )
  97 from .cache import Cache
  98 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
  99 from .extractor.openload import PhantomJSwrapper
 100 from .downloader import get_suitable_downloader
 101 from .downloader.rtmp import rtmpdump_version
 102 from .postprocessor import (
 103     FFmpegFixupM3u8PP,
 104     FFmpegFixupM4aPP,
 105     FFmpegFixupStretchedPP,
 106     FFmpegMergerPP,
 107     FFmpegPostProcessor,
 108     FFmpegSubtitlesConvertorPP,
 109     get_postprocessor,
 110 )
 111 from .version import __version__
 112
 113 if compat_os_name == 'nt':
 114     import ctypes
 115
 116
 117 class YoutubeDL(object):
 118     """YoutubeDL class.
 119
 120     YoutubeDL objects are the ones responsible of downloading the
 121     actual video file and writing it to disk if the user has requested
 122     it, among some other tasks. In most cases there should be one per
 123     program. As, given a video URL, the downloader doesn't know how to
 124     extract all the needed information, task that InfoExtractors do, it
 125     has to pass the URL to one of them.
 126
 127     For this, YoutubeDL objects have a method that allows
 128     InfoExtractors to be registered in a given order. When it is passed
 129     a URL, the YoutubeDL object handles it to the first InfoExtractor it
 130     finds that reports being able to handle it. The InfoExtractor extracts
 131     all the information about the video or videos the URL refers to, and
 132     YoutubeDL process the extracted information, possibly using a File
 133     Downloader to download the video.
 134
 135     YoutubeDL objects accept a lot of parameters. In order not to saturate
 136     the object constructor with arguments, it receives a dictionary of
 137     options instead. These options are available through the params
 138     attribute for the InfoExtractors to use. The YoutubeDL also
 139     registers itself as the downloader in charge for the InfoExtractors
 140     that are added to it, so this is a "mutual registration".
 141
 142     Available options:
 143
 144     username:          Username for authentication purposes.
 145     password:          Password for authentication purposes.
 146     videopassword:     Password for accessing a video.
 147     ap_mso:            Adobe Pass multiple-system operator identifier.
 148     ap_username:       Multiple-system operator account username.
 149     ap_password:       Multiple-system operator account password.
 150     usenetrc:          Use netrc for authentication instead.
 151     verbose:           Print additional info to stdout.
 152     quiet:             Do not print messages to stdout.
 153     no_warnings:       Do not print out anything for warnings.
 154     forceurl:          Force printing final URL.
 155     forcetitle:        Force printing title.
 156     forceid:           Force printing ID.
 157     forcethumbnail:    Force printing thumbnail URL.
 158     forcedescription:  Force printing description.
 159     forcefilename:     Force printing final filename.
 160     forceduration:     Force printing duration.
 161     forcejson:         Force printing info_dict as JSON.
 162     dump_single_json:  Force printing the info_dict of the whole playlist
 163                        (or video) as a single JSON line.
 164     simulate:          Do not download the video files.
 165     format:            Video format code. See options.py for more information.
 166     outtmpl:           Template for output names.
 167     restrictfilenames: Do not allow "&" and spaces in file names
 168     ignoreerrors:      Do not stop on download errors.
 169     force_generic_extractor: Force downloader to use the generic extractor
 170     nooverwrites:      Prevent overwriting files.
 171     playliststart:     Playlist item to start at.
 172     playlistend:       Playlist item to end at.
 173     playlist_items:    Specific indices of playlist to download.
 174     playlistreverse:   Download playlist items in reverse order.
 175     playlistrandom:    Download playlist items in random order.
 176     matchtitle:        Download only matching titles.
 177     rejecttitle:       Reject downloads for matching titles.
 178     logger:            Log messages to a logging.Logger instance.
 179     logtostderr:       Log messages to stderr instead of stdout.
 180     writedescription:  Write the video description to a .description file
 181     writeinfojson:     Write the video description to a .info.json file
 182     writeannotations:  Write the video annotations to a .annotations.xml file
 183     writethumbnail:    Write the thumbnail image to a file
 184     write_all_thumbnails:  Write all thumbnail formats to files
 185     writesubtitles:    Write the video subtitles to a file
 186     writeautomaticsub: Write the automatically generated subtitles to a file
 187     allsubtitles:      Downloads all the subtitles of the video
 188                        (requires writesubtitles or writeautomaticsub)
 189     listsubtitles:     Lists all available subtitles for the video
 190     subtitlesformat:   The format code for subtitles
 191     subtitleslangs:    List of languages of the subtitles to download
 192     keepvideo:         Keep the video file after post-processing
 193     daterange:         A DateRange object, download only if the upload_date is in the range.
 194     skip_download:     Skip the actual download of the video file
 195     cachedir:          Location of the cache files in the filesystem.
 196                        False to disable filesystem cache.
 197     noplaylist:        Download single video instead of a playlist if in doubt.
 198     age_limit:         An integer representing the user's age in years.
 199                        Unsuitable videos for the given age are skipped.
 200     min_views:         An integer representing the minimum view count the video
 201                        must have in order to not be skipped.
 202                        Videos without view count information are always
 203                        downloaded. None for no limit.
 204     max_views:         An integer representing the maximum view count.
 205                        Videos that are more popular than that are not
 206                        downloaded.
 207                        Videos without view count information are always
 208                        downloaded. None for no limit.
 209     download_archive:  File name of a file where all downloads are recorded.
 210                        Videos already present in the file are not downloaded
 211                        again.
 212     cookiefile:        File name where cookies should be read from and dumped to.
 213     nocheckcertificate:Do not verify SSL certificates
 214     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 215                        At the moment, this is only supported by YouTube.
 216     proxy:             URL of the proxy server to use
 217     geo_verification_proxy:  URL of the proxy to use for IP address verification
 218                        on geo-restricted sites.
 219     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 220     bidi_workaround:   Work around buggy terminals without bidirectional text
 221                        support, using fridibi
 222     debug_printtraffic:Print out sent and received HTTP traffic
 223     include_ads:       Download ads as well
 224     default_search:    Prepend this string if an input url is not valid.
 225                        'auto' for elaborate guessing
 226     encoding:          Use this encoding instead of the system-specified.
 227     extract_flat:      Do not resolve URLs, return the immediate result.
 228                        Pass in 'in_playlist' to only show this behavior for
 229                        playlist items.
 230     postprocessors:    A list of dictionaries, each with an entry
 231                        * key:  The name of the postprocessor. See
 232                                youtube_dlc/postprocessor/__init__.py for a list.
 233                        as well as any further keyword arguments for the
 234                        postprocessor.
 235     progress_hooks:    A list of functions that get called on download
 236                        progress, with a dictionary with the entries
 237                        * status: One of "downloading", "error", or "finished".
 238                                  Check this first and ignore unknown values.
 239
 240                        If status is one of "downloading", or "finished", the
 241                        following properties may also be present:
 242                        * filename: The final filename (always present)
 243                        * tmpfilename: The filename we're currently writing to
 244                        * downloaded_bytes: Bytes on disk
 245                        * total_bytes: Size of the whole file, None if unknown
 246                        * total_bytes_estimate: Guess of the eventual file size,
 247                                                None if unavailable.
 248                        * elapsed: The number of seconds since download started.
 249                        * eta: The estimated time in seconds, None if unknown
 250                        * speed: The download speed in bytes/second, None if
 251                                 unknown
 252                        * fragment_index: The counter of the currently
 253                                          downloaded video fragment.
 254                        * fragment_count: The number of fragments (= individual
 255                                          files that will be merged)
 256
 257                        Progress hooks are guaranteed to be called at least once
 258                        (with status "finished") if the download is successful.
 259     merge_output_format: Extension to use when merging formats.
 260     fixup:             Automatically correct known faults of the file.
 261                        One of:
 262                        - "never": do nothing
 263                        - "warn": only emit a warning
 264                        - "detect_or_warn": check whether we can do anything
 265                                            about it, warn otherwise (default)
 266     source_address:    Client-side IP address to bind to.
 267     call_home:         Boolean, true iff we are allowed to contact the
 268                        youtube-dlc servers for debugging.
 269     sleep_interval:    Number of seconds to sleep before each download when
 270                        used alone or a lower bound of a range for randomized
 271                        sleep before each download (minimum possible number
 272                        of seconds to sleep) when used along with
 273                        max_sleep_interval.
 274     max_sleep_interval:Upper bound of a range for randomized sleep before each
 275                        download (maximum possible number of seconds to sleep).
 276                        Must only be used along with sleep_interval.
 277                        Actual sleep time will be a random float from range
 278                        [sleep_interval; max_sleep_interval].
 279     listformats:       Print an overview of available video formats and exit.
 280     list_thumbnails:   Print a table of all thumbnails and exit.
 281     match_filter:      A function that gets called with the info_dict of
 282                        every video.
 283                        If it returns a message, the video is ignored.
 284                        If it returns None, the video is downloaded.
 285                        match_filter_func in utils.py is one example for this.
 286     no_color:          Do not emit color codes in output.
 287     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 288                        HTTP header
 289     geo_bypass_country:
 290                        Two-letter ISO 3166-2 country code that will be used for
 291                        explicit geographic restriction bypassing via faking
 292                        X-Forwarded-For HTTP header
 293     geo_bypass_ip_block:
 294                        IP range in CIDR notation that will be used similarly to
 295                        geo_bypass_country
 296
 297     The following options determine which downloader is picked:
 298     external_downloader: Executable of the external downloader to call.
 299                        None or unset for standard (built-in) downloader.
 300     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
 301                        if True, otherwise use ffmpeg/avconv if False, otherwise
 302                        use downloader suggested by extractor if None.
 303
 304     The following parameters are not used by YoutubeDL itself, they are used by
 305     the downloader (see youtube_dlc/downloader/common.py):
 306     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 307     noresizebuffer, retries, continuedl, noprogress, consoletitle,
 308     xattr_set_filesize, external_downloader_args, hls_use_mpegts,
 309     http_chunk_size.
 310
 311     The following options are used by the post processors:
 312     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 313                        otherwise prefer ffmpeg.
 314     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 315                        to the binary or its containing directory.
 316     postprocessor_args: A list of additional command-line arguments for the
 317                         postprocessor.
 318
 319     The following options are used by the Youtube extractor:
 320     youtube_include_dash_manifest: If True (default), DASH manifests and related
 321                         data will be downloaded and processed by extractor.
 322                         You can reduce network I/O by disabling it if you don't
 323                         care about DASH.
 324     """
 325
 326     _NUMERIC_FIELDS = set((
 327         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 328         'timestamp', 'upload_year', 'upload_month', 'upload_day',
 329         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 330         'average_rating', 'comment_count', 'age_limit',
 331         'start_time', 'end_time',
 332         'chapter_number', 'season_number', 'episode_number',
 333         'track_number', 'disc_number', 'release_year',
 334         'playlist_index',
 335     ))
 336
 337     params = None
 338     _ies = []
 339     _pps = []
 340     _download_retcode = None
 341     _num_downloads = None
 342     _screen_file = None
 343
 344     def __init__(self, params=None, auto_init=True):
 345         """Create a FileDownloader object with the given options."""
 346         if params is None:
 347             params = {}
 348         self._ies = []
 349         self._ies_instances = {}
 350         self._pps = []
 351         self._progress_hooks = []
 352         self._download_retcode = 0
 353         self._num_downloads = 0
 354         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 355         self._err_file = sys.stderr
 356         self.params = {
 357             # Default parameters
 358             'nocheckcertificate': False,
 359         }
 360         self.params.update(params)
 361         self.cache = Cache(self)
 362         self.archive = set()
 363
 364         """Preload the archive, if any is specified"""
 365         def preload_download_archive(self):
 366             fn = self.params.get('download_archive')
 367             if fn is None:
 368                 return False
 369             try:
 370                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 371                     for line in archive_file:
 372                         self.archive.add(line.strip())
 373             except IOError as ioe:
 374                 if ioe.errno != errno.ENOENT:
 375                     raise
 376                 return False
 377             return True
 378
 379         def check_deprecated(param, option, suggestion):
 380             if self.params.get(param) is not None:
 381                 self.report_warning(
 382                     '%s is deprecated. Use %s instead.' % (option, suggestion))
 383                 return True
 384             return False
 385
 386         if self.params.get('verbose'):
 387             self.to_stdout('[debug] Loading archive file %r' % self.params.get('download_archive'))
 388
 389         preload_download_archive(self)
 390
 391         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 392             if self.params.get('geo_verification_proxy') is None:
 393                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 394
 395         check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
 396         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 397         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 398
 399         if params.get('bidi_workaround', False):
 400             try:
 401                 import pty
 402                 master, slave = pty.openpty()
 403                 width = compat_get_terminal_size().columns
 404                 if width is None:
 405                     width_args = []
 406                 else:
 407                     width_args = ['-w', str(width)]
 408                 sp_kwargs = dict(
 409                     stdin=subprocess.PIPE,
 410                     stdout=slave,
 411                     stderr=self._err_file)
 412                 try:
 413                     self._output_process = subprocess.Popen(
 414                         ['bidiv'] + width_args, **sp_kwargs
 415                     )
 416                 except OSError:
 417                     self._output_process = subprocess.Popen(
 418                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 419                 self._output_channel = os.fdopen(master, 'rb')
 420             except OSError as ose:
 421                 if ose.errno == errno.ENOENT:
 422                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 423                 else:
 424                     raise
 425
 426         if (sys.platform != 'win32'
 427                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 428                 and not params.get('restrictfilenames', False)):
 429             # Unicode filesystem API will throw errors (#1474, #13027)
 430             self.report_warning(
 431                 'Assuming --restrict-filenames since file system encoding '
 432                 'cannot encode all characters. '
 433                 'Set the LC_ALL environment variable to fix this.')
 434             self.params['restrictfilenames'] = True
 435
 436         if isinstance(params.get('outtmpl'), bytes):
 437             self.report_warning(
 438                 'Parameter outtmpl is bytes, but should be a unicode string. '
 439                 'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 440
 441         self._setup_opener()
 442
 443         if auto_init:
 444             self.print_debug_header()
 445             self.add_default_info_extractors()
 446
 447         for pp_def_raw in self.params.get('postprocessors', []):
 448             pp_class = get_postprocessor(pp_def_raw['key'])
 449             pp_def = dict(pp_def_raw)
 450             del pp_def['key']
 451             pp = pp_class(self, **compat_kwargs(pp_def))
 452             self.add_post_processor(pp)
 453
 454         for ph in self.params.get('progress_hooks', []):
 455             self.add_progress_hook(ph)
 456
 457         register_socks_protocols()
 458
 459     def warn_if_short_id(self, argv):
 460         # short YouTube ID starting with dash?
 461         idxs = [
 462             i for i, a in enumerate(argv)
 463             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 464         if idxs:
 465             correct_argv = (
 466                 ['youtube-dlc']
 467                 + [a for i, a in enumerate(argv) if i not in idxs]
 468                 + ['--'] + [argv[i] for i in idxs]
 469             )
 470             self.report_warning(
 471                 'Long argument string detected. '
 472                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 473                 args_to_str(correct_argv))
 474
 475     def add_info_extractor(self, ie):
 476         """Add an InfoExtractor object to the end of the list."""
 477         self._ies.append(ie)
 478         if not isinstance(ie, type):
 479             self._ies_instances[ie.ie_key()] = ie
 480             ie.set_downloader(self)
 481
 482     def get_info_extractor(self, ie_key):
 483         """
 484         Get an instance of an IE with name ie_key, it will try to get one from
 485         the _ies list, if there's no instance it will create a new one and add
 486         it to the extractor list.
 487         """
 488         ie = self._ies_instances.get(ie_key)
 489         if ie is None:
 490             ie = get_info_extractor(ie_key)()
 491             self.add_info_extractor(ie)
 492         return ie
 493
 494     def add_default_info_extractors(self):
 495         """
 496         Add the InfoExtractors returned by gen_extractors to the end of the list
 497         """
 498         for ie in gen_extractor_classes():
 499             self.add_info_extractor(ie)
 500
 501     def add_post_processor(self, pp):
 502         """Add a PostProcessor object to the end of the chain."""
 503         self._pps.append(pp)
 504         pp.set_downloader(self)
 505
 506     def add_progress_hook(self, ph):
 507         """Add the progress hook (currently only for the file downloader)"""
 508         self._progress_hooks.append(ph)
 509
 510     def _bidi_workaround(self, message):
 511         if not hasattr(self, '_output_channel'):
 512             return message
 513
 514         assert hasattr(self, '_output_process')
 515         assert isinstance(message, compat_str)
 516         line_count = message.count('\n') + 1
 517         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 518         self._output_process.stdin.flush()
 519         res = ''.join(self._output_channel.readline().decode('utf-8')
 520                       for _ in range(line_count))
 521         return res[:-len('\n')]
 522
 523     def to_screen(self, message, skip_eol=False):
 524         """Print message to stdout if not in quiet mode."""
 525         return self.to_stdout(message, skip_eol, check_quiet=True)
 526
 527     def _write_string(self, s, out=None):
 528         write_string(s, out=out, encoding=self.params.get('encoding'))
 529
 530     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 531         """Print message to stdout if not in quiet mode."""
 532         if self.params.get('logger'):
 533             self.params['logger'].debug(message)
 534         elif not check_quiet or not self.params.get('quiet', False):
 535             message = self._bidi_workaround(message)
 536             terminator = ['\n', ''][skip_eol]
 537             output = message + terminator
 538
 539             self._write_string(output, self._screen_file)
 540
 541     def to_stderr(self, message):
 542         """Print message to stderr."""
 543         assert isinstance(message, compat_str)
 544         if self.params.get('logger'):
 545             self.params['logger'].error(message)
 546         else:
 547             message = self._bidi_workaround(message)
 548             output = message + '\n'
 549             self._write_string(output, self._err_file)
 550
 551     def to_console_title(self, message):
 552         if not self.params.get('consoletitle', False):
 553             return
 554         if compat_os_name == 'nt':
 555             if ctypes.windll.kernel32.GetConsoleWindow():
 556                 # c_wchar_p() might not be necessary if `message` is
 557                 # already of type unicode()
 558                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 559         elif 'TERM' in os.environ:
 560             self._write_string('\033]0;%s\007' % message, self._screen_file)
 561
 562     def save_console_title(self):
 563         if not self.params.get('consoletitle', False):
 564             return
 565         if self.params.get('simulate', False):
 566             return
 567         if compat_os_name != 'nt' and 'TERM' in os.environ:
 568             # Save the title on stack
 569             self._write_string('\033[22;0t', self._screen_file)
 570
 571     def restore_console_title(self):
 572         if not self.params.get('consoletitle', False):
 573             return
 574         if self.params.get('simulate', False):
 575             return
 576         if compat_os_name != 'nt' and 'TERM' in os.environ:
 577             # Restore the title from stack
 578             self._write_string('\033[23;0t', self._screen_file)
 579
 580     def __enter__(self):
 581         self.save_console_title()
 582         return self
 583
 584     def __exit__(self, *args):
 585         self.restore_console_title()
 586
 587         if self.params.get('cookiefile') is not None:
 588             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 589
 590     def trouble(self, message=None, tb=None):
 591         """Determine action to take when a download problem appears.
 592
 593         Depending on if the downloader has been configured to ignore
 594         download errors or not, this method may throw an exception or
 595         not when errors are found, after printing the message.
 596
 597         tb, if given, is additional traceback information.
 598         """
 599         if message is not None:
 600             self.to_stderr(message)
 601         if self.params.get('verbose'):
 602             if tb is None:
 603                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 604                     tb = ''
 605                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 606                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 607                     tb += encode_compat_str(traceback.format_exc())
 608                 else:
 609                     tb_data = traceback.format_list(traceback.extract_stack())
 610                     tb = ''.join(tb_data)
 611             self.to_stderr(tb)
 612         if not self.params.get('ignoreerrors', False):
 613             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 614                 exc_info = sys.exc_info()[1].exc_info
 615             else:
 616                 exc_info = sys.exc_info()
 617             raise DownloadError(message, exc_info)
 618         self._download_retcode = 1
 619
 620     def report_warning(self, message):
 621         '''
 622         Print the message to stderr, it will be prefixed with 'WARNING:'
 623         If stderr is a tty file the 'WARNING:' will be colored
 624         '''
 625         if self.params.get('logger') is not None:
 626             self.params['logger'].warning(message)
 627         else:
 628             if self.params.get('no_warnings'):
 629                 return
 630             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 631                 _msg_header = '\033[0;33mWARNING:\033[0m'
 632             else:
 633                 _msg_header = 'WARNING:'
 634             warning_message = '%s %s' % (_msg_header, message)
 635             self.to_stderr(warning_message)
 636
 637     def report_error(self, message, tb=None):
 638         '''
 639         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 640         in red if stderr is a tty file.
 641         '''
 642         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 643             _msg_header = '\033[0;31mERROR:\033[0m'
 644         else:
 645             _msg_header = 'ERROR:'
 646         error_message = '%s %s' % (_msg_header, message)
 647         self.trouble(error_message, tb)
 648
 649     def report_file_already_downloaded(self, file_name):
 650         """Report file has already been fully downloaded."""
 651         try:
 652             self.to_screen('[download] %s has already been downloaded' % file_name)
 653         except UnicodeEncodeError:
 654             self.to_screen('[download] The file has already been downloaded')
 655
 656     def prepare_filename(self, info_dict):
 657         """Generate the output filename."""
 658         try:
 659             template_dict = dict(info_dict)
 660
 661             template_dict['epoch'] = int(time.time())
 662             autonumber_size = self.params.get('autonumber_size')
 663             if autonumber_size is None:
 664                 autonumber_size = 5
 665             template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
 666             if template_dict.get('resolution') is None:
 667                 if template_dict.get('width') and template_dict.get('height'):
 668                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 669                 elif template_dict.get('height'):
 670                     template_dict['resolution'] = '%sp' % template_dict['height']
 671                 elif template_dict.get('width'):
 672                     template_dict['resolution'] = '%dx?' % template_dict['width']
 673
 674             sanitize = lambda k, v: sanitize_filename(
 675                 compat_str(v),
 676                 restricted=self.params.get('restrictfilenames'),
 677                 is_id=(k == 'id' or k.endswith('_id')))
 678             template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
 679                                  for k, v in template_dict.items()
 680                                  if v is not None and not isinstance(v, (list, tuple, dict)))
 681             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 682
 683             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
 684
 685             # For fields playlist_index and autonumber convert all occurrences
 686             # of %(field)s to %(field)0Nd for backward compatibility
 687             field_size_compat_map = {
 688                 'playlist_index': len(str(template_dict['n_entries'])),
 689                 'autonumber': autonumber_size,
 690             }
 691             FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
 692             mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
 693             if mobj:
 694                 outtmpl = re.sub(
 695                     FIELD_SIZE_COMPAT_RE,
 696                     r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
 697                     outtmpl)
 698
 699             # Missing numeric fields used together with integer presentation types
 700             # in format specification will break the argument substitution since
 701             # string 'NA' is returned for missing fields. We will patch output
 702             # template for missing fields to meet string presentation type.
 703             for numeric_field in self._NUMERIC_FIELDS:
 704                 if numeric_field not in template_dict:
 705                     # As of [1] format syntax is:
 706                     #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
 707                     # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
 708                     FORMAT_RE = r'''(?x)
 709                         (?<!%)
 710                         %
 711                         \({0}\)  # mapping key
 712                         (?:[#0\-+ ]+)?  # conversion flags (optional)
 713                         (?:\d+)?  # minimum field width (optional)
 714                         (?:\.\d+)?  # precision (optional)
 715                         [hlL]?  # length modifier (optional)
 716                         [diouxXeEfFgGcrs%]  # conversion type
 717                     '''
 718                     outtmpl = re.sub(
 719                         FORMAT_RE.format(numeric_field),
 720                         r'%({0})s'.format(numeric_field), outtmpl)
 721
 722             # expand_path translates '%%' into '%' and '$$' into '$'
 723             # correspondingly that is not what we want since we need to keep
 724             # '%%' intact for template dict substitution step. Working around
 725             # with boundary-alike separator hack.
 726             sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 727             outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 728
 729             # outtmpl should be expand_path'ed before template dict substitution
 730             # because meta fields may contain env variables we don't want to
 731             # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
 732             # title "Hello $PATH", we don't want `$PATH` to be expanded.
 733             filename = expand_path(outtmpl).replace(sep, '') % template_dict
 734
 735             # Temporary fix for #4787
 736             # 'Treat' all problem characters by passing filename through preferredencoding
 737             # to workaround encoding issues with subprocess on python2 @ Windows
 738             if sys.version_info < (3, 0) and sys.platform == 'win32':
 739                 filename = encodeFilename(filename, True).decode(preferredencoding())
 740             return sanitize_path(filename)
 741         except ValueError as err:
 742             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 743             return None
 744
 745     def _match_entry(self, info_dict, incomplete):
 746         """ Returns None if the file should be downloaded """
 747
 748         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 749         if 'title' in info_dict:
 750             # This can happen when we're just evaluating the playlist
 751             title = info_dict['title']
 752             matchtitle = self.params.get('matchtitle', False)
 753             if matchtitle:
 754                 if not re.search(matchtitle, title, re.IGNORECASE):
 755                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 756             rejecttitle = self.params.get('rejecttitle', False)
 757             if rejecttitle:
 758                 if re.search(rejecttitle, title, re.IGNORECASE):
 759                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 760         date = info_dict.get('upload_date')
 761         if date is not None:
 762             dateRange = self.params.get('daterange', DateRange())
 763             if date not in dateRange:
 764                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 765         view_count = info_dict.get('view_count')
 766         if view_count is not None:
 767             min_views = self.params.get('min_views')
 768             if min_views is not None and view_count < min_views:
 769                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 770             max_views = self.params.get('max_views')
 771             if max_views is not None and view_count > max_views:
 772                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 773         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
 774             return 'Skipping "%s" because it is age restricted' % video_title
 775         if self.in_download_archive(info_dict):
 776             return '%s has already been recorded in archive' % video_title
 777
 778         if not incomplete:
 779             match_filter = self.params.get('match_filter')
 780             if match_filter is not None:
 781                 ret = match_filter(info_dict)
 782                 if ret is not None:
 783                     return ret
 784
 785         return None
 786
 787     @staticmethod
 788     def add_extra_info(info_dict, extra_info):
 789         '''Set the keys from extra_info in info dict if they are missing'''
 790         for key, value in extra_info.items():
 791             info_dict.setdefault(key, value)
 792
 793     def extract_info(self, url, download=True, ie_key=None, extra_info={},
 794                      process=True, force_generic_extractor=False):
 795         '''
 796         Returns a list with a dictionary for each video we find.
 797         If 'download', also downloads the videos.
 798         extra_info is a dict containing the extra values to add to each result
 799         '''
 800
 801         if not ie_key and force_generic_extractor:
 802             ie_key = 'Generic'
 803
 804         if ie_key:
 805             ies = [self.get_info_extractor(ie_key)]
 806         else:
 807             ies = self._ies
 808
 809         for ie in ies:
 810             if not ie.suitable(url):
 811                 continue
 812
 813             ie = self.get_info_extractor(ie.ie_key())
 814             if not ie.working():
 815                 self.report_warning('The program functionality for this site has been marked as broken, '
 816                                     'and will probably not work.')
 817
 818             try:
 819                 ie_result = ie.extract(url)
 820                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
 821                     break
 822                 if isinstance(ie_result, list):
 823                     # Backwards compatibility: old IE result format
 824                     ie_result = {
 825                         '_type': 'compat_list',
 826                         'entries': ie_result,
 827                     }
 828                 self.add_default_extra_info(ie_result, ie, url)
 829                 if process:
 830                     return self.process_ie_result(ie_result, download, extra_info)
 831                 else:
 832                     return ie_result
 833             except GeoRestrictedError as e:
 834                 msg = e.msg
 835                 if e.countries:
 836                     msg += '\nThis video is available in %s.' % ', '.join(
 837                         map(ISO3166Utils.short2full, e.countries))
 838                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
 839                 self.report_error(msg)
 840                 break
 841             except ExtractorError as e:  # An error we somewhat expected
 842                 self.report_error(compat_str(e), e.format_traceback())
 843                 break
 844             except MaxDownloadsReached:
 845                 raise
 846             except Exception as e:
 847                 if self.params.get('ignoreerrors', False):
 848                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
 849                     break
 850                 else:
 851                     raise
 852         else:
 853             self.report_error('no suitable InfoExtractor for URL %s' % url)
 854
 855     def add_default_extra_info(self, ie_result, ie, url):
 856         self.add_extra_info(ie_result, {
 857             'extractor': ie.IE_NAME,
 858             'webpage_url': url,
 859             'webpage_url_basename': url_basename(url),
 860             'extractor_key': ie.ie_key(),
 861         })
 862
 863     def process_ie_result(self, ie_result, download=True, extra_info={}):
 864         """
 865         Take the result of the ie(may be modified) and resolve all unresolved
 866         references (URLs, playlist items).
 867
 868         It will also download the videos if 'download'.
 869         Returns the resolved ie_result.
 870         """
 871         result_type = ie_result.get('_type', 'video')
 872
 873         if result_type in ('url', 'url_transparent'):
 874             ie_result['url'] = sanitize_url(ie_result['url'])
 875             extract_flat = self.params.get('extract_flat', False)
 876             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
 877                     or extract_flat is True):
 878                 self.__forced_printings(
 879                     ie_result, self.prepare_filename(ie_result),
 880                     incomplete=True)
 881                 return ie_result
 882
 883         if result_type == 'video':
 884             self.add_extra_info(ie_result, extra_info)
 885             return self.process_video_result(ie_result, download=download)
 886         elif result_type == 'url':
 887             # We have to add extra_info to the results because it may be
 888             # contained in a playlist
 889             return self.extract_info(ie_result['url'],
 890                                      download,
 891                                      ie_key=ie_result.get('ie_key'),
 892                                      extra_info=extra_info)
 893         elif result_type == 'url_transparent':
 894             # Use the information from the embedding page
 895             info = self.extract_info(
 896                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 897                 extra_info=extra_info, download=False, process=False)
 898
 899             # extract_info may return None when ignoreerrors is enabled and
 900             # extraction failed with an error, don't crash and return early
 901             # in this case
 902             if not info:
 903                 return info
 904
 905             force_properties = dict(
 906                 (k, v) for k, v in ie_result.items() if v is not None)
 907             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
 908                 if f in force_properties:
 909                     del force_properties[f]
 910             new_result = info.copy()
 911             new_result.update(force_properties)
 912
 913             # Extracted info may not be a video result (i.e.
 914             # info.get('_type', 'video') != video) but rather an url or
 915             # url_transparent. In such cases outer metadata (from ie_result)
 916             # should be propagated to inner one (info). For this to happen
 917             # _type of info should be overridden with url_transparent. This
 918             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
 919             if new_result.get('_type') == 'url':
 920                 new_result['_type'] = 'url_transparent'
 921
 922             return self.process_ie_result(
 923                 new_result, download=download, extra_info=extra_info)
 924         elif result_type in ('playlist', 'multi_video'):
 925             # We process each entry in the playlist
 926             playlist = ie_result.get('title') or ie_result.get('id')
 927             self.to_screen('[download] Downloading playlist: %s' % playlist)
 928
 929             playlist_results = []
 930
 931             playliststart = self.params.get('playliststart', 1) - 1
 932             playlistend = self.params.get('playlistend')
 933             # For backwards compatibility, interpret -1 as whole list
 934             if playlistend == -1:
 935                 playlistend = None
 936
 937             playlistitems_str = self.params.get('playlist_items')
 938             playlistitems = None
 939             if playlistitems_str is not None:
 940                 def iter_playlistitems(format):
 941                     for string_segment in format.split(','):
 942                         if '-' in string_segment:
 943                             start, end = string_segment.split('-')
 944                             for item in range(int(start), int(end) + 1):
 945                                 yield int(item)
 946                         else:
 947                             yield int(string_segment)
 948                 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
 949
 950             ie_entries = ie_result['entries']
 951
 952             def make_playlistitems_entries(list_ie_entries):
 953                 num_entries = len(list_ie_entries)
 954                 return [
 955                     list_ie_entries[i - 1] for i in playlistitems
 956                     if -num_entries <= i - 1 < num_entries]
 957
 958             def report_download(num_entries):
 959                 self.to_screen(
 960                     '[%s] playlist %s: Downloading %d videos' %
 961                     (ie_result['extractor'], playlist, num_entries))
 962
 963             if isinstance(ie_entries, list):
 964                 n_all_entries = len(ie_entries)
 965                 if playlistitems:
 966                     entries = make_playlistitems_entries(ie_entries)
 967                 else:
 968                     entries = ie_entries[playliststart:playlistend]
 969                 n_entries = len(entries)
 970                 self.to_screen(
 971                     '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
 972                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
 973             elif isinstance(ie_entries, PagedList):
 974                 if playlistitems:
 975                     entries = []
 976                     for item in playlistitems:
 977                         entries.extend(ie_entries.getslice(
 978                             item - 1, item
 979                         ))
 980                 else:
 981                     entries = ie_entries.getslice(
 982                         playliststart, playlistend)
 983                 n_entries = len(entries)
 984                 report_download(n_entries)
 985             else:  # iterable
 986                 if playlistitems:
 987                     entries = make_playlistitems_entries(list(itertools.islice(
 988                         ie_entries, 0, max(playlistitems))))
 989                 else:
 990                     entries = list(itertools.islice(
 991                         ie_entries, playliststart, playlistend))
 992                 n_entries = len(entries)
 993                 report_download(n_entries)
 994
 995             if self.params.get('playlistreverse', False):
 996                 entries = entries[::-1]
 997
 998             if self.params.get('playlistrandom', False):
 999                 random.shuffle(entries)
1000
1001             x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1002
1003             for i, entry in enumerate(entries, 1):
1004                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1005                 # This __x_forwarded_for_ip thing is a bit ugly but requires
1006                 # minimal changes
1007                 if x_forwarded_for:
1008                     entry['__x_forwarded_for_ip'] = x_forwarded_for
1009                 extra = {
1010                     'n_entries': n_entries,
1011                     'playlist': playlist,
1012                     'playlist_id': ie_result.get('id'),
1013                     'playlist_title': ie_result.get('title'),
1014                     'playlist_uploader': ie_result.get('uploader'),
1015                     'playlist_uploader_id': ie_result.get('uploader_id'),
1016                     'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
1017                     'extractor': ie_result['extractor'],
1018                     'webpage_url': ie_result['webpage_url'],
1019                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1020                     'extractor_key': ie_result['extractor_key'],
1021                 }
1022
1023                 reason = self._match_entry(entry, incomplete=True)
1024                 if reason is not None:
1025                     self.to_screen('[download] ' + reason)
1026                     continue
1027
1028                 entry_result = self.process_ie_result(entry,
1029                                                       download=download,
1030                                                       extra_info=extra)
1031                 playlist_results.append(entry_result)
1032             ie_result['entries'] = playlist_results
1033             self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1034             return ie_result
1035         elif result_type == 'compat_list':
1036             self.report_warning(
1037                 'Extractor %s returned a compat_list result. '
1038                 'It needs to be updated.' % ie_result.get('extractor'))
1039
1040             def _fixup(r):
1041                 self.add_extra_info(
1042                     r,
1043                     {
1044                         'extractor': ie_result['extractor'],
1045                         'webpage_url': ie_result['webpage_url'],
1046                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
1047                         'extractor_key': ie_result['extractor_key'],
1048                     }
1049                 )
1050                 return r
1051             ie_result['entries'] = [
1052                 self.process_ie_result(_fixup(r), download, extra_info)
1053                 for r in ie_result['entries']
1054             ]
1055             return ie_result
1056         else:
1057             raise Exception('Invalid result type: %s' % result_type)
1058
1059     def _build_format_filter(self, filter_spec):
1060         " Returns a function to filter the formats according to the filter_spec "
1061
1062         OPERATORS = {
1063             '<': operator.lt,
1064             '<=': operator.le,
1065             '>': operator.gt,
1066             '>=': operator.ge,
1067             '=': operator.eq,
1068             '!=': operator.ne,
1069         }
1070         operator_rex = re.compile(r'''(?x)\s*
1071             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
1072             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1073             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
1074             $
1075             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1076         m = operator_rex.search(filter_spec)
1077         if m:
1078             try:
1079                 comparison_value = int(m.group('value'))
1080             except ValueError:
1081                 comparison_value = parse_filesize(m.group('value'))
1082                 if comparison_value is None:
1083                     comparison_value = parse_filesize(m.group('value') + 'B')
1084                 if comparison_value is None:
1085                     raise ValueError(
1086                         'Invalid value %r in format specification %r' % (
1087                             m.group('value'), filter_spec))
1088             op = OPERATORS[m.group('op')]
1089
1090         if not m:
1091             STR_OPERATORS = {
1092                 '=': operator.eq,
1093                 '^=': lambda attr, value: attr.startswith(value),
1094                 '$=': lambda attr, value: attr.endswith(value),
1095                 '*=': lambda attr, value: value in attr,
1096             }
1097             str_operator_rex = re.compile(r'''(?x)
1098                 \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
1099                 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
1100                 \s*(?P<value>[a-zA-Z0-9._-]+)
1101                 \s*$
1102                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1103             m = str_operator_rex.search(filter_spec)
1104             if m:
1105                 comparison_value = m.group('value')
1106                 str_op = STR_OPERATORS[m.group('op')]
1107                 if m.group('negation'):
1108                     op = lambda attr, value: not str_op(attr, value)
1109                 else:
1110                     op = str_op
1111
1112         if not m:
1113             raise ValueError('Invalid filter specification %r' % filter_spec)
1114
1115         def _filter(f):
1116             actual_value = f.get(m.group('key'))
1117             if actual_value is None:
1118                 return m.group('none_inclusive')
1119             return op(actual_value, comparison_value)
1120         return _filter
1121
1122     def _default_format_spec(self, info_dict, download=True):
1123
1124         def can_merge():
1125             merger = FFmpegMergerPP(self)
1126             return merger.available and merger.can_merge()
1127
1128         def prefer_best():
1129             if self.params.get('simulate', False):
1130                 return False
1131             if not download:
1132                 return False
1133             if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
1134                 return True
1135             if info_dict.get('is_live'):
1136                 return True
1137             if not can_merge():
1138                 return True
1139             return False
1140
1141         req_format_list = ['bestvideo+bestaudio', 'best']
1142         if prefer_best():
1143             req_format_list.reverse()
1144         return '/'.join(req_format_list)
1145
1146     def build_format_selector(self, format_spec):
1147         def syntax_error(note, start):
1148             message = (
1149                 'Invalid format specification: '
1150                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1151             return SyntaxError(message)
1152
1153         PICKFIRST = 'PICKFIRST'
1154         MERGE = 'MERGE'
1155         SINGLE = 'SINGLE'
1156         GROUP = 'GROUP'
1157         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1158
1159         def _parse_filter(tokens):
1160             filter_parts = []
1161             for type, string, start, _, _ in tokens:
1162                 if type == tokenize.OP and string == ']':
1163                     return ''.join(filter_parts)
1164                 else:
1165                     filter_parts.append(string)
1166
1167         def _remove_unused_ops(tokens):
1168             # Remove operators that we don't use and join them with the surrounding strings
1169             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1170             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1171             last_string, last_start, last_end, last_line = None, None, None, None
1172             for type, string, start, end, line in tokens:
1173                 if type == tokenize.OP and string == '[':
1174                     if last_string:
1175                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1176                         last_string = None
1177                     yield type, string, start, end, line
1178                     # everything inside brackets will be handled by _parse_filter
1179                     for type, string, start, end, line in tokens:
1180                         yield type, string, start, end, line
1181                         if type == tokenize.OP and string == ']':
1182                             break
1183                 elif type == tokenize.OP and string in ALLOWED_OPS:
1184                     if last_string:
1185                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1186                         last_string = None
1187                     yield type, string, start, end, line
1188                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1189                     if not last_string:
1190                         last_string = string
1191                         last_start = start
1192                         last_end = end
1193                     else:
1194                         last_string += string
1195             if last_string:
1196                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1197
1198         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1199             selectors = []
1200             current_selector = None
1201             for type, string, start, _, _ in tokens:
1202                 # ENCODING is only defined in python 3.x
1203                 if type == getattr(tokenize, 'ENCODING', None):
1204                     continue
1205                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1206                     current_selector = FormatSelector(SINGLE, string, [])
1207                 elif type == tokenize.OP:
1208                     if string == ')':
1209                         if not inside_group:
1210                             # ')' will be handled by the parentheses group
1211                             tokens.restore_last_token()
1212                         break
1213                     elif inside_merge and string in ['/', ',']:
1214                         tokens.restore_last_token()
1215                         break
1216                     elif inside_choice and string == ',':
1217                         tokens.restore_last_token()
1218                         break
1219                     elif string == ',':
1220                         if not current_selector:
1221                             raise syntax_error('"," must follow a format selector', start)
1222                         selectors.append(current_selector)
1223                         current_selector = None
1224                     elif string == '/':
1225                         if not current_selector:
1226                             raise syntax_error('"/" must follow a format selector', start)
1227                         first_choice = current_selector
1228                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1229                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1230                     elif string == '[':
1231                         if not current_selector:
1232                             current_selector = FormatSelector(SINGLE, 'best', [])
1233                         format_filter = _parse_filter(tokens)
1234                         current_selector.filters.append(format_filter)
1235                     elif string == '(':
1236                         if current_selector:
1237                             raise syntax_error('Unexpected "("', start)
1238                         group = _parse_format_selection(tokens, inside_group=True)
1239                         current_selector = FormatSelector(GROUP, group, [])
1240                     elif string == '+':
1241                         if not current_selector:
1242                             raise syntax_error('Unexpected "+"', start)
1243                         selector_1 = current_selector
1244                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
1245                         if not selector_2:
1246                             raise syntax_error('Expected a selector', start)
1247                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1248                     else:
1249                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1250                 elif type == tokenize.ENDMARKER:
1251                     break
1252             if current_selector:
1253                 selectors.append(current_selector)
1254             return selectors
1255
1256         def _build_selector_function(selector):
1257             if isinstance(selector, list):
1258                 fs = [_build_selector_function(s) for s in selector]
1259
1260                 def selector_function(ctx):
1261                     for f in fs:
1262                         for format in f(ctx):
1263                             yield format
1264                 return selector_function
1265             elif selector.type == GROUP:
1266                 selector_function = _build_selector_function(selector.selector)
1267             elif selector.type == PICKFIRST:
1268                 fs = [_build_selector_function(s) for s in selector.selector]
1269
1270                 def selector_function(ctx):
1271                     for f in fs:
1272                         picked_formats = list(f(ctx))
1273                         if picked_formats:
1274                             return picked_formats
1275                     return []
1276             elif selector.type == SINGLE:
1277                 format_spec = selector.selector
1278
1279                 def selector_function(ctx):
1280                     formats = list(ctx['formats'])
1281                     if not formats:
1282                         return
1283                     if format_spec == 'all':
1284                         for f in formats:
1285                             yield f
1286                     elif format_spec in ['best', 'worst', None]:
1287                         format_idx = 0 if format_spec == 'worst' else -1
1288                         audiovideo_formats = [
1289                             f for f in formats
1290                             if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
1291                         if audiovideo_formats:
1292                             yield audiovideo_formats[format_idx]
1293                         # for extractors with incomplete formats (audio only (soundcloud)
1294                         # or video only (imgur)) we will fallback to best/worst
1295                         # {video,audio}-only format
1296                         elif ctx['incomplete_formats']:
1297                             yield formats[format_idx]
1298                     elif format_spec == 'bestaudio':
1299                         audio_formats = [
1300                             f for f in formats
1301                             if f.get('vcodec') == 'none']
1302                         if audio_formats:
1303                             yield audio_formats[-1]
1304                     elif format_spec == 'worstaudio':
1305                         audio_formats = [
1306                             f for f in formats
1307                             if f.get('vcodec') == 'none']
1308                         if audio_formats:
1309                             yield audio_formats[0]
1310                     elif format_spec == 'bestvideo':
1311                         video_formats = [
1312                             f for f in formats
1313                             if f.get('acodec') == 'none']
1314                         if video_formats:
1315                             yield video_formats[-1]
1316                     elif format_spec == 'worstvideo':
1317                         video_formats = [
1318                             f for f in formats
1319                             if f.get('acodec') == 'none']
1320                         if video_formats:
1321                             yield video_formats[0]
1322                     else:
1323                         extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
1324                         if format_spec in extensions:
1325                             filter_f = lambda f: f['ext'] == format_spec
1326                         else:
1327                             filter_f = lambda f: f['format_id'] == format_spec
1328                         matches = list(filter(filter_f, formats))
1329                         if matches:
1330                             yield matches[-1]
1331             elif selector.type == MERGE:
1332                 def _merge(formats_pair):
1333                     format_1, format_2 = formats_pair
1334
1335                     formats_info = []
1336                     formats_info.extend(format_1.get('requested_formats', (format_1,)))
1337                     formats_info.extend(format_2.get('requested_formats', (format_2,)))
1338
1339                     video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1340                     audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1341
1342                     the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1343                     the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1344
1345                     output_ext = self.params.get('merge_output_format')
1346                     if not output_ext:
1347                         if the_only_video:
1348                             output_ext = the_only_video['ext']
1349                         elif the_only_audio and not video_fmts:
1350                             output_ext = the_only_audio['ext']
1351                         else:
1352                             output_ext = 'mkv'
1353
1354                     new_dict = {
1355                         'requested_formats': formats_info,
1356                         'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
1357                         'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
1358                         'ext': output_ext,
1359                     }
1360
1361                     if the_only_video:
1362                         new_dict.update({
1363                             'width': the_only_video.get('width'),
1364                             'height': the_only_video.get('height'),
1365                             'resolution': the_only_video.get('resolution'),
1366                             'fps': the_only_video.get('fps'),
1367                             'vcodec': the_only_video.get('vcodec'),
1368                             'vbr': the_only_video.get('vbr'),
1369                             'stretched_ratio': the_only_video.get('stretched_ratio'),
1370                         })
1371
1372                     if the_only_audio:
1373                         new_dict.update({
1374                             'acodec': the_only_audio.get('acodec'),
1375                             'abr': the_only_audio.get('abr'),
1376                         })
1377
1378                     return new_dict
1379
1380                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
1381
1382                 def selector_function(ctx):
1383                     for pair in itertools.product(
1384                             selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
1385                         yield _merge(pair)
1386
1387             filters = [self._build_format_filter(f) for f in selector.filters]
1388
1389             def final_selector(ctx):
1390                 ctx_copy = copy.deepcopy(ctx)
1391                 for _filter in filters:
1392                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1393                 return selector_function(ctx_copy)
1394             return final_selector
1395
1396         stream = io.BytesIO(format_spec.encode('utf-8'))
1397         try:
1398             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1399         except tokenize.TokenError:
1400             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1401
1402         class TokenIterator(object):
1403             def __init__(self, tokens):
1404                 self.tokens = tokens
1405                 self.counter = 0
1406
1407             def __iter__(self):
1408                 return self
1409
1410             def __next__(self):
1411                 if self.counter >= len(self.tokens):
1412                     raise StopIteration()
1413                 value = self.tokens[self.counter]
1414                 self.counter += 1
1415                 return value
1416
1417             next = __next__
1418
1419             def restore_last_token(self):
1420                 self.counter -= 1
1421
1422         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1423         return _build_selector_function(parsed_selector)
1424
1425     def _calc_headers(self, info_dict):
1426         res = std_headers.copy()
1427
1428         add_headers = info_dict.get('http_headers')
1429         if add_headers:
1430             res.update(add_headers)
1431
1432         cookies = self._calc_cookies(info_dict)
1433         if cookies:
1434             res['Cookie'] = cookies
1435
1436         if 'X-Forwarded-For' not in res:
1437             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1438             if x_forwarded_for_ip:
1439                 res['X-Forwarded-For'] = x_forwarded_for_ip
1440
1441         return res
1442
1443     def _calc_cookies(self, info_dict):
1444         pr = sanitized_Request(info_dict['url'])
1445         self.cookiejar.add_cookie_header(pr)
1446         return pr.get_header('Cookie')
1447
1448     def process_video_result(self, info_dict, download=True):
1449         assert info_dict.get('_type', 'video') == 'video'
1450
1451         if 'id' not in info_dict:
1452             raise ExtractorError('Missing "id" field in extractor result')
1453         if 'title' not in info_dict:
1454             raise ExtractorError('Missing "title" field in extractor result')
1455
1456         def report_force_conversion(field, field_not, conversion):
1457             self.report_warning(
1458                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
1459                 % (field, field_not, conversion))
1460
1461         def sanitize_string_field(info, string_field):
1462             field = info.get(string_field)
1463             if field is None or isinstance(field, compat_str):
1464                 return
1465             report_force_conversion(string_field, 'a string', 'string')
1466             info[string_field] = compat_str(field)
1467
1468         def sanitize_numeric_fields(info):
1469             for numeric_field in self._NUMERIC_FIELDS:
1470                 field = info.get(numeric_field)
1471                 if field is None or isinstance(field, compat_numeric_types):
1472                     continue
1473                 report_force_conversion(numeric_field, 'numeric', 'int')
1474                 info[numeric_field] = int_or_none(field)
1475
1476         sanitize_string_field(info_dict, 'id')
1477         sanitize_numeric_fields(info_dict)
1478
1479         if 'playlist' not in info_dict:
1480             # It isn't part of a playlist
1481             info_dict['playlist'] = None
1482             info_dict['playlist_index'] = None
1483
1484         thumbnails = info_dict.get('thumbnails')
1485         if thumbnails is None:
1486             thumbnail = info_dict.get('thumbnail')
1487             if thumbnail:
1488                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1489         if thumbnails:
1490             thumbnails.sort(key=lambda t: (
1491                 t.get('preference') if t.get('preference') is not None else -1,
1492                 t.get('width') if t.get('width') is not None else -1,
1493                 t.get('height') if t.get('height') is not None else -1,
1494                 t.get('id') if t.get('id') is not None else '', t.get('url')))
1495             for i, t in enumerate(thumbnails):
1496                 t['url'] = sanitize_url(t['url'])
1497                 if t.get('width') and t.get('height'):
1498                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1499                 if t.get('id') is None:
1500                     t['id'] = '%d' % i
1501
1502         if self.params.get('list_thumbnails'):
1503             self.list_thumbnails(info_dict)
1504             return
1505
1506         thumbnail = info_dict.get('thumbnail')
1507         if thumbnail:
1508             info_dict['thumbnail'] = sanitize_url(thumbnail)
1509         elif thumbnails:
1510             info_dict['thumbnail'] = thumbnails[-1]['url']
1511
1512         if 'display_id' not in info_dict and 'id' in info_dict:
1513             info_dict['display_id'] = info_dict['id']
1514
1515         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1516             # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1517             # see http://bugs.python.org/issue1646728)
1518             try:
1519                 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1520                 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1521             except (ValueError, OverflowError, OSError):
1522                 pass
1523
1524         # Auto generate title fields corresponding to the *_number fields when missing
1525         # in order to always have clean titles. This is very common for TV series.
1526         for field in ('chapter', 'season', 'episode'):
1527             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1528                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
1529
1530         for cc_kind in ('subtitles', 'automatic_captions'):
1531             cc = info_dict.get(cc_kind)
1532             if cc:
1533                 for _, subtitle in cc.items():
1534                     for subtitle_format in subtitle:
1535                         if subtitle_format.get('url'):
1536                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
1537                         if subtitle_format.get('ext') is None:
1538                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1539
1540         automatic_captions = info_dict.get('automatic_captions')
1541         subtitles = info_dict.get('subtitles')
1542
1543         if self.params.get('listsubtitles', False):
1544             if 'automatic_captions' in info_dict:
1545                 self.list_subtitles(
1546                     info_dict['id'], automatic_captions, 'automatic captions')
1547             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1548             return
1549
1550         info_dict['requested_subtitles'] = self.process_subtitles(
1551             info_dict['id'], subtitles, automatic_captions)
1552
1553         # We now pick which formats have to be downloaded
1554         if info_dict.get('formats') is None:
1555             # There's only one format available
1556             formats = [info_dict]
1557         else:
1558             formats = info_dict['formats']
1559
1560         if not formats:
1561             raise ExtractorError('No video formats found!')
1562
1563         def is_wellformed(f):
1564             url = f.get('url')
1565             if not url:
1566                 self.report_warning(
1567                     '"url" field is missing or empty - skipping format, '
1568                     'there is an error in extractor')
1569                 return False
1570             if isinstance(url, bytes):
1571                 sanitize_string_field(f, 'url')
1572             return True
1573
1574         # Filter out malformed formats for better extraction robustness
1575         formats = list(filter(is_wellformed, formats))
1576
1577         formats_dict = {}
1578
1579         # We check that all the formats have the format and format_id fields
1580         for i, format in enumerate(formats):
1581             sanitize_string_field(format, 'format_id')
1582             sanitize_numeric_fields(format)
1583             format['url'] = sanitize_url(format['url'])
1584             if not format.get('format_id'):
1585                 format['format_id'] = compat_str(i)
1586             else:
1587                 # Sanitize format_id from characters used in format selector expression
1588                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
1589             format_id = format['format_id']
1590             if format_id not in formats_dict:
1591                 formats_dict[format_id] = []
1592             formats_dict[format_id].append(format)
1593
1594         # Make sure all formats have unique format_id
1595         for format_id, ambiguous_formats in formats_dict.items():
1596             if len(ambiguous_formats) > 1:
1597                 for i, format in enumerate(ambiguous_formats):
1598                     format['format_id'] = '%s-%d' % (format_id, i)
1599
1600         for i, format in enumerate(formats):
1601             if format.get('format') is None:
1602                 format['format'] = '{id} - {res}{note}'.format(
1603                     id=format['format_id'],
1604                     res=self.format_resolution(format),
1605                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1606                 )
1607             # Automatically determine file extension if missing
1608             if format.get('ext') is None:
1609                 format['ext'] = determine_ext(format['url']).lower()
1610             # Automatically determine protocol if missing (useful for format
1611             # selection purposes)
1612             if format.get('protocol') is None:
1613                 format['protocol'] = determine_protocol(format)
1614             # Add HTTP headers, so that external programs can use them from the
1615             # json output
1616             full_format_info = info_dict.copy()
1617             full_format_info.update(format)
1618             format['http_headers'] = self._calc_headers(full_format_info)
1619         # Remove private housekeeping stuff
1620         if '__x_forwarded_for_ip' in info_dict:
1621             del info_dict['__x_forwarded_for_ip']
1622
1623         # TODO Central sorting goes here
1624
1625         if formats[0] is not info_dict:
1626             # only set the 'formats' fields if the original info_dict list them
1627             # otherwise we end up with a circular reference, the first (and unique)
1628             # element in the 'formats' field in info_dict is info_dict itself,
1629             # which can't be exported to json
1630             info_dict['formats'] = formats
1631         if self.params.get('listformats'):
1632             self.list_formats(info_dict)
1633             return
1634
1635         req_format = self.params.get('format')
1636         if req_format is None:
1637             req_format = self._default_format_spec(info_dict, download=download)
1638             if self.params.get('verbose'):
1639                 self.to_stdout('[debug] Default format spec: %s' % req_format)
1640
1641         format_selector = self.build_format_selector(req_format)
1642
1643         # While in format selection we may need to have an access to the original
1644         # format set in order to calculate some metrics or do some processing.
1645         # For now we need to be able to guess whether original formats provided
1646         # by extractor are incomplete or not (i.e. whether extractor provides only
1647         # video-only or audio-only formats) for proper formats selection for
1648         # extractors with such incomplete formats (see
1649         # https://github.com/ytdl-org/youtube-dl/pull/5556).
1650         # Since formats may be filtered during format selection and may not match
1651         # the original formats the results may be incorrect. Thus original formats
1652         # or pre-calculated metrics should be passed to format selection routines
1653         # as well.
1654         # We will pass a context object containing all necessary additional data
1655         # instead of just formats.
1656         # This fixes incorrect format selection issue (see
1657         # https://github.com/ytdl-org/youtube-dl/issues/10083).
1658         incomplete_formats = (
1659             # All formats are video-only or
1660             all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
1661             # all formats are audio-only
1662             or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
1663
1664         ctx = {
1665             'formats': formats,
1666             'incomplete_formats': incomplete_formats,
1667         }
1668
1669         formats_to_download = list(format_selector(ctx))
1670         if not formats_to_download:
1671             raise ExtractorError('requested format not available',
1672                                  expected=True)
1673
1674         if download:
1675             if len(formats_to_download) > 1:
1676                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1677             for format in formats_to_download:
1678                 new_info = dict(info_dict)
1679                 new_info.update(format)
1680                 self.process_info(new_info)
1681         # We update the info dict with the best quality format (backwards compatibility)
1682         info_dict.update(formats_to_download[-1])
1683         return info_dict
1684
1685     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1686         """Select the requested subtitles and their format"""
1687         available_subs = {}
1688         if normal_subtitles and self.params.get('writesubtitles'):
1689             available_subs.update(normal_subtitles)
1690         if automatic_captions and self.params.get('writeautomaticsub'):
1691             for lang, cap_info in automatic_captions.items():
1692                 if lang not in available_subs:
1693                     available_subs[lang] = cap_info
1694
1695         if (not self.params.get('writesubtitles') and not
1696                 self.params.get('writeautomaticsub') or not
1697                 available_subs):
1698             return None
1699
1700         if self.params.get('allsubtitles', False):
1701             requested_langs = available_subs.keys()
1702         else:
1703             if self.params.get('subtitleslangs', False):
1704                 requested_langs = self.params.get('subtitleslangs')
1705             elif 'en' in available_subs:
1706                 requested_langs = ['en']
1707             else:
1708                 requested_langs = [list(available_subs.keys())[0]]
1709
1710         formats_query = self.params.get('subtitlesformat', 'best')
1711         formats_preference = formats_query.split('/') if formats_query else []
1712         subs = {}
1713         for lang in requested_langs:
1714             formats = available_subs.get(lang)
1715             if formats is None:
1716                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1717                 continue
1718             for ext in formats_preference:
1719                 if ext == 'best':
1720                     f = formats[-1]
1721                     break
1722                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1723                 if matches:
1724                     f = matches[-1]
1725                     break
1726             else:
1727                 f = formats[-1]
1728                 self.report_warning(
1729                     'No subtitle format found matching "%s" for language %s, '
1730                     'using %s' % (formats_query, lang, f['ext']))
1731             subs[lang] = f
1732         return subs
1733
1734     def __forced_printings(self, info_dict, filename, incomplete):
1735         def print_mandatory(field):
1736             if (self.params.get('force%s' % field, False)
1737                     and (not incomplete or info_dict.get(field) is not None)):
1738                 self.to_stdout(info_dict[field])
1739
1740         def print_optional(field):
1741             if (self.params.get('force%s' % field, False)
1742                     and info_dict.get(field) is not None):
1743                 self.to_stdout(info_dict[field])
1744
1745         print_mandatory('title')
1746         print_mandatory('id')
1747         if self.params.get('forceurl', False) and not incomplete:
1748             if info_dict.get('requested_formats') is not None:
1749                 for f in info_dict['requested_formats']:
1750                     self.to_stdout(f['url'] + f.get('play_path', ''))
1751             else:
1752                 # For RTMP URLs, also include the playpath
1753                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1754         print_optional('thumbnail')
1755         print_optional('description')
1756         if self.params.get('forcefilename', False) and filename is not None:
1757             self.to_stdout(filename)
1758         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1759             self.to_stdout(formatSeconds(info_dict['duration']))
1760         print_mandatory('format')
1761         if self.params.get('forcejson', False):
1762             self.to_stdout(json.dumps(info_dict))
1763
1764     def process_info(self, info_dict):
1765         """Process a single resolved IE result."""
1766
1767         assert info_dict.get('_type', 'video') == 'video'
1768
1769         max_downloads = self.params.get('max_downloads')
1770         if max_downloads is not None:
1771             if self._num_downloads >= int(max_downloads):
1772                 raise MaxDownloadsReached()
1773
1774         # TODO: backward compatibility, to be removed
1775         info_dict['fulltitle'] = info_dict['title']
1776
1777         if 'format' not in info_dict:
1778             info_dict['format'] = info_dict['ext']
1779
1780         reason = self._match_entry(info_dict, incomplete=False)
1781         if reason is not None:
1782             self.to_screen('[download] ' + reason)
1783             return
1784
1785         self._num_downloads += 1
1786
1787         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1788
1789         # Forced printings
1790         self.__forced_printings(info_dict, filename, incomplete=False)
1791
1792         # Do nothing else if in simulate mode
1793         if self.params.get('simulate', False):
1794             return
1795
1796         if filename is None:
1797             return
1798
1799         def ensure_dir_exists(path):
1800             try:
1801                 dn = os.path.dirname(path)
1802                 if dn and not os.path.exists(dn):
1803                     os.makedirs(dn)
1804                 return True
1805             except (OSError, IOError) as err:
1806                 self.report_error('unable to create directory ' + error_to_compat_str(err))
1807                 return False
1808
1809         if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
1810             return
1811
1812         if self.params.get('writedescription', False):
1813             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1814             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1815                 self.to_screen('[info] Video description is already present')
1816             elif info_dict.get('description') is None:
1817                 self.report_warning('There\'s no description to write.')
1818             else:
1819                 try:
1820                     self.to_screen('[info] Writing video description to: ' + descfn)
1821                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1822                         descfile.write(info_dict['description'])
1823                 except (OSError, IOError):
1824                     self.report_error('Cannot write description file ' + descfn)
1825                     return
1826
1827         if self.params.get('writeannotations', False):
1828             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1829             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1830                 self.to_screen('[info] Video annotations are already present')
1831             elif not info_dict.get('annotations'):
1832                 self.report_warning('There are no annotations to write.')
1833             else:
1834                 try:
1835                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1836                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1837                         annofile.write(info_dict['annotations'])
1838                 except (KeyError, TypeError):
1839                     self.report_warning('There are no annotations to write.')
1840                 except (OSError, IOError):
1841                     self.report_error('Cannot write annotations file: ' + annofn)
1842                     return
1843
1844         def dl(name, info):
1845             fd = get_suitable_downloader(info, self.params)(self, self.params)
1846             for ph in self._progress_hooks:
1847                 fd.add_progress_hook(ph)
1848             if self.params.get('verbose'):
1849                 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1850             return fd.download(name, info)
1851
1852         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1853                                        self.params.get('writeautomaticsub')])
1854
1855         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1856             # subtitles download errors are already managed as troubles in relevant IE
1857             # that way it will silently go on when used with unsupporting IE
1858             subtitles = info_dict['requested_subtitles']
1859             for sub_lang, sub_info in subtitles.items():
1860                 sub_format = sub_info['ext']
1861                 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
1862                 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1863                     self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
1864                 else:
1865                     if sub_info.get('data') is not None:
1866                         try:
1867                             # Use newline='' to prevent conversion of newline characters
1868                             # See https://github.com/ytdl-org/youtube-dl/issues/10268
1869                             with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
1870                                 subfile.write(sub_info['data'])
1871                         except (OSError, IOError):
1872                             self.report_error('Cannot write subtitles file ' + sub_filename)
1873                             return
1874                     else:
1875                         try:
1876                             dl(sub_filename, sub_info)
1877                         except (ExtractorError, IOError, OSError, ValueError,
1878                                 compat_urllib_error.URLError,
1879                                 compat_http_client.HTTPException,
1880                                 socket.error) as err:
1881                             self.report_warning('Unable to download subtitle for "%s": %s' %
1882                                                 (sub_lang, error_to_compat_str(err)))
1883                             continue
1884
1885         if self.params.get('skip_download', False):
1886             if self.params.get('convertsubtitles', False):
1887                 subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
1888                 filename_real_ext = os.path.splitext(filename)[1][1:]
1889                 filename_wo_ext = (
1890                     os.path.splitext(filename)[0]
1891                     if filename_real_ext == info_dict['ext']
1892                     else filename)
1893                 afilename = '%s.%s' % (filename_wo_ext, self.params.get('convertsubtitles'))
1894                 if subconv.available:
1895                     info_dict.setdefault('__postprocessors', [])
1896                     # info_dict['__postprocessors'].append(subconv)
1897                 if os.path.exists(encodeFilename(afilename)):
1898                     self.to_screen(
1899                         '[download] %s has already been downloaded and '
1900                         'converted' % afilename)
1901                 else:
1902                     try:
1903                         self.post_process(filename, info_dict)
1904                     except (PostProcessingError) as err:
1905                         self.report_error('postprocessing: %s' % str(err))
1906                         return
1907
1908         if self.params.get('writeinfojson', False):
1909             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1910             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1911                 self.to_screen('[info] Video description metadata is already present')
1912             else:
1913                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1914                 try:
1915                     write_json_file(self.filter_requested_info(info_dict), infofn)
1916                 except (OSError, IOError):
1917                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1918                     return
1919
1920         self._write_thumbnails(info_dict, filename)
1921
1922         if not self.params.get('skip_download', False):
1923             try:
1924                 if info_dict.get('requested_formats') is not None:
1925                     downloaded = []
1926                     success = True
1927                     merger = FFmpegMergerPP(self)
1928                     if not merger.available:
1929                         postprocessors = []
1930                         self.report_warning('You have requested multiple '
1931                                             'formats but ffmpeg or avconv are not installed.'
1932                                             ' The formats won\'t be merged.')
1933                     else:
1934                         postprocessors = [merger]
1935
1936                     def compatible_formats(formats):
1937                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
1938                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
1939                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
1940                         if len(video_formats) > 2 or len(audio_formats) > 2:
1941                             return False
1942
1943                         # Check extension
1944                         exts = set(format.get('ext') for format in formats)
1945                         COMPATIBLE_EXTS = (
1946                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
1947                             set(('webm',)),
1948                         )
1949                         for ext_sets in COMPATIBLE_EXTS:
1950                             if ext_sets.issuperset(exts):
1951                                 return True
1952                         # TODO: Check acodec/vcodec
1953                         return False
1954
1955                     filename_real_ext = os.path.splitext(filename)[1][1:]
1956                     filename_wo_ext = (
1957                         os.path.splitext(filename)[0]
1958                         if filename_real_ext == info_dict['ext']
1959                         else filename)
1960                     requested_formats = info_dict['requested_formats']
1961                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1962                         info_dict['ext'] = 'mkv'
1963                         self.report_warning(
1964                             'Requested formats are incompatible for merge and will be merged into mkv.')
1965                     # Ensure filename always has a correct extension for successful merge
1966                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1967                     if os.path.exists(encodeFilename(filename)):
1968                         self.to_screen(
1969                             '[download] %s has already been downloaded and '
1970                             'merged' % filename)
1971                     else:
1972                         for f in requested_formats:
1973                             new_info = dict(info_dict)
1974                             new_info.update(f)
1975                             fname = prepend_extension(
1976                                 self.prepare_filename(new_info),
1977                                 'f%s' % f['format_id'], new_info['ext'])
1978                             if not ensure_dir_exists(fname):
1979                                 return
1980                             downloaded.append(fname)
1981                             partial_success = dl(fname, new_info)
1982                             success = success and partial_success
1983                         info_dict['__postprocessors'] = postprocessors
1984                         info_dict['__files_to_merge'] = downloaded
1985                 else:
1986                     # Just a single file
1987                     success = dl(filename, info_dict)
1988             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1989                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
1990                 return
1991             except (OSError, IOError) as err:
1992                 raise UnavailableVideoError(err)
1993             except (ContentTooShortError, ) as err:
1994                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1995                 return
1996
1997             if success and filename != '-':
1998                 # Fixup content
1999                 fixup_policy = self.params.get('fixup')
2000                 if fixup_policy is None:
2001                     fixup_policy = 'detect_or_warn'
2002
2003                 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
2004
2005                 stretched_ratio = info_dict.get('stretched_ratio')
2006                 if stretched_ratio is not None and stretched_ratio != 1:
2007                     if fixup_policy == 'warn':
2008                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2009                             info_dict['id'], stretched_ratio))
2010                     elif fixup_policy == 'detect_or_warn':
2011                         stretched_pp = FFmpegFixupStretchedPP(self)
2012                         if stretched_pp.available:
2013                             info_dict.setdefault('__postprocessors', [])
2014                             info_dict['__postprocessors'].append(stretched_pp)
2015                         else:
2016                             self.report_warning(
2017                                 '%s: Non-uniform pixel ratio (%s). %s'
2018                                 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
2019                     else:
2020                         assert fixup_policy in ('ignore', 'never')
2021
2022                 if (info_dict.get('requested_formats') is None
2023                         and info_dict.get('container') == 'm4a_dash'):
2024                     if fixup_policy == 'warn':
2025                         self.report_warning(
2026                             '%s: writing DASH m4a. '
2027                             'Only some players support this container.'
2028                             % info_dict['id'])
2029                     elif fixup_policy == 'detect_or_warn':
2030                         fixup_pp = FFmpegFixupM4aPP(self)
2031                         if fixup_pp.available:
2032                             info_dict.setdefault('__postprocessors', [])
2033                             info_dict['__postprocessors'].append(fixup_pp)
2034                         else:
2035                             self.report_warning(
2036                                 '%s: writing DASH m4a. '
2037                                 'Only some players support this container. %s'
2038                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2039                     else:
2040                         assert fixup_policy in ('ignore', 'never')
2041
2042                 if (info_dict.get('protocol') == 'm3u8_native'
2043                         or info_dict.get('protocol') == 'm3u8'
2044                         and self.params.get('hls_prefer_native')):
2045                     if fixup_policy == 'warn':
2046                         self.report_warning('%s: malformed AAC bitstream detected.' % (
2047                             info_dict['id']))
2048                     elif fixup_policy == 'detect_or_warn':
2049                         fixup_pp = FFmpegFixupM3u8PP(self)
2050                         if fixup_pp.available:
2051                             info_dict.setdefault('__postprocessors', [])
2052                             info_dict['__postprocessors'].append(fixup_pp)
2053                         else:
2054                             self.report_warning(
2055                                 '%s: malformed AAC bitstream detected. %s'
2056                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2057                     else:
2058                         assert fixup_policy in ('ignore', 'never')
2059
2060                 try:
2061                     self.post_process(filename, info_dict)
2062                 except (PostProcessingError) as err:
2063                     self.report_error('postprocessing: %s' % str(err))
2064                     return
2065                 self.record_download_archive(info_dict)
2066
2067     def download(self, url_list):
2068         """Download a given list of URLs."""
2069         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
2070         if (len(url_list) > 1
2071                 and outtmpl != '-'
2072                 and '%' not in outtmpl
2073                 and self.params.get('max_downloads') != 1):
2074             raise SameFileError(outtmpl)
2075
2076         for url in url_list:
2077             try:
2078                 # It also downloads the videos
2079                 res = self.extract_info(
2080                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2081             except UnavailableVideoError:
2082                 self.report_error('unable to download video')
2083             except MaxDownloadsReached:
2084                 self.to_screen('[info] Maximum number of downloaded files reached.')
2085                 raise
2086             else:
2087                 if self.params.get('dump_single_json', False):
2088                     self.to_stdout(json.dumps(res))
2089
2090         return self._download_retcode
2091
2092     def download_with_info_file(self, info_filename):
2093         with contextlib.closing(fileinput.FileInput(
2094                 [info_filename], mode='r',
2095                 openhook=fileinput.hook_encoded('utf-8'))) as f:
2096             # FileInput doesn't have a read method, we can't call json.load
2097             info = self.filter_requested_info(json.loads('\n'.join(f)))
2098         try:
2099             self.process_ie_result(info, download=True)
2100         except DownloadError:
2101             webpage_url = info.get('webpage_url')
2102             if webpage_url is not None:
2103                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2104                 return self.download([webpage_url])
2105             else:
2106                 raise
2107         return self._download_retcode
2108
2109     @staticmethod
2110     def filter_requested_info(info_dict):
2111         return dict(
2112             (k, v) for k, v in info_dict.items()
2113             if k not in ['requested_formats', 'requested_subtitles'])
2114
2115     def post_process(self, filename, ie_info):
2116         """Run all the postprocessors on the given file."""
2117         info = dict(ie_info)
2118         info['filepath'] = filename
2119         pps_chain = []
2120         if ie_info.get('__postprocessors') is not None:
2121             pps_chain.extend(ie_info['__postprocessors'])
2122         pps_chain.extend(self._pps)
2123         for pp in pps_chain:
2124             files_to_delete = []
2125             try:
2126                 files_to_delete, info = pp.run(info)
2127             except PostProcessingError as e:
2128                 self.report_error(e.msg)
2129             if files_to_delete and not self.params.get('keepvideo', False):
2130                 for old_filename in set(files_to_delete):
2131                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2132                     try:
2133                         os.remove(encodeFilename(old_filename))
2134                     except (IOError, OSError):
2135                         self.report_warning('Unable to remove downloaded original file')
2136
2137     def _make_archive_id(self, info_dict):
2138         video_id = info_dict.get('id')
2139         if not video_id:
2140             return
2141         # Future-proof against any change in case
2142         # and backwards compatibility with prior versions
2143         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
2144         if extractor is None:
2145             url = str_or_none(info_dict.get('url'))
2146             if not url:
2147                 return
2148             # Try to find matching extractor for the URL and take its ie_key
2149             for ie in self._ies:
2150                 if ie.suitable(url):
2151                     extractor = ie.ie_key()
2152                     break
2153             else:
2154                 return
2155         return extractor.lower() + ' ' + video_id
2156
2157     def in_download_archive(self, info_dict):
2158         fn = self.params.get('download_archive')
2159         if fn is None:
2160             return False
2161
2162         vid_id = self._make_archive_id(info_dict)
2163         if not vid_id:
2164             return False  # Incomplete video information
2165
2166         return vid_id in self.archive
2167
2168     def record_download_archive(self, info_dict):
2169         fn = self.params.get('download_archive')
2170         if fn is None:
2171             return
2172         vid_id = self._make_archive_id(info_dict)
2173         assert vid_id
2174         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2175             archive_file.write(vid_id + '\n')
2176         self.archive.add(vid_id)
2177
2178     @staticmethod
2179     def format_resolution(format, default='unknown'):
2180         if format.get('vcodec') == 'none':
2181             return 'audio only'
2182         if format.get('resolution') is not None:
2183             return format['resolution']
2184         if format.get('height') is not None:
2185             if format.get('width') is not None:
2186                 res = '%sx%s' % (format['width'], format['height'])
2187             else:
2188                 res = '%sp' % format['height']
2189         elif format.get('width') is not None:
2190             res = '%dx?' % format['width']
2191         else:
2192             res = default
2193         return res
2194
2195     def _format_note(self, fdict):
2196         res = ''
2197         if fdict.get('ext') in ['f4f', 'f4m']:
2198             res += '(unsupported) '
2199         if fdict.get('language'):
2200             if res:
2201                 res += ' '
2202             res += '[%s] ' % fdict['language']
2203         if fdict.get('format_note') is not None:
2204             res += fdict['format_note'] + ' '
2205         if fdict.get('tbr') is not None:
2206             res += '%4dk ' % fdict['tbr']
2207         if fdict.get('container') is not None:
2208             if res:
2209                 res += ', '
2210             res += '%s container' % fdict['container']
2211         if (fdict.get('vcodec') is not None
2212                 and fdict.get('vcodec') != 'none'):
2213             if res:
2214                 res += ', '
2215             res += fdict['vcodec']
2216             if fdict.get('vbr') is not None:
2217                 res += '@'
2218         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2219             res += 'video@'
2220         if fdict.get('vbr') is not None:
2221             res += '%4dk' % fdict['vbr']
2222         if fdict.get('fps') is not None:
2223             if res:
2224                 res += ', '
2225             res += '%sfps' % fdict['fps']
2226         if fdict.get('acodec') is not None:
2227             if res:
2228                 res += ', '
2229             if fdict['acodec'] == 'none':
2230                 res += 'video only'
2231             else:
2232                 res += '%-5s' % fdict['acodec']
2233         elif fdict.get('abr') is not None:
2234             if res:
2235                 res += ', '
2236             res += 'audio'
2237         if fdict.get('abr') is not None:
2238             res += '@%3dk' % fdict['abr']
2239         if fdict.get('asr') is not None:
2240             res += ' (%5dHz)' % fdict['asr']
2241         if fdict.get('filesize') is not None:
2242             if res:
2243                 res += ', '
2244             res += format_bytes(fdict['filesize'])
2245         elif fdict.get('filesize_approx') is not None:
2246             if res:
2247                 res += ', '
2248             res += '~' + format_bytes(fdict['filesize_approx'])
2249         return res
2250
2251     def list_formats(self, info_dict):
2252         formats = info_dict.get('formats', [info_dict])
2253         table = [
2254             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
2255             for f in formats
2256             if f.get('preference') is None or f['preference'] >= -1000]
2257         if len(formats) > 1:
2258             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
2259
2260         header_line = ['format code', 'extension', 'resolution', 'note']
2261         self.to_screen(
2262             '[info] Available formats for %s:\n%s' %
2263             (info_dict['id'], render_table(header_line, table)))
2264
2265     def list_thumbnails(self, info_dict):
2266         thumbnails = info_dict.get('thumbnails')
2267         if not thumbnails:
2268             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
2269             return
2270
2271         self.to_screen(
2272             '[info] Thumbnails for %s:' % info_dict['id'])
2273         self.to_screen(render_table(
2274             ['ID', 'width', 'height', 'URL'],
2275             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
2276
2277     def list_subtitles(self, video_id, subtitles, name='subtitles'):
2278         if not subtitles:
2279             self.to_screen('%s has no %s' % (video_id, name))
2280             return
2281         self.to_screen(
2282             'Available %s for %s:' % (name, video_id))
2283         self.to_screen(render_table(
2284             ['Language', 'formats'],
2285             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
2286                 for lang, formats in subtitles.items()]))
2287
2288     def urlopen(self, req):
2289         """ Start an HTTP download """
2290         if isinstance(req, compat_basestring):
2291             req = sanitized_Request(req)
2292         return self._opener.open(req, timeout=self._socket_timeout)
2293
2294     def print_debug_header(self):
2295         if not self.params.get('verbose'):
2296             return
2297
2298         if type('') is not compat_str:
2299             # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
2300             self.report_warning(
2301                 'Your Python is broken! Update to a newer and supported version')
2302
2303         stdout_encoding = getattr(
2304             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
2305         encoding_str = (
2306             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
2307                 locale.getpreferredencoding(),
2308                 sys.getfilesystemencoding(),
2309                 stdout_encoding,
2310                 self.get_encoding()))
2311         write_string(encoding_str, encoding=None)
2312
2313         self._write_string('[debug] youtube-dlc version ' + __version__ + '\n')
2314         if _LAZY_LOADER:
2315             self._write_string('[debug] Lazy loading extractors enabled' + '\n')
2316         try:
2317             sp = subprocess.Popen(
2318                 ['git', 'rev-parse', '--short', 'HEAD'],
2319                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
2320                 cwd=os.path.dirname(os.path.abspath(__file__)))
2321             out, err = sp.communicate()
2322             out = out.decode().strip()
2323             if re.match('[0-9a-f]+', out):
2324                 self._write_string('[debug] Git HEAD: ' + out + '\n')
2325         except Exception:
2326             try:
2327                 sys.exc_clear()
2328             except Exception:
2329                 pass
2330
2331         def python_implementation():
2332             impl_name = platform.python_implementation()
2333             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
2334                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
2335             return impl_name
2336
2337         self._write_string('[debug] Python version %s (%s) - %s\n' % (
2338             platform.python_version(), python_implementation(),
2339             platform_name()))
2340
2341         exe_versions = FFmpegPostProcessor.get_versions(self)
2342         exe_versions['rtmpdump'] = rtmpdump_version()
2343         exe_versions['phantomjs'] = PhantomJSwrapper._version()
2344         exe_str = ', '.join(
2345             '%s %s' % (exe, v)
2346             for exe, v in sorted(exe_versions.items())
2347             if v
2348         )
2349         if not exe_str:
2350             exe_str = 'none'
2351         self._write_string('[debug] exe versions: %s\n' % exe_str)
2352
2353         proxy_map = {}
2354         for handler in self._opener.handlers:
2355             if hasattr(handler, 'proxies'):
2356                 proxy_map.update(handler.proxies)
2357         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
2358
2359         if self.params.get('call_home', False):
2360             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
2361             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
2362             latest_version = self.urlopen(
2363                 'https://yt-dl.org/latest/version').read().decode('utf-8')
2364             if version_tuple(latest_version) > version_tuple(__version__):
2365                 self.report_warning(
2366                     'You are using an outdated version (newest version: %s)! '
2367                     'See https://yt-dl.org/update if you need help updating.' %
2368                     latest_version)
2369
2370     def _setup_opener(self):
2371         timeout_val = self.params.get('socket_timeout')
2372         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
2373
2374         opts_cookiefile = self.params.get('cookiefile')
2375         opts_proxy = self.params.get('proxy')
2376
2377         if opts_cookiefile is None:
2378             self.cookiejar = compat_cookiejar.CookieJar()
2379         else:
2380             opts_cookiefile = expand_path(opts_cookiefile)
2381             self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
2382             if os.access(opts_cookiefile, os.R_OK):
2383                 self.cookiejar.load(ignore_discard=True, ignore_expires=True)
2384
2385         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
2386         if opts_proxy is not None:
2387             if opts_proxy == '':
2388                 proxies = {}
2389             else:
2390                 proxies = {'http': opts_proxy, 'https': opts_proxy}
2391         else:
2392             proxies = compat_urllib_request.getproxies()
2393             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
2394             if 'http' in proxies and 'https' not in proxies:
2395                 proxies['https'] = proxies['http']
2396         proxy_handler = PerRequestProxyHandler(proxies)
2397
2398         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
2399         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
2400         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
2401         redirect_handler = YoutubeDLRedirectHandler()
2402         data_handler = compat_urllib_request_DataHandler()
2403
2404         # When passing our own FileHandler instance, build_opener won't add the
2405         # default FileHandler and allows us to disable the file protocol, which
2406         # can be used for malicious purposes (see
2407         # https://github.com/ytdl-org/youtube-dl/issues/8227)
2408         file_handler = compat_urllib_request.FileHandler()
2409
2410         def file_open(*args, **kwargs):
2411             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dlc for security reasons')
2412         file_handler.file_open = file_open
2413
2414         opener = compat_urllib_request.build_opener(
2415             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2416
2417         # Delete the default user-agent header, which would otherwise apply in
2418         # cases where our custom HTTP handler doesn't come into play
2419         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
2420         opener.addheaders = []
2421         self._opener = opener
2422
2423     def encode(self, s):
2424         if isinstance(s, bytes):
2425             return s  # Already encoded
2426
2427         try:
2428             return s.encode(self.get_encoding())
2429         except UnicodeEncodeError as err:
2430             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2431             raise
2432
2433     def get_encoding(self):
2434         encoding = self.params.get('encoding')
2435         if encoding is None:
2436             encoding = preferredencoding()
2437         return encoding
2438
2439     def _write_thumbnails(self, info_dict, filename):
2440         if self.params.get('writethumbnail', False):
2441             thumbnails = info_dict.get('thumbnails')
2442             if thumbnails:
2443                 thumbnails = [thumbnails[-1]]
2444         elif self.params.get('write_all_thumbnails', False):
2445             thumbnails = info_dict.get('thumbnails')
2446         else:
2447             return
2448
2449         if not thumbnails:
2450             # No thumbnails present, so return immediately
2451             return
2452
2453         for t in thumbnails:
2454             thumb_ext = determine_ext(t['url'], 'jpg')
2455             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2456             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2457             t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
2458
2459             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2460                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2461                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2462             else:
2463                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2464                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2465                 try:
2466                     uf = self.urlopen(t['url'])
2467                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2468                         shutil.copyfileobj(uf, thumbf)
2469                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2470                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2471                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2472                     self.report_warning('Unable to download thumbnail "%s": %s' %
2473                                         (t['url'], error_to_compat_str(err)))