]> jfr.im git - yt-dlp.git/blob - youtube_dl/YoutubeDL.py
Merge remote-tracking branch 'rupertbaxter2/master'
[yt-dlp.git] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import datetime
8 import errno
9 import io
10 import itertools
11 import json
12 import locale
13 import operator
14 import os
15 import platform
16 import re
17 import shutil
18 import subprocess
19 import socket
20 import sys
21 import time
22 import traceback
23
24 if os.name == 'nt':
25 import ctypes
26
27 from .compat import (
28 compat_cookiejar,
29 compat_expanduser,
30 compat_http_client,
31 compat_kwargs,
32 compat_str,
33 compat_urllib_error,
34 compat_urllib_request,
35 )
36 from .utils import (
37 escape_url,
38 ContentTooShortError,
39 date_from_str,
40 DateRange,
41 DEFAULT_OUTTMPL,
42 determine_ext,
43 DownloadError,
44 encodeFilename,
45 ExtractorError,
46 format_bytes,
47 formatSeconds,
48 get_term_width,
49 locked_file,
50 make_HTTPS_handler,
51 MaxDownloadsReached,
52 PagedList,
53 parse_filesize,
54 PostProcessingError,
55 platform_name,
56 preferredencoding,
57 SameFileError,
58 sanitize_filename,
59 subtitles_filename,
60 takewhile_inclusive,
61 UnavailableVideoError,
62 url_basename,
63 version_tuple,
64 write_json_file,
65 write_string,
66 YoutubeDLHandler,
67 prepend_extension,
68 args_to_str,
69 age_restricted,
70 )
71 from .cache import Cache
72 from .extractor import get_info_extractor, gen_extractors
73 from .downloader import get_suitable_downloader
74 from .downloader.rtmp import rtmpdump_version
75 from .postprocessor import (
76 FFmpegFixupStretchedPP,
77 FFmpegMergerPP,
78 FFmpegPostProcessor,
79 get_postprocessor,
80 )
81 from .version import __version__
82
83
class YoutubeDL(object):
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible of downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information, task that InfoExtractors do, it
    has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL process the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceurl:          Force printing final URL.
    forcetitle:        Force printing title.
    forceid:           Force printing ID.
    forcethumbnail:    Force printing thumbnail URL.
    forcedescription:  Force printing description.
    forcefilename:     Force printing final filename.
    forceduration:     Force printing duration.
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    simulate:          Do not download the video files.
    format:            Video format code. See options.py for more information.
    format_limit:      Highest quality format to try.
    outtmpl:           Template for output names.
    restrictfilenames: Do not allow "&" and spaces in file names
    ignoreerrors:      Do not stop on download errors.
    nooverwrites:      Prevent overwriting files.
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    playlistreverse:   Download playlist items in reverse order.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Log messages to stderr instead of stdout.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video description to a .info.json file
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatic subtitles to a file
    allsubtitles:      Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
    subtitleslangs:    List of languages of the subtitles to download
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.
    cookiefile:        File name where cookies should be read from and dumped to.
    nocheckcertificate:Do not verify SSL certificates
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       At the moment, this is only supported by YouTube.
    proxy:             URL of the proxy server to use
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic:Print out sent and received HTTP traffic
    include_ads:       Download ads as well
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Do not resolve URLs, return the immediate result.
                       Pass in 'in_playlist' to only show this behavior for
                       playlist items.
    postprocessors:    A list of dictionaries, each with an entry
                       * key:  The name of the postprocessor. See
                               youtube_dl/postprocessor/__init__.py for a list.
                       as well as any further keyword arguments for the
                       postprocessor.
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * filename: The final filename
                       * status: One of "downloading" and "finished"

                       The dict may also have some of the following entries:

                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * tmpfilename: The filename we're currently writing to
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                                unknown

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
    merge_output_format: Extension to use when merging formats.
    fixup:             Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                                           about it, warn otherwise
    source_address:    (Experimental) Client-side IP address to bind to.
    call_home:         Boolean, true iff we are allowed to contact the
                       youtube-dl servers for debugging.
    sleep_interval:    Number of seconds to sleep before each download.


    The following parameters are not used by YoutubeDL itself, they are used by
    the FileDownloader:
    nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
    noresizebuffer, retries, continuedl, noprogress, consoletitle

    The following options are used by the post processors:
    prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
                       otherwise prefer avconv.
    exec_cmd:          Arbitrary command to run after downloading
    """

    # Class-level defaults; every one of these is rebound per-instance
    # in __init__, so the mutable lists here are never actually shared.
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None
240
241 def __init__(self, params=None, auto_init=True):
242 """Create a FileDownloader object with the given options."""
243 if params is None:
244 params = {}
245 self._ies = []
246 self._ies_instances = {}
247 self._pps = []
248 self._progress_hooks = []
249 self._download_retcode = 0
250 self._num_downloads = 0
251 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
252 self._err_file = sys.stderr
253 self.params = params
254 self.cache = Cache(self)
255
256 if params.get('bidi_workaround', False):
257 try:
258 import pty
259 master, slave = pty.openpty()
260 width = get_term_width()
261 if width is None:
262 width_args = []
263 else:
264 width_args = ['-w', str(width)]
265 sp_kwargs = dict(
266 stdin=subprocess.PIPE,
267 stdout=slave,
268 stderr=self._err_file)
269 try:
270 self._output_process = subprocess.Popen(
271 ['bidiv'] + width_args, **sp_kwargs
272 )
273 except OSError:
274 self._output_process = subprocess.Popen(
275 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
276 self._output_channel = os.fdopen(master, 'rb')
277 except OSError as ose:
278 if ose.errno == 2:
279 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
280 else:
281 raise
282
283 if (sys.version_info >= (3,) and sys.platform != 'win32' and
284 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
285 and not params.get('restrictfilenames', False)):
286 # On Python 3, the Unicode filesystem API will throw errors (#1474)
287 self.report_warning(
288 'Assuming --restrict-filenames since file system encoding '
289 'cannot encode all characters. '
290 'Set the LC_ALL environment variable to fix this.')
291 self.params['restrictfilenames'] = True
292
293 if '%(stitle)s' in self.params.get('outtmpl', ''):
294 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
295
296 self._setup_opener()
297
298 if auto_init:
299 self.print_debug_header()
300 self.add_default_info_extractors()
301
302 for pp_def_raw in self.params.get('postprocessors', []):
303 pp_class = get_postprocessor(pp_def_raw['key'])
304 pp_def = dict(pp_def_raw)
305 del pp_def['key']
306 pp = pp_class(self, **compat_kwargs(pp_def))
307 self.add_post_processor(pp)
308
309 for ph in self.params.get('progress_hooks', []):
310 self.add_progress_hook(ph)
311
312 def warn_if_short_id(self, argv):
313 # short YouTube ID starting with dash?
314 idxs = [
315 i for i, a in enumerate(argv)
316 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
317 if idxs:
318 correct_argv = (
319 ['youtube-dl'] +
320 [a for i, a in enumerate(argv) if i not in idxs] +
321 ['--'] + [argv[i] for i in idxs]
322 )
323 self.report_warning(
324 'Long argument string detected. '
325 'Use -- to separate parameters and URLs, like this:\n%s\n' %
326 args_to_str(correct_argv))
327
328 def add_info_extractor(self, ie):
329 """Add an InfoExtractor object to the end of the list."""
330 self._ies.append(ie)
331 self._ies_instances[ie.ie_key()] = ie
332 ie.set_downloader(self)
333
334 def get_info_extractor(self, ie_key):
335 """
336 Get an instance of an IE with name ie_key, it will try to get one from
337 the _ies list, if there's no instance it will create a new one and add
338 it to the extractor list.
339 """
340 ie = self._ies_instances.get(ie_key)
341 if ie is None:
342 ie = get_info_extractor(ie_key)()
343 self.add_info_extractor(ie)
344 return ie
345
346 def add_default_info_extractors(self):
347 """
348 Add the InfoExtractors returned by gen_extractors to the end of the list
349 """
350 for ie in gen_extractors():
351 self.add_info_extractor(ie)
352
353 def add_post_processor(self, pp):
354 """Add a PostProcessor object to the end of the chain."""
355 self._pps.append(pp)
356 pp.set_downloader(self)
357
358 def add_progress_hook(self, ph):
359 """Add the progress hook (currently only for the file downloader)"""
360 self._progress_hooks.append(ph)
361
362 def _bidi_workaround(self, message):
363 if not hasattr(self, '_output_channel'):
364 return message
365
366 assert hasattr(self, '_output_process')
367 assert isinstance(message, compat_str)
368 line_count = message.count('\n') + 1
369 self._output_process.stdin.write((message + '\n').encode('utf-8'))
370 self._output_process.stdin.flush()
371 res = ''.join(self._output_channel.readline().decode('utf-8')
372 for _ in range(line_count))
373 return res[:-len('\n')]
374
375 def to_screen(self, message, skip_eol=False):
376 """Print message to stdout if not in quiet mode."""
377 return self.to_stdout(message, skip_eol, check_quiet=True)
378
379 def _write_string(self, s, out=None):
380 write_string(s, out=out, encoding=self.params.get('encoding'))
381
382 def to_stdout(self, message, skip_eol=False, check_quiet=False):
383 """Print message to stdout if not in quiet mode."""
384 if self.params.get('logger'):
385 self.params['logger'].debug(message)
386 elif not check_quiet or not self.params.get('quiet', False):
387 message = self._bidi_workaround(message)
388 terminator = ['\n', ''][skip_eol]
389 output = message + terminator
390
391 self._write_string(output, self._screen_file)
392
393 def to_stderr(self, message):
394 """Print message to stderr."""
395 assert isinstance(message, compat_str)
396 if self.params.get('logger'):
397 self.params['logger'].error(message)
398 else:
399 message = self._bidi_workaround(message)
400 output = message + '\n'
401 self._write_string(output, self._err_file)
402
403 def to_console_title(self, message):
404 if not self.params.get('consoletitle', False):
405 return
406 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
407 # c_wchar_p() might not be necessary if `message` is
408 # already of type unicode()
409 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
410 elif 'TERM' in os.environ:
411 self._write_string('\033]0;%s\007' % message, self._screen_file)
412
413 def save_console_title(self):
414 if not self.params.get('consoletitle', False):
415 return
416 if 'TERM' in os.environ:
417 # Save the title on stack
418 self._write_string('\033[22;0t', self._screen_file)
419
420 def restore_console_title(self):
421 if not self.params.get('consoletitle', False):
422 return
423 if 'TERM' in os.environ:
424 # Restore the title from stack
425 self._write_string('\033[23;0t', self._screen_file)
426
427 def __enter__(self):
428 self.save_console_title()
429 return self
430
431 def __exit__(self, *args):
432 self.restore_console_title()
433
434 if self.params.get('cookiefile') is not None:
435 self.cookiejar.save()
436
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # Prefer the original traceback carried by wrapper
                    # exceptions (e.g. ExtractorError.exc_info), then append
                    # the current one.
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += compat_str(traceback.format_exc())
                else:
                    # No active exception: dump the current call stack instead.
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Re-raise as DownloadError, preserving the innermost exc_info
            # when the current exception wraps another one.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1
466
467 def report_warning(self, message):
468 '''
469 Print the message to stderr, it will be prefixed with 'WARNING:'
470 If stderr is a tty file the 'WARNING:' will be colored
471 '''
472 if self.params.get('logger') is not None:
473 self.params['logger'].warning(message)
474 else:
475 if self.params.get('no_warnings'):
476 return
477 if self._err_file.isatty() and os.name != 'nt':
478 _msg_header = '\033[0;33mWARNING:\033[0m'
479 else:
480 _msg_header = 'WARNING:'
481 warning_message = '%s %s' % (_msg_header, message)
482 self.to_stderr(warning_message)
483
484 def report_error(self, message, tb=None):
485 '''
486 Do the same as trouble, but prefixes the message with 'ERROR:', colored
487 in red if stderr is a tty file.
488 '''
489 if self._err_file.isatty() and os.name != 'nt':
490 _msg_header = '\033[0;31mERROR:\033[0m'
491 else:
492 _msg_header = 'ERROR:'
493 error_message = '%s %s' % (_msg_header, message)
494 self.trouble(error_message, tb)
495
496 def report_file_already_downloaded(self, file_name):
497 """Report file has already been fully downloaded."""
498 try:
499 self.to_screen('[download] %s has already been downloaded' % file_name)
500 except UnicodeEncodeError:
501 self.to_screen('[download] The file has already been downloaded')
502
503 def prepare_filename(self, info_dict):
504 """Generate the output filename."""
505 try:
506 template_dict = dict(info_dict)
507
508 template_dict['epoch'] = int(time.time())
509 autonumber_size = self.params.get('autonumber_size')
510 if autonumber_size is None:
511 autonumber_size = 5
512 autonumber_templ = '%0' + str(autonumber_size) + 'd'
513 template_dict['autonumber'] = autonumber_templ % self._num_downloads
514 if template_dict.get('playlist_index') is not None:
515 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
516 if template_dict.get('resolution') is None:
517 if template_dict.get('width') and template_dict.get('height'):
518 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
519 elif template_dict.get('height'):
520 template_dict['resolution'] = '%sp' % template_dict['height']
521 elif template_dict.get('width'):
522 template_dict['resolution'] = '?x%d' % template_dict['width']
523
524 sanitize = lambda k, v: sanitize_filename(
525 compat_str(v),
526 restricted=self.params.get('restrictfilenames'),
527 is_id=(k == 'id'))
528 template_dict = dict((k, sanitize(k, v))
529 for k, v in template_dict.items()
530 if v is not None)
531 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
532
533 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
534 tmpl = compat_expanduser(outtmpl)
535 filename = tmpl % template_dict
536 return filename
537 except ValueError as err:
538 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
539 return None
540
541 def _match_entry(self, info_dict):
542 """ Returns None iff the file should be downloaded """
543
544 video_title = info_dict.get('title', info_dict.get('id', 'video'))
545 if 'title' in info_dict:
546 # This can happen when we're just evaluating the playlist
547 title = info_dict['title']
548 matchtitle = self.params.get('matchtitle', False)
549 if matchtitle:
550 if not re.search(matchtitle, title, re.IGNORECASE):
551 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
552 rejecttitle = self.params.get('rejecttitle', False)
553 if rejecttitle:
554 if re.search(rejecttitle, title, re.IGNORECASE):
555 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
556 date = info_dict.get('upload_date', None)
557 if date is not None:
558 dateRange = self.params.get('daterange', DateRange())
559 if date not in dateRange:
560 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
561 view_count = info_dict.get('view_count', None)
562 if view_count is not None:
563 min_views = self.params.get('min_views')
564 if min_views is not None and view_count < min_views:
565 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
566 max_views = self.params.get('max_views')
567 if max_views is not None and view_count > max_views:
568 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
569 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
570 return 'Skipping "%s" because it is age restricted' % title
571 if self.in_download_archive(info_dict):
572 return '%s has already been recorded in archive' % video_title
573 return None
574
575 @staticmethod
576 def add_extra_info(info_dict, extra_info):
577 '''Set the keys from extra_info in info dict if they are missing'''
578 for key, value in extra_info.items():
579 info_dict.setdefault(key, value)
580
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
                     process=True):
        '''
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result
        '''
        # NOTE(review): extra_info has a mutable default shared across calls;
        # it appears to only be read and passed on here, but confirm no
        # callee mutates it.

        if ie_key:
            # An explicit extractor was requested; only try that one.
            ies = [self.get_info_extractor(ie_key)]
        else:
            ies = self._ies

        for ie in ies:
            if not ie.suitable(url):
                continue

            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            try:
                ie_result = ie.extract(url)
                if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                    break
                if isinstance(ie_result, list):
                    # Backwards compatibility: old IE result format
                    ie_result = {
                        '_type': 'compat_list',
                        'entries': ie_result,
                    }
                self.add_default_extra_info(ie_result, ie, url)
                if process:
                    return self.process_ie_result(ie_result, download, extra_info)
                else:
                    return ie_result
            except ExtractorError as de:  # An error we somewhat expected
                self.report_error(compat_str(de), de.format_traceback())
                break
            except MaxDownloadsReached:
                raise
            except Exception as e:
                if self.params.get('ignoreerrors', False):
                    self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                    break
                else:
                    raise
        else:
            # for/else: no extractor claimed the URL.
            self.report_error('no suitable InfoExtractor for URL %s' % url)
630
631 def add_default_extra_info(self, ie_result, ie, url):
632 self.add_extra_info(ie_result, {
633 'extractor': ie.IE_NAME,
634 'webpage_url': url,
635 'webpage_url_basename': url_basename(url),
636 'extractor_key': ie.ie_key(),
637 })
638
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """
        # NOTE(review): extra_info has a mutable default shared across calls;
        # it appears to only be read here, but confirm no callee mutates it.

        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            # In flat-extraction mode, return the unresolved reference as-is.
            extract_flat = self.params.get('extract_flat', False)
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                    extract_flat is True):
                if self.params.get('forcejson', False):
                    self.to_stdout(json.dumps(ie_result))
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # Non-None fields of the outer result take precedence over the
            # embedded page's, except for '_type' and 'url' themselves.
            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            assert new_result.get('_type') != 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist' or result_type == 'multi_video':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # playliststart is 1-based in params, 0-based internally.
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                playlistend = None

            ie_entries = ie_result['entries']
            if isinstance(ie_entries, list):
                n_all_entries = len(ie_entries)
                entries = ie_entries[playliststart:playlistend]
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            elif isinstance(ie_entries, PagedList):
                # Lazily-paged playlists only fetch the requested slice.
                entries = ie_entries.getslice(
                    playliststart, playlistend)
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))
            else:  # iterable
                entries = list(itertools.islice(
                    ie_entries, playliststart, playlistend))
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))

            if self.params.get('playlistreverse', False):
                entries = entries[::-1]

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
                extra = {
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_id': ie_result.get('id'),
                    'playlist_title': ie_result.get('title'),
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }

                reason = self._match_entry(entry)
                if reason is not None:
                    self.to_screen('[download] ' + reason)
                    continue

                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            return ie_result
        elif result_type == 'compat_list':
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))

            def _fixup(r):
                # Propagate bookkeeping fields into each legacy entry.
                self.add_extra_info(
                    r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    }
                )
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
773
774 def _apply_format_filter(self, format_spec, available_formats):
775 " Returns a tuple of the remaining format_spec and filtered formats "
776
777 OPERATORS = {
778 '<': operator.lt,
779 '<=': operator.le,
780 '>': operator.gt,
781 '>=': operator.ge,
782 '=': operator.eq,
783 '!=': operator.ne,
784 }
785 operator_rex = re.compile(r'''(?x)\s*\[
786 (?P<key>width|height|tbr|abr|vbr|filesize)
787 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
788 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
789 \]$
790 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
791 m = operator_rex.search(format_spec)
792 if not m:
793 raise ValueError('Invalid format specification %r' % format_spec)
794
795 try:
796 comparison_value = int(m.group('value'))
797 except ValueError:
798 comparison_value = parse_filesize(m.group('value'))
799 if comparison_value is None:
800 comparison_value = parse_filesize(m.group('value') + 'B')
801 if comparison_value is None:
802 raise ValueError(
803 'Invalid value %r in format specification %r' % (
804 m.group('value'), format_spec))
805 op = OPERATORS[m.group('op')]
806
807 def _filter(f):
808 actual_value = f.get(m.group('key'))
809 if actual_value is None:
810 return m.group('none_inclusive')
811 return op(actual_value, comparison_value)
812 new_formats = [f for f in available_formats if _filter(f)]
813
814 new_format_spec = format_spec[:-len(m.group(0))]
815 if not new_format_spec:
816 new_format_spec = 'best'
817
818 return (new_format_spec, new_formats)
819
820 def select_format(self, format_spec, available_formats):
821 while format_spec.endswith(']'):
822 format_spec, available_formats = self._apply_format_filter(
823 format_spec, available_formats)
824 if not available_formats:
825 return None
826
827 if format_spec == 'best' or format_spec is None:
828 return available_formats[-1]
829 elif format_spec == 'worst':
830 return available_formats[0]
831 elif format_spec == 'bestaudio':
832 audio_formats = [
833 f for f in available_formats
834 if f.get('vcodec') == 'none']
835 if audio_formats:
836 return audio_formats[-1]
837 elif format_spec == 'worstaudio':
838 audio_formats = [
839 f for f in available_formats
840 if f.get('vcodec') == 'none']
841 if audio_formats:
842 return audio_formats[0]
843 elif format_spec == 'bestvideo':
844 video_formats = [
845 f for f in available_formats
846 if f.get('acodec') == 'none']
847 if video_formats:
848 return video_formats[-1]
849 elif format_spec == 'worstvideo':
850 video_formats = [
851 f for f in available_formats
852 if f.get('acodec') == 'none']
853 if video_formats:
854 return video_formats[0]
855 else:
856 extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
857 if format_spec in extensions:
858 filter_f = lambda f: f['ext'] == format_spec
859 else:
860 filter_f = lambda f: f['format_id'] == format_spec
861 matches = list(filter(filter_f, available_formats))
862 if matches:
863 return matches[-1]
864 return None
865
    def process_video_result(self, info_dict, download=True):
        """Normalize a single extracted video result and select formats.

        Fills in missing metadata (playlist fields, display_id, thumbnail,
        upload_date), validates and normalizes the 'formats' list, applies
        the user's format selection and, when `download` is True, passes
        each selected format to process_info(). Returns the info_dict,
        updated with the best selected format (backwards compatibility).

        Raises ExtractorError when mandatory fields are missing or no
        format matches the request.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        thumbnails = info_dict.get('thumbnails')
        if thumbnails:
            # Sort smallest-first so that [-1] is the largest thumbnail.
            thumbnails.sort(key=lambda t: (
                t.get('width'), t.get('height'), t.get('url')))
            for t in thumbnails:
                if 'width' in t and 'height' in t:
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])

        if thumbnails and 'thumbnail' not in info_dict:
            # Default to the largest available thumbnail.
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            # Working around negative timestamps in Windows
            # (see http://bugs.python.org/issue1646728)
            if info_dict['timestamp'] < 0 and os.name == 'nt':
                info_dict['timestamp'] = 0
            upload_date = datetime.datetime.utcfromtimestamp(
                info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')

        # These extractors handle format selection themselves
        if info_dict['extractor'] in ['Youku']:
            if download:
                self.process_info(info_dict)
            return info_dict

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            raise ExtractorError('No video formats found!')

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            if 'url' not in format:
                raise ExtractorError('Missing "url" key in result (index %d)' % i)

            if format.get('format_id') is None:
                # Fall back to the position in the list as an id.
                format['format_id'] = compat_str(i)
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url']).lower()

        format_limit = self.params.get('format_limit', None)
        if format_limit:
            # Keep everything up to and including the limit format.
            formats = list(takewhile_inclusive(
                lambda f: f['format_id'] != format_limit, formats
            ))

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats', None):
            # --list-formats: just print the table and stop here.
            self.list_formats(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            req_format = 'best'
        formats_to_download = []
        # The -1 is for supporting YoutubeIE
        if req_format in ('-1', 'all'):
            formats_to_download = formats
        else:
            for rfstr in req_format.split(','):
                # We can accept formats requested in the format: 34/5/best, we pick
                # the first that is available, starting from left
                req_formats = rfstr.split('/')
                for rf in req_formats:
                    if re.match(r'.+?\+.+?', rf) is not None:
                        # Two formats have been requested like '137+139'
                        format_1, format_2 = rf.split('+')
                        formats_info = (self.select_format(format_1, formats),
                                        self.select_format(format_2, formats))
                        if all(formats_info):
                            # The first format must contain the video and the
                            # second the audio
                            if formats_info[0].get('vcodec') == 'none':
                                self.report_error('The first format must '
                                                  'contain the video, try using '
                                                  '"-f %s+%s"' % (format_2, format_1))
                                return
                            output_ext = (
                                formats_info[0]['ext']
                                if self.params.get('merge_output_format') is None
                                else self.params['merge_output_format'])
                            # NOTE(review): 'ext' appears twice in this dict
                            # literal; the later `output_ext` entry wins.
                            selected_format = {
                                'requested_formats': formats_info,
                                'format': rf,
                                'ext': formats_info[0]['ext'],
                                'width': formats_info[0].get('width'),
                                'height': formats_info[0].get('height'),
                                'resolution': formats_info[0].get('resolution'),
                                'fps': formats_info[0].get('fps'),
                                'vcodec': formats_info[0].get('vcodec'),
                                'vbr': formats_info[0].get('vbr'),
                                'stretched_ratio': formats_info[0].get('stretched_ratio'),
                                'acodec': formats_info[1].get('acodec'),
                                'abr': formats_info[1].get('abr'),
                                'ext': output_ext,
                            }
                        else:
                            selected_format = None
                    else:
                        selected_format = self.select_format(rf, formats)
                    if selected_format is not None:
                        formats_to_download.append(selected_format)
                        break
        if not formats_to_download:
            raise ExtractorError('requested format not available',
                                 expected=True)

        if download:
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
1019
    def process_info(self, info_dict):
        """Process a single resolved IE result.

        Performs all per-video side effects: filename preparation, forced
        stdout printing (--get-title & co.), writing of description,
        annotations, subtitles, info-json and thumbnail files, the actual
        download (possibly merging multiple requested formats), fixups and
        postprocessing, and recording in the download archive.
        """

        assert info_dict.get('_type', 'video') == 'video'

        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads >= int(max_downloads):
                raise MaxDownloadsReached()

        info_dict['fulltitle'] = info_dict['title']
        # Truncate overly long titles so the resulting filename stays sane.
        if len(info_dict['title']) > 200:
            info_dict['title'] = info_dict['title'][:197] + '...'

        # Keep for backwards compatibility
        info_dict['stitle'] = info_dict['title']

        if 'format' not in info_dict:
            info_dict['format'] = info_dict['ext']

        # A non-None reason means the video is filtered out (matchtitle,
        # date range, archive, ...) and must be skipped.
        reason = self._match_entry(info_dict)
        if reason is not None:
            self.to_screen('[download] ' + reason)
            return

        self._num_downloads += 1

        filename = self.prepare_filename(info_dict)

        # Forced printings
        if self.params.get('forcetitle', False):
            self.to_stdout(info_dict['fulltitle'])
        if self.params.get('forceid', False):
            self.to_stdout(info_dict['id'])
        if self.params.get('forceurl', False):
            if info_dict.get('requested_formats') is not None:
                for f in info_dict['requested_formats']:
                    self.to_stdout(f['url'] + f.get('play_path', ''))
            else:
                # For RTMP URLs, also include the playpath
                self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
        if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
            self.to_stdout(info_dict['thumbnail'])
        if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
            self.to_stdout(info_dict['description'])
        if self.params.get('forcefilename', False) and filename is not None:
            self.to_stdout(filename)
        if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
            self.to_stdout(formatSeconds(info_dict['duration']))
        if self.params.get('forceformat', False):
            self.to_stdout(info_dict['format'])
        if self.params.get('forcejson', False):
            info_dict['_filename'] = filename
            self.to_stdout(json.dumps(info_dict))
        if self.params.get('dump_single_json', False):
            # The actual JSON dump happens in download(), on the whole result.
            info_dict['_filename'] = filename

        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):
            return

        if filename is None:
            return

        try:
            # Create the target directory if needed.
            dn = os.path.dirname(encodeFilename(filename))
            if dn and not os.path.exists(dn):
                os.makedirs(dn)
        except (OSError, IOError) as err:
            self.report_error('unable to create directory ' + compat_str(err))
            return

        if self.params.get('writedescription', False):
            descfn = filename + '.description'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Video description is already present')
            elif info_dict.get('description') is None:
                self.report_warning('There\'s no description to write.')
            else:
                try:
                    self.to_screen('[info] Writing video description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(info_dict['description'])
                except (OSError, IOError):
                    self.report_error('Cannot write description file ' + descfn)
                    return

        if self.params.get('writeannotations', False):
            annofn = filename + '.annotations.xml'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
                self.to_screen('[info] Video annotations are already present')
            else:
                try:
                    self.to_screen('[info] Writing video annotations to: ' + annofn)
                    with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                        annofile.write(info_dict['annotations'])
                except (KeyError, TypeError):
                    # No 'annotations' entry, or it is not writable as text.
                    self.report_warning('There are no annotations to write.')
                except (OSError, IOError):
                    self.report_error('Cannot write annotations file: ' + annofn)
                    return

        subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                       self.params.get('writeautomaticsub')])

        if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            subtitles = info_dict['subtitles']
            sub_format = self.params.get('subtitlesformat', 'srt')
            for sub_lang in subtitles.keys():
                sub = subtitles[sub_lang]
                if sub is None:
                    continue
                try:
                    sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                    if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                        self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                    else:
                        self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                            subfile.write(sub)
                except (OSError, IOError):
                    self.report_error('Cannot write subtitles file ' + sub_filename)
                    return

        if self.params.get('writeinfojson', False):
            infofn = os.path.splitext(filename)[0] + '.info.json'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Video description metadata is already present')
            else:
                self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
                try:
                    write_json_file(info_dict, infofn)
                except (OSError, IOError):
                    self.report_error('Cannot write metadata to JSON file ' + infofn)
                    return

        if self.params.get('writethumbnail', False):
            if info_dict.get('thumbnail') is not None:
                thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
                thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                    self.to_screen('[%s] %s: Thumbnail is already present' %
                                   (info_dict['extractor'], info_dict['id']))
                else:
                    self.to_screen('[%s] %s: Downloading thumbnail ...' %
                                   (info_dict['extractor'], info_dict['id']))
                    try:
                        uf = self.urlopen(info_dict['thumbnail'])
                        with open(thumb_filename, 'wb') as thumbf:
                            shutil.copyfileobj(uf, thumbf)
                        self.to_screen('[%s] %s: Writing thumbnail to: %s' %
                                       (info_dict['extractor'], info_dict['id'], thumb_filename))
                    except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                        # A failed thumbnail download is non-fatal.
                        self.report_warning('Unable to download thumbnail "%s": %s' %
                                            (info_dict['thumbnail'], compat_str(err)))

        if not self.params.get('skip_download', False):
            try:
                def dl(name, info):
                    # Run the downloader suited to this format's protocol.
                    fd = get_suitable_downloader(info)(self, self.params)
                    for ph in self._progress_hooks:
                        fd.add_progress_hook(ph)
                    if self.params.get('verbose'):
                        self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                    return fd.download(name, info)
                if info_dict.get('requested_formats') is not None:
                    # Multiple formats (e.g. '137+139'): download each to its
                    # own file, then merge with ffmpeg/avconv when available.
                    downloaded = []
                    success = True
                    merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
                    if not merger._executable:
                        postprocessors = []
                        self.report_warning('You have requested multiple '
                                            'formats but ffmpeg or avconv are not installed.'
                                            ' The formats won\'t be merged')
                    else:
                        postprocessors = [merger]
                    for f in info_dict['requested_formats']:
                        new_info = dict(info_dict)
                        new_info.update(f)
                        fname = self.prepare_filename(new_info)
                        fname = prepend_extension(fname, 'f%s' % f['format_id'])
                        downloaded.append(fname)
                        partial_success = dl(fname, new_info)
                        success = success and partial_success
                    info_dict['__postprocessors'] = postprocessors
                    info_dict['__files_to_merge'] = downloaded
                else:
                    # Just a single file
                    success = dl(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error('unable to download video data: %s' % str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

            if success:
                # Fixup content
                stretched_ratio = info_dict.get('stretched_ratio')
                if stretched_ratio is not None and stretched_ratio != 1:
                    fixup_policy = self.params.get('fixup')
                    if fixup_policy is None:
                        fixup_policy = 'detect_or_warn'
                    if fixup_policy == 'warn':
                        self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                            info_dict['id'], stretched_ratio))
                    elif fixup_policy == 'detect_or_warn':
                        stretched_pp = FFmpegFixupStretchedPP(self)
                        if stretched_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(stretched_pp)
                        else:
                            self.report_warning(
                                '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
                                    info_dict['id'], stretched_ratio))
                    else:
                        assert fixup_policy == 'ignore'

                try:
                    self.post_process(filename, info_dict)
                except (PostProcessingError) as err:
                    self.report_error('postprocessing: %s' % str(err))
                    return
        self.record_download_archive(info_dict)
1248
1249 def download(self, url_list):
1250 """Download a given list of URLs."""
1251 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1252 if (len(url_list) > 1 and
1253 '%' not in outtmpl
1254 and self.params.get('max_downloads') != 1):
1255 raise SameFileError(outtmpl)
1256
1257 for url in url_list:
1258 try:
1259 # It also downloads the videos
1260 res = self.extract_info(url)
1261 except UnavailableVideoError:
1262 self.report_error('unable to download video')
1263 except MaxDownloadsReached:
1264 self.to_screen('[info] Maximum number of downloaded files reached.')
1265 raise
1266 else:
1267 if self.params.get('dump_single_json', False):
1268 self.to_stdout(json.dumps(res))
1269
1270 return self._download_retcode
1271
1272 def download_with_info_file(self, info_filename):
1273 with io.open(info_filename, 'r', encoding='utf-8') as f:
1274 info = json.load(f)
1275 try:
1276 self.process_ie_result(info, download=True)
1277 except DownloadError:
1278 webpage_url = info.get('webpage_url')
1279 if webpage_url is not None:
1280 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1281 return self.download([webpage_url])
1282 else:
1283 raise
1284 return self._download_retcode
1285
    def post_process(self, filename, ie_info):
        """Run all the postprocessors on the given file.

        Runs any per-video postprocessors stored under '__postprocessors'
        first, then the globally registered ones (self._pps). Each
        postprocessor may replace the info dict (e.g. change 'filepath');
        the original file is deleted afterwards unless a postprocessor or
        the --keep-video option asked to keep it.
        """
        info = dict(ie_info)
        info['filepath'] = filename
        pps_chain = []
        if ie_info.get('__postprocessors') is not None:
            pps_chain.extend(ie_info['__postprocessors'])
        pps_chain.extend(self._pps)
        for pp in pps_chain:
            # keep_video is re-evaluated for every postprocessor.
            keep_video = None
            old_filename = info['filepath']
            try:
                # pp.run returns (keep_video_wish, possibly-updated info).
                keep_video_wish, info = pp.run(info)
                if keep_video_wish is not None:
                    if keep_video_wish:
                        # A positive wish always wins.
                        keep_video = keep_video_wish
                    elif keep_video is None:
                        # No clear decision yet, let IE decide
                        keep_video = keep_video_wish
            except PostProcessingError as e:
                self.report_error(e.msg)
            if keep_video is False and not self.params.get('keepvideo', False):
                try:
                    self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
                    os.remove(encodeFilename(old_filename))
                except (IOError, OSError):
                    self.report_warning('Unable to remove downloaded video file')
1313
1314 def _make_archive_id(self, info_dict):
1315 # Future-proof against any change in case
1316 # and backwards compatibility with prior versions
1317 extractor = info_dict.get('extractor_key')
1318 if extractor is None:
1319 if 'id' in info_dict:
1320 extractor = info_dict.get('ie_key') # key in a playlist
1321 if extractor is None:
1322 return None # Incomplete video information
1323 return extractor.lower() + ' ' + info_dict['id']
1324
1325 def in_download_archive(self, info_dict):
1326 fn = self.params.get('download_archive')
1327 if fn is None:
1328 return False
1329
1330 vid_id = self._make_archive_id(info_dict)
1331 if vid_id is None:
1332 return False # Incomplete video information
1333
1334 try:
1335 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1336 for line in archive_file:
1337 if line.strip() == vid_id:
1338 return True
1339 except IOError as ioe:
1340 if ioe.errno != errno.ENOENT:
1341 raise
1342 return False
1343
1344 def record_download_archive(self, info_dict):
1345 fn = self.params.get('download_archive')
1346 if fn is None:
1347 return
1348 vid_id = self._make_archive_id(info_dict)
1349 assert vid_id
1350 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1351 archive_file.write(vid_id + '\n')
1352
1353 @staticmethod
1354 def format_resolution(format, default='unknown'):
1355 if format.get('vcodec') == 'none':
1356 return 'audio only'
1357 if format.get('resolution') is not None:
1358 return format['resolution']
1359 if format.get('height') is not None:
1360 if format.get('width') is not None:
1361 res = '%sx%s' % (format['width'], format['height'])
1362 else:
1363 res = '%sp' % format['height']
1364 elif format.get('width') is not None:
1365 res = '?x%d' % format['width']
1366 else:
1367 res = default
1368 return res
1369
1370 def _format_note(self, fdict):
1371 res = ''
1372 if fdict.get('ext') in ['f4f', 'f4m']:
1373 res += '(unsupported) '
1374 if fdict.get('format_note') is not None:
1375 res += fdict['format_note'] + ' '
1376 if fdict.get('tbr') is not None:
1377 res += '%4dk ' % fdict['tbr']
1378 if fdict.get('container') is not None:
1379 if res:
1380 res += ', '
1381 res += '%s container' % fdict['container']
1382 if (fdict.get('vcodec') is not None and
1383 fdict.get('vcodec') != 'none'):
1384 if res:
1385 res += ', '
1386 res += fdict['vcodec']
1387 if fdict.get('vbr') is not None:
1388 res += '@'
1389 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1390 res += 'video@'
1391 if fdict.get('vbr') is not None:
1392 res += '%4dk' % fdict['vbr']
1393 if fdict.get('fps') is not None:
1394 res += ', %sfps' % fdict['fps']
1395 if fdict.get('acodec') is not None:
1396 if res:
1397 res += ', '
1398 if fdict['acodec'] == 'none':
1399 res += 'video only'
1400 else:
1401 res += '%-5s' % fdict['acodec']
1402 elif fdict.get('abr') is not None:
1403 if res:
1404 res += ', '
1405 res += 'audio'
1406 if fdict.get('abr') is not None:
1407 res += '@%3dk' % fdict['abr']
1408 if fdict.get('asr') is not None:
1409 res += ' (%5dHz)' % fdict['asr']
1410 if fdict.get('filesize') is not None:
1411 if res:
1412 res += ', '
1413 res += format_bytes(fdict['filesize'])
1414 elif fdict.get('filesize_approx') is not None:
1415 if res:
1416 res += ', '
1417 res += '~' + format_bytes(fdict['filesize_approx'])
1418 return res
1419
1420 def list_formats(self, info_dict):
1421 def line(format, idlen=20):
1422 return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1423 format['format_id'],
1424 format['ext'],
1425 self.format_resolution(format),
1426 self._format_note(format),
1427 ))
1428
1429 formats = info_dict.get('formats', [info_dict])
1430 idlen = max(len('format code'),
1431 max(len(f['format_id']) for f in formats))
1432 formats_s = [
1433 line(f, idlen) for f in formats
1434 if f.get('preference') is None or f['preference'] >= -1000]
1435 if len(formats) > 1:
1436 formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1437 formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1438
1439 header_line = line({
1440 'format_id': 'format code', 'ext': 'extension',
1441 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1442 self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1443 (info_dict['id'], header_line, '\n'.join(formats_s)))
1444
    def urlopen(self, req):
        """ Start an HTTP download.

        `req` may be either a URL string or a Request object; it is passed
        to the opener built by _setup_opener(), after percent-encoding any
        non-ASCII characters in the URL.
        """

        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # NOTE: on Python 3 the `basestring` name is never evaluated thanks
        # to short-circuiting of the conditional expression.
        req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
        url = req if req_is_string else req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            if req_is_string:
                req = url_escaped
            else:
                # Rebuild the Request around the escaped URL, preserving
                # payload, headers and origin information.
                req = compat_urllib_request.Request(
                    url_escaped, data=req.data, headers=req.headers,
                    origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)

        return self._opener.open(req, timeout=self._socket_timeout)
1468
1469 def print_debug_header(self):
1470 if not self.params.get('verbose'):
1471 return
1472
1473 if type('') is not compat_str:
1474 # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1475 self.report_warning(
1476 'Your Python is broken! Update to a newer and supported version')
1477
1478 stdout_encoding = getattr(
1479 sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1480 encoding_str = (
1481 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1482 locale.getpreferredencoding(),
1483 sys.getfilesystemencoding(),
1484 stdout_encoding,
1485 self.get_encoding()))
1486 write_string(encoding_str, encoding=None)
1487
1488 self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1489 try:
1490 sp = subprocess.Popen(
1491 ['git', 'rev-parse', '--short', 'HEAD'],
1492 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1493 cwd=os.path.dirname(os.path.abspath(__file__)))
1494 out, err = sp.communicate()
1495 out = out.decode().strip()
1496 if re.match('[0-9a-f]+', out):
1497 self._write_string('[debug] Git HEAD: ' + out + '\n')
1498 except:
1499 try:
1500 sys.exc_clear()
1501 except:
1502 pass
1503 self._write_string('[debug] Python version %s - %s\n' % (
1504 platform.python_version(), platform_name()))
1505
1506 exe_versions = FFmpegPostProcessor.get_versions()
1507 exe_versions['rtmpdump'] = rtmpdump_version()
1508 exe_str = ', '.join(
1509 '%s %s' % (exe, v)
1510 for exe, v in sorted(exe_versions.items())
1511 if v
1512 )
1513 if not exe_str:
1514 exe_str = 'none'
1515 self._write_string('[debug] exe versions: %s\n' % exe_str)
1516
1517 proxy_map = {}
1518 for handler in self._opener.handlers:
1519 if hasattr(handler, 'proxies'):
1520 proxy_map.update(handler.proxies)
1521 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1522
1523 if self.params.get('call_home', False):
1524 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1525 self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1526 latest_version = self.urlopen(
1527 'https://yt-dl.org/latest/version').read().decode('utf-8')
1528 if version_tuple(latest_version) > version_tuple(__version__):
1529 self.report_warning(
1530 'You are using an outdated version (newest version: %s)! '
1531 'See https://yt-dl.org/update if you need help updating.' %
1532 latest_version)
1533
    def _setup_opener(self):
        """Build the urllib opener (cookies, proxies, HTTPS handling) used
        by urlopen() and store it as self._opener."""
        timeout_val = self.params.get('socket_timeout')
        # Default to a generous 10-minute socket timeout.
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # No cookie file: keep cookies in memory only.
            self.cookiejar = compat_cookiejar.CookieJar()
        else:
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
                opts_cookiefile)
            # Load existing cookies only when the file is readable; a new
            # file will be created on save otherwise.
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load()

        cookie_processor = compat_urllib_request.HTTPCookieProcessor(
            self.cookiejar)
        if opts_proxy is not None:
            if opts_proxy == '':
                # An explicitly empty --proxy disables all proxies.
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            # Fall back to the environment's proxy settings.
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = compat_urllib_request.ProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        opener = compat_urllib_request.build_opener(
            https_handler, proxy_handler, cookie_processor, ydlh)
        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
1573
1574 def encode(self, s):
1575 if isinstance(s, bytes):
1576 return s # Already encoded
1577
1578 try:
1579 return s.encode(self.get_encoding())
1580 except UnicodeEncodeError as err:
1581 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1582 raise
1583
1584 def get_encoding(self):
1585 encoding = self.params.get('encoding')
1586 if encoding is None:
1587 encoding = preferredencoding()
1588 return encoding