]> jfr.im git - yt-dlp.git/blob - youtube_dl/YoutubeDL.py
[YoutubeDL] Add simple tests for format_note (Closes #2825)
[yt-dlp.git] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import datetime
8 import errno
9 import io
10 import json
11 import locale
12 import os
13 import platform
14 import re
15 import shutil
16 import subprocess
17 import socket
18 import sys
19 import time
20 import traceback
21
22 if os.name == 'nt':
23 import ctypes
24
25 from .utils import (
26 compat_cookiejar,
27 compat_http_client,
28 compat_str,
29 compat_urllib_error,
30 compat_urllib_request,
31 ContentTooShortError,
32 date_from_str,
33 DateRange,
34 determine_ext,
35 DownloadError,
36 encodeFilename,
37 ExtractorError,
38 format_bytes,
39 formatSeconds,
40 get_term_width,
41 locked_file,
42 make_HTTPS_handler,
43 MaxDownloadsReached,
44 PagedList,
45 PostProcessingError,
46 platform_name,
47 preferredencoding,
48 SameFileError,
49 sanitize_filename,
50 subtitles_filename,
51 takewhile_inclusive,
52 UnavailableVideoError,
53 url_basename,
54 write_json_file,
55 write_string,
56 YoutubeDLHandler,
57 prepend_extension,
58 )
59 from .extractor import get_info_extractor, gen_extractors
60 from .downloader import get_suitable_downloader
61 from .postprocessor import FFmpegMergerPP
62 from .version import __version__
63
64
class YoutubeDL(object):
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information, task that InfoExtractors do, it
    has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL process the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceurl:          Force printing final URL.
    forcetitle:        Force printing title.
    forceid:           Force printing ID.
    forcethumbnail:    Force printing thumbnail URL.
    forcedescription:  Force printing description.
    forcefilename:     Force printing final filename.
    forceduration:     Force printing duration.
    forcejson:         Force printing info_dict as JSON.
    simulate:          Do not download the video files.
    format:            Video format code.
    format_limit:      Highest quality format to try.
    outtmpl:           Template for output names.
    restrictfilenames: Do not allow "&" and spaces in file names
    ignoreerrors:      Do not stop on download errors.
    nooverwrites:      Prevent overwriting files.
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Log messages to stderr instead of stdout.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video description to a .info.json file
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatic subtitles to a file
    allsubtitles:      Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
    subtitleslangs:    List of languages of the subtitles to download
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       None to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.
    cookiefile:        File name where cookies should be read from and dumped to.
    nocheckcertificate:Do not verify SSL certificates
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       At the moment, this is only supported by YouTube.
    proxy:             URL of the proxy server to use
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic:Print out sent and received HTTP traffic
    include_ads:       Download ads as well
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.

    The following parameters are not used by YoutubeDL itself, they are used by
    the FileDownloader:
    nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
    noresizebuffer, retries, continuedl, noprogress, consoletitle

    The following options are used by the post processors:
    prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
                       otherwise prefer avconv.
    """

    # Class-level defaults; all of these are replaced with per-instance
    # values in __init__.
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None
182 def __init__(self, params=None):
183 """Create a FileDownloader object with the given options."""
184 if params is None:
185 params = {}
186 self._ies = []
187 self._ies_instances = {}
188 self._pps = []
189 self._progress_hooks = []
190 self._download_retcode = 0
191 self._num_downloads = 0
192 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
193 self._err_file = sys.stderr
194 self.params = params
195
196 if params.get('bidi_workaround', False):
197 try:
198 import pty
199 master, slave = pty.openpty()
200 width = get_term_width()
201 if width is None:
202 width_args = []
203 else:
204 width_args = ['-w', str(width)]
205 sp_kwargs = dict(
206 stdin=subprocess.PIPE,
207 stdout=slave,
208 stderr=self._err_file)
209 try:
210 self._output_process = subprocess.Popen(
211 ['bidiv'] + width_args, **sp_kwargs
212 )
213 except OSError:
214 self._output_process = subprocess.Popen(
215 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
216 self._output_channel = os.fdopen(master, 'rb')
217 except OSError as ose:
218 if ose.errno == 2:
219 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
220 else:
221 raise
222
223 if (sys.version_info >= (3,) and sys.platform != 'win32' and
224 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
225 and not params['restrictfilenames']):
226 # On Python 3, the Unicode filesystem API will throw errors (#1474)
227 self.report_warning(
228 'Assuming --restrict-filenames since file system encoding '
229 'cannot encode all charactes. '
230 'Set the LC_ALL environment variable to fix this.')
231 self.params['restrictfilenames'] = True
232
233 if '%(stitle)s' in self.params.get('outtmpl', ''):
234 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
235
236 self._setup_opener()
237
238 def add_info_extractor(self, ie):
239 """Add an InfoExtractor object to the end of the list."""
240 self._ies.append(ie)
241 self._ies_instances[ie.ie_key()] = ie
242 ie.set_downloader(self)
243
244 def get_info_extractor(self, ie_key):
245 """
246 Get an instance of an IE with name ie_key, it will try to get one from
247 the _ies list, if there's no instance it will create a new one and add
248 it to the extractor list.
249 """
250 ie = self._ies_instances.get(ie_key)
251 if ie is None:
252 ie = get_info_extractor(ie_key)()
253 self.add_info_extractor(ie)
254 return ie
255
256 def add_default_info_extractors(self):
257 """
258 Add the InfoExtractors returned by gen_extractors to the end of the list
259 """
260 for ie in gen_extractors():
261 self.add_info_extractor(ie)
262
263 def add_post_processor(self, pp):
264 """Add a PostProcessor object to the end of the chain."""
265 self._pps.append(pp)
266 pp.set_downloader(self)
267
268 def add_progress_hook(self, ph):
269 """Add the progress hook (currently only for the file downloader)"""
270 self._progress_hooks.append(ph)
271
272 def _bidi_workaround(self, message):
273 if not hasattr(self, '_output_channel'):
274 return message
275
276 assert hasattr(self, '_output_process')
277 assert type(message) == type('')
278 line_count = message.count('\n') + 1
279 self._output_process.stdin.write((message + '\n').encode('utf-8'))
280 self._output_process.stdin.flush()
281 res = ''.join(self._output_channel.readline().decode('utf-8')
282 for _ in range(line_count))
283 return res[:-len('\n')]
284
285 def to_screen(self, message, skip_eol=False):
286 """Print message to stdout if not in quiet mode."""
287 return self.to_stdout(message, skip_eol, check_quiet=True)
288
289 def _write_string(self, s, out=None):
290 write_string(s, out=out, encoding=self.params.get('encoding'))
291
292 def to_stdout(self, message, skip_eol=False, check_quiet=False):
293 """Print message to stdout if not in quiet mode."""
294 if self.params.get('logger'):
295 self.params['logger'].debug(message)
296 elif not check_quiet or not self.params.get('quiet', False):
297 message = self._bidi_workaround(message)
298 terminator = ['\n', ''][skip_eol]
299 output = message + terminator
300
301 self._write_string(output, self._screen_file)
302
303 def to_stderr(self, message):
304 """Print message to stderr."""
305 assert type(message) == type('')
306 if self.params.get('logger'):
307 self.params['logger'].error(message)
308 else:
309 message = self._bidi_workaround(message)
310 output = message + '\n'
311 self._write_string(output, self._err_file)
312
313 def to_console_title(self, message):
314 if not self.params.get('consoletitle', False):
315 return
316 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
317 # c_wchar_p() might not be necessary if `message` is
318 # already of type unicode()
319 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
320 elif 'TERM' in os.environ:
321 self._write_string('\033]0;%s\007' % message, self._screen_file)
322
323 def save_console_title(self):
324 if not self.params.get('consoletitle', False):
325 return
326 if 'TERM' in os.environ:
327 # Save the title on stack
328 self._write_string('\033[22;0t', self._screen_file)
329
330 def restore_console_title(self):
331 if not self.params.get('consoletitle', False):
332 return
333 if 'TERM' in os.environ:
334 # Restore the title from stack
335 self._write_string('\033[23;0t', self._screen_file)
336
337 def __enter__(self):
338 self.save_console_title()
339 return self
340
341 def __exit__(self, *args):
342 self.restore_console_title()
343
344 if self.params.get('cookiefile') is not None:
345 self.cookiejar.save()
346
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # ExtractorError carries the original exception in its
                    # .exc_info attribute; include that traceback first.
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: show the current call stack.
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Re-raise as DownloadError, preferring the wrapped original
            # exc_info (see above) so the root cause is preserved.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        # ignoreerrors: swallow the problem but remember a failing exit code.
        self._download_retcode = 1
376
377 def report_warning(self, message):
378 '''
379 Print the message to stderr, it will be prefixed with 'WARNING:'
380 If stderr is a tty file the 'WARNING:' will be colored
381 '''
382 if self.params.get('logger') is not None:
383 self.params['logger'].warning(message)
384 else:
385 if self.params.get('no_warnings'):
386 return
387 if self._err_file.isatty() and os.name != 'nt':
388 _msg_header = '\033[0;33mWARNING:\033[0m'
389 else:
390 _msg_header = 'WARNING:'
391 warning_message = '%s %s' % (_msg_header, message)
392 self.to_stderr(warning_message)
393
394 def report_error(self, message, tb=None):
395 '''
396 Do the same as trouble, but prefixes the message with 'ERROR:', colored
397 in red if stderr is a tty file.
398 '''
399 if self._err_file.isatty() and os.name != 'nt':
400 _msg_header = '\033[0;31mERROR:\033[0m'
401 else:
402 _msg_header = 'ERROR:'
403 error_message = '%s %s' % (_msg_header, message)
404 self.trouble(error_message, tb)
405
406 def report_file_already_downloaded(self, file_name):
407 """Report file has already been fully downloaded."""
408 try:
409 self.to_screen('[download] %s has already been downloaded' % file_name)
410 except UnicodeEncodeError:
411 self.to_screen('[download] The file has already been downloaded')
412
413 def prepare_filename(self, info_dict):
414 """Generate the output filename."""
415 try:
416 template_dict = dict(info_dict)
417
418 template_dict['epoch'] = int(time.time())
419 autonumber_size = self.params.get('autonumber_size')
420 if autonumber_size is None:
421 autonumber_size = 5
422 autonumber_templ = '%0' + str(autonumber_size) + 'd'
423 template_dict['autonumber'] = autonumber_templ % self._num_downloads
424 if template_dict.get('playlist_index') is not None:
425 template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
426 if template_dict.get('resolution') is None:
427 if template_dict.get('width') and template_dict.get('height'):
428 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
429 elif template_dict.get('height'):
430 template_dict['resolution'] = '%sp' % template_dict['height']
431 elif template_dict.get('width'):
432 template_dict['resolution'] = '?x%d' % template_dict['width']
433
434 sanitize = lambda k, v: sanitize_filename(
435 compat_str(v),
436 restricted=self.params.get('restrictfilenames'),
437 is_id=(k == 'id'))
438 template_dict = dict((k, sanitize(k, v))
439 for k, v in template_dict.items()
440 if v is not None)
441 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
442
443 tmpl = os.path.expanduser(self.params['outtmpl'])
444 filename = tmpl % template_dict
445 return filename
446 except ValueError as err:
447 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
448 return None
449
450 def _match_entry(self, info_dict):
451 """ Returns None iff the file should be downloaded """
452
453 video_title = info_dict.get('title', info_dict.get('id', 'video'))
454 if 'title' in info_dict:
455 # This can happen when we're just evaluating the playlist
456 title = info_dict['title']
457 matchtitle = self.params.get('matchtitle', False)
458 if matchtitle:
459 if not re.search(matchtitle, title, re.IGNORECASE):
460 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
461 rejecttitle = self.params.get('rejecttitle', False)
462 if rejecttitle:
463 if re.search(rejecttitle, title, re.IGNORECASE):
464 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
465 date = info_dict.get('upload_date', None)
466 if date is not None:
467 dateRange = self.params.get('daterange', DateRange())
468 if date not in dateRange:
469 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
470 view_count = info_dict.get('view_count', None)
471 if view_count is not None:
472 min_views = self.params.get('min_views')
473 if min_views is not None and view_count < min_views:
474 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
475 max_views = self.params.get('max_views')
476 if max_views is not None and view_count > max_views:
477 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
478 age_limit = self.params.get('age_limit')
479 if age_limit is not None:
480 if age_limit < info_dict.get('age_limit', 0):
481 return 'Skipping "' + title + '" because it is age restricted'
482 if self.in_download_archive(info_dict):
483 return '%s has already been recorded in archive' % video_title
484 return None
485
486 @staticmethod
487 def add_extra_info(info_dict, extra_info):
488 '''Set the keys from extra_info in info dict if they are missing'''
489 for key, value in extra_info.items():
490 info_dict.setdefault(key, value)
491
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
                     process=True):
        '''
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result

        When ie_key is given, only that single extractor is tried; otherwise
        the registered extractors are probed in order and the first one whose
        suitable() accepts the URL wins.  With process=False the raw extractor
        result is returned without resolving playlists/URL references.
        '''

        if ie_key:
            ies = [self.get_info_extractor(ie_key)]
        else:
            ies = self._ies

        for ie in ies:
            if not ie.suitable(url):
                continue

            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            try:
                ie_result = ie.extract(url)
                if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                    break
                if isinstance(ie_result, list):
                    # Backwards compatibility: old IE result format
                    ie_result = {
                        '_type': 'compat_list',
                        'entries': ie_result,
                    }
                self.add_default_extra_info(ie_result, ie, url)
                if process:
                    return self.process_ie_result(ie_result, download, extra_info)
                else:
                    return ie_result
            except ExtractorError as de:  # An error we somewhat expected
                self.report_error(compat_str(de), de.format_traceback())
                break
            except MaxDownloadsReached:
                # Propagate: this is the normal way --max-downloads stops us.
                raise
            except Exception as e:
                if self.params.get('ignoreerrors', False):
                    self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                    break
                else:
                    raise
        else:
            # for/else: no extractor accepted the URL at all.
            self.report_error('no suitable InfoExtractor for URL %s' % url)
541
542 def add_default_extra_info(self, ie_result, ie, url):
543 self.add_extra_info(ie_result, {
544 'extractor': ie.IE_NAME,
545 'webpage_url': url,
546 'webpage_url_basename': url_basename(url),
547 'extractor_key': ie.ie_key(),
548 })
549
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.

        Dispatches on ie_result['_type']: 'video' (default), 'url',
        'url_transparent', 'playlist' and the legacy 'compat_list'.
        Recurses until everything is resolved down to videos.
        """

        result_type = ie_result.get('_type', 'video')  # If not given we suppose it's a video, support the default old system
        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            def make_result(embedded_info):
                # Merge: keep fields of the embedding result, but take the
                # listed technical fields from the embedded (real) result.
                new_result = ie_result.copy()
                for f in ('_type', 'url', 'ext', 'player_url', 'formats',
                          'entries', 'ie_key', 'duration',
                          'subtitles', 'annotations', 'format',
                          'thumbnail', 'thumbnails'):
                    if f in new_result:
                        del new_result[f]
                    if f in embedded_info:
                        new_result[f] = embedded_info[f]
                return new_result
            new_result = make_result(info)

            # A url_transparent result must not resolve to another
            # url_transparent one, or we would recurse forever.
            assert new_result.get('_type') != 'url_transparent'
            if new_result.get('_type') == 'compat_list':
                new_result['entries'] = [
                    make_result(e) for e in new_result['entries']]

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # playliststart is 1-based in params, 0-based internally.
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                playlistend = None

            if isinstance(ie_result['entries'], list):
                n_all_entries = len(ie_result['entries'])
                entries = ie_result['entries'][playliststart:playlistend]
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            else:
                # Lazily-paged playlist: only fetch the requested slice.
                assert isinstance(ie_result['entries'], PagedList)
                entries = ie_result['entries'].getslice(
                    playliststart, playlistend)
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
                extra = {
                    'playlist': playlist,
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }

                # Apply title/date/view-count/... filters per entry.
                reason = self._match_entry(entry)
                if reason is not None:
                    self.to_screen('[download] ' + reason)
                    continue

                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            return ie_result
        elif result_type == 'compat_list':
            # Legacy list-of-dicts result: annotate each entry, then resolve.
            def _fixup(r):
                self.add_extra_info(r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    })
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
664
665 def select_format(self, format_spec, available_formats):
666 if format_spec == 'best' or format_spec is None:
667 return available_formats[-1]
668 elif format_spec == 'worst':
669 return available_formats[0]
670 elif format_spec == 'bestaudio':
671 audio_formats = [
672 f for f in available_formats
673 if f.get('vcodec') == 'none']
674 if audio_formats:
675 return audio_formats[-1]
676 elif format_spec == 'worstaudio':
677 audio_formats = [
678 f for f in available_formats
679 if f.get('vcodec') == 'none']
680 if audio_formats:
681 return audio_formats[0]
682 elif format_spec == 'bestvideo':
683 video_formats = [
684 f for f in available_formats
685 if f.get('acodec') == 'none']
686 if video_formats:
687 return video_formats[-1]
688 elif format_spec == 'worstvideo':
689 video_formats = [
690 f for f in available_formats
691 if f.get('acodec') == 'none']
692 if video_formats:
693 return video_formats[0]
694 else:
695 extensions = ['mp4', 'flv', 'webm', '3gp']
696 if format_spec in extensions:
697 filter_f = lambda f: f['ext'] == format_spec
698 else:
699 filter_f = lambda f: f['format_id'] == format_spec
700 matches = list(filter(filter_f, available_formats))
701 if matches:
702 return matches[-1]
703 return None
704
    def process_video_result(self, info_dict, download=True):
        """Validate a resolved 'video' result, normalize its fields, select
        the format(s) to download per the 'format' option, and (when
        *download* is true) hand each selected format to process_info().

        Returns info_dict, updated in place with the chosen best format.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        # Derive upload_date from a numeric timestamp when only the latter
        # was provided by the extractor.
        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            upload_date = datetime.datetime.utcfromtimestamp(
                info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')

        # These extractors handle format selection themselves
        if info_dict['extractor'] in ['Youku']:
            if download:
                self.process_info(info_dict)
            return info_dict

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            raise ExtractorError('No video formats found!')

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            if 'url' not in format:
                raise ExtractorError('Missing "url" key in result (index %d)' % i)

            if format.get('format_id') is None:
                # Fall back to the position in the list as an id.
                format['format_id'] = compat_str(i)
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url']).lower()

        # --format-limit: cut the (worst-first) list right after the limit id.
        format_limit = self.params.get('format_limit', None)
        if format_limit:
            formats = list(takewhile_inclusive(
                lambda f: f['format_id'] != format_limit, formats
            ))

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats', None):
            # --list-formats: print and stop, nothing is downloaded.
            self.list_formats(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            req_format = 'best'
        formats_to_download = []
        # The -1 is for supporting YoutubeIE
        if req_format in ('-1', 'all'):
            formats_to_download = formats
        else:
            # We can accept formats requested in the format: 34/5/best, we pick
            # the first that is available, starting from left
            req_formats = req_format.split('/')
            for rf in req_formats:
                if re.match(r'.+?\+.+?', rf) is not None:
                    # Two formats have been requested like '137+139'
                    format_1, format_2 = rf.split('+')
                    formats_info = (self.select_format(format_1, formats),
                                    self.select_format(format_2, formats))
                    if all(formats_info):
                        # Merged download: both halves must be available.
                        selected_format = {
                            'requested_formats': formats_info,
                            'format': rf,
                            'ext': formats_info[0]['ext'],
                        }
                    else:
                        selected_format = None
                else:
                    selected_format = self.select_format(rf, formats)
                if selected_format is not None:
                    formats_to_download = [selected_format]
                    break
        if not formats_to_download:
            raise ExtractorError('requested format not available',
                                 expected=True)

        if download:
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
821
822 def process_info(self, info_dict):
823 """Process a single resolved IE result."""
824
825 assert info_dict.get('_type', 'video') == 'video'
826
827 max_downloads = self.params.get('max_downloads')
828 if max_downloads is not None:
829 if self._num_downloads >= int(max_downloads):
830 raise MaxDownloadsReached()
831
832 info_dict['fulltitle'] = info_dict['title']
833 if len(info_dict['title']) > 200:
834 info_dict['title'] = info_dict['title'][:197] + '...'
835
836 # Keep for backwards compatibility
837 info_dict['stitle'] = info_dict['title']
838
839 if not 'format' in info_dict:
840 info_dict['format'] = info_dict['ext']
841
842 reason = self._match_entry(info_dict)
843 if reason is not None:
844 self.to_screen('[download] ' + reason)
845 return
846
847 self._num_downloads += 1
848
849 filename = self.prepare_filename(info_dict)
850
851 # Forced printings
852 if self.params.get('forcetitle', False):
853 self.to_stdout(info_dict['fulltitle'])
854 if self.params.get('forceid', False):
855 self.to_stdout(info_dict['id'])
856 if self.params.get('forceurl', False):
857 # For RTMP URLs, also include the playpath
858 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
859 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
860 self.to_stdout(info_dict['thumbnail'])
861 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
862 self.to_stdout(info_dict['description'])
863 if self.params.get('forcefilename', False) and filename is not None:
864 self.to_stdout(filename)
865 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
866 self.to_stdout(formatSeconds(info_dict['duration']))
867 if self.params.get('forceformat', False):
868 self.to_stdout(info_dict['format'])
869 if self.params.get('forcejson', False):
870 info_dict['_filename'] = filename
871 self.to_stdout(json.dumps(info_dict))
872
873 # Do nothing else if in simulate mode
874 if self.params.get('simulate', False):
875 return
876
877 if filename is None:
878 return
879
880 try:
881 dn = os.path.dirname(encodeFilename(filename))
882 if dn and not os.path.exists(dn):
883 os.makedirs(dn)
884 except (OSError, IOError) as err:
885 self.report_error('unable to create directory ' + compat_str(err))
886 return
887
888 if self.params.get('writedescription', False):
889 descfn = filename + '.description'
890 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
891 self.to_screen('[info] Video description is already present')
892 else:
893 try:
894 self.to_screen('[info] Writing video description to: ' + descfn)
895 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
896 descfile.write(info_dict['description'])
897 except (KeyError, TypeError):
898 self.report_warning('There\'s no description to write.')
899 except (OSError, IOError):
900 self.report_error('Cannot write description file ' + descfn)
901 return
902
903 if self.params.get('writeannotations', False):
904 annofn = filename + '.annotations.xml'
905 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
906 self.to_screen('[info] Video annotations are already present')
907 else:
908 try:
909 self.to_screen('[info] Writing video annotations to: ' + annofn)
910 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
911 annofile.write(info_dict['annotations'])
912 except (KeyError, TypeError):
913 self.report_warning('There are no annotations to write.')
914 except (OSError, IOError):
915 self.report_error('Cannot write annotations file: ' + annofn)
916 return
917
918 subtitles_are_requested = any([self.params.get('writesubtitles', False),
919 self.params.get('writeautomaticsub')])
920
921 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
922 # subtitles download errors are already managed as troubles in relevant IE
923 # that way it will silently go on when used with unsupporting IE
924 subtitles = info_dict['subtitles']
925 sub_format = self.params.get('subtitlesformat', 'srt')
926 for sub_lang in subtitles.keys():
927 sub = subtitles[sub_lang]
928 if sub is None:
929 continue
930 try:
931 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
932 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
933 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
934 else:
935 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
936 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
937 subfile.write(sub)
938 except (OSError, IOError):
939 self.report_error('Cannot write subtitles file ' + sub_filename)
940 return
941
942 if self.params.get('writeinfojson', False):
943 infofn = os.path.splitext(filename)[0] + '.info.json'
944 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
945 self.to_screen('[info] Video description metadata is already present')
946 else:
947 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
948 try:
949 write_json_file(info_dict, encodeFilename(infofn))
950 except (OSError, IOError):
951 self.report_error('Cannot write metadata to JSON file ' + infofn)
952 return
953
954 if self.params.get('writethumbnail', False):
955 if info_dict.get('thumbnail') is not None:
956 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
957 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
958 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
959 self.to_screen('[%s] %s: Thumbnail is already present' %
960 (info_dict['extractor'], info_dict['id']))
961 else:
962 self.to_screen('[%s] %s: Downloading thumbnail ...' %
963 (info_dict['extractor'], info_dict['id']))
964 try:
965 uf = self.urlopen(info_dict['thumbnail'])
966 with open(thumb_filename, 'wb') as thumbf:
967 shutil.copyfileobj(uf, thumbf)
968 self.to_screen('[%s] %s: Writing thumbnail to: %s' %
969 (info_dict['extractor'], info_dict['id'], thumb_filename))
970 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
971 self.report_warning('Unable to download thumbnail "%s": %s' %
972 (info_dict['thumbnail'], compat_str(err)))
973
974 if not self.params.get('skip_download', False):
975 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
976 success = True
977 else:
978 try:
979 def dl(name, info):
980 fd = get_suitable_downloader(info)(self, self.params)
981 for ph in self._progress_hooks:
982 fd.add_progress_hook(ph)
983 return fd.download(name, info)
984 if info_dict.get('requested_formats') is not None:
985 downloaded = []
986 success = True
987 merger = FFmpegMergerPP(self)
988 if not merger._get_executable():
989 postprocessors = []
990 self.report_warning('You have requested multiple '
991 'formats but ffmpeg or avconv are not installed.'
992 ' The formats won\'t be merged')
993 else:
994 postprocessors = [merger]
995 for f in info_dict['requested_formats']:
996 new_info = dict(info_dict)
997 new_info.update(f)
998 fname = self.prepare_filename(new_info)
999 fname = prepend_extension(fname, 'f%s' % f['format_id'])
1000 downloaded.append(fname)
1001 partial_success = dl(fname, new_info)
1002 success = success and partial_success
1003 info_dict['__postprocessors'] = postprocessors
1004 info_dict['__files_to_merge'] = downloaded
1005 else:
1006 # Just a single file
1007 success = dl(filename, info_dict)
1008 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1009 self.report_error('unable to download video data: %s' % str(err))
1010 return
1011 except (OSError, IOError) as err:
1012 raise UnavailableVideoError(err)
1013 except (ContentTooShortError, ) as err:
1014 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1015 return
1016
1017 if success:
1018 try:
1019 self.post_process(filename, info_dict)
1020 except (PostProcessingError) as err:
1021 self.report_error('postprocessing: %s' % str(err))
1022 return
1023
1024 self.record_download_archive(info_dict)
1025
1026 def download(self, url_list):
1027 """Download a given list of URLs."""
1028 if (len(url_list) > 1 and
1029 '%' not in self.params['outtmpl']
1030 and self.params.get('max_downloads') != 1):
1031 raise SameFileError(self.params['outtmpl'])
1032
1033 for url in url_list:
1034 try:
1035 #It also downloads the videos
1036 self.extract_info(url)
1037 except UnavailableVideoError:
1038 self.report_error('unable to download video')
1039 except MaxDownloadsReached:
1040 self.to_screen('[info] Maximum number of downloaded files reached.')
1041 raise
1042
1043 return self._download_retcode
1044
1045 def download_with_info_file(self, info_filename):
1046 with io.open(info_filename, 'r', encoding='utf-8') as f:
1047 info = json.load(f)
1048 try:
1049 self.process_ie_result(info, download=True)
1050 except DownloadError:
1051 webpage_url = info.get('webpage_url')
1052 if webpage_url is not None:
1053 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1054 return self.download([webpage_url])
1055 else:
1056 raise
1057 return self._download_retcode
1058
1059 def post_process(self, filename, ie_info):
1060 """Run all the postprocessors on the given file."""
1061 info = dict(ie_info)
1062 info['filepath'] = filename
1063 keep_video = None
1064 pps_chain = []
1065 if ie_info.get('__postprocessors') is not None:
1066 pps_chain.extend(ie_info['__postprocessors'])
1067 pps_chain.extend(self._pps)
1068 for pp in pps_chain:
1069 try:
1070 keep_video_wish, new_info = pp.run(info)
1071 if keep_video_wish is not None:
1072 if keep_video_wish:
1073 keep_video = keep_video_wish
1074 elif keep_video is None:
1075 # No clear decision yet, let IE decide
1076 keep_video = keep_video_wish
1077 except PostProcessingError as e:
1078 self.report_error(e.msg)
1079 if keep_video is False and not self.params.get('keepvideo', False):
1080 try:
1081 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1082 os.remove(encodeFilename(filename))
1083 except (IOError, OSError):
1084 self.report_warning('Unable to remove downloaded video file')
1085
1086 def _make_archive_id(self, info_dict):
1087 # Future-proof against any change in case
1088 # and backwards compatibility with prior versions
1089 extractor = info_dict.get('extractor_key')
1090 if extractor is None:
1091 if 'id' in info_dict:
1092 extractor = info_dict.get('ie_key') # key in a playlist
1093 if extractor is None:
1094 return None # Incomplete video information
1095 return extractor.lower() + ' ' + info_dict['id']
1096
1097 def in_download_archive(self, info_dict):
1098 fn = self.params.get('download_archive')
1099 if fn is None:
1100 return False
1101
1102 vid_id = self._make_archive_id(info_dict)
1103 if vid_id is None:
1104 return False # Incomplete video information
1105
1106 try:
1107 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1108 for line in archive_file:
1109 if line.strip() == vid_id:
1110 return True
1111 except IOError as ioe:
1112 if ioe.errno != errno.ENOENT:
1113 raise
1114 return False
1115
1116 def record_download_archive(self, info_dict):
1117 fn = self.params.get('download_archive')
1118 if fn is None:
1119 return
1120 vid_id = self._make_archive_id(info_dict)
1121 assert vid_id
1122 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1123 archive_file.write(vid_id + '\n')
1124
1125 @staticmethod
1126 def format_resolution(format, default='unknown'):
1127 if format.get('vcodec') == 'none':
1128 return 'audio only'
1129 if format.get('resolution') is not None:
1130 return format['resolution']
1131 if format.get('height') is not None:
1132 if format.get('width') is not None:
1133 res = '%sx%s' % (format['width'], format['height'])
1134 else:
1135 res = '%sp' % format['height']
1136 elif format.get('width') is not None:
1137 res = '?x%d' % format['width']
1138 else:
1139 res = default
1140 return res
1141
1142 def _format_note(self, fdict):
1143 res = ''
1144 if fdict.get('ext') in ['f4f', 'f4m']:
1145 res += '(unsupported) '
1146 if fdict.get('format_note') is not None:
1147 res += fdict['format_note'] + ' '
1148 if fdict.get('tbr') is not None:
1149 res += '%4dk ' % fdict['tbr']
1150 if fdict.get('container') is not None:
1151 if res:
1152 res += ', '
1153 res += '%s container' % fdict['container']
1154 if (fdict.get('vcodec') is not None and
1155 fdict.get('vcodec') != 'none'):
1156 if res:
1157 res += ', '
1158 res += fdict['vcodec']
1159 if fdict.get('vbr') is not None:
1160 res += '@'
1161 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1162 res += 'video@'
1163 if fdict.get('vbr') is not None:
1164 res += '%4dk' % fdict['vbr']
1165 if fdict.get('acodec') is not None:
1166 if res:
1167 res += ', '
1168 if fdict['acodec'] == 'none':
1169 res += 'video only'
1170 else:
1171 res += '%-5s' % fdict['acodec']
1172 elif fdict.get('abr') is not None:
1173 if res:
1174 res += ', '
1175 res += 'audio'
1176 if fdict.get('abr') is not None:
1177 res += '@%3dk' % fdict['abr']
1178 if fdict.get('asr') is not None:
1179 res += ' (%5dHz)' % fdict['asr']
1180 if fdict.get('filesize') is not None:
1181 if res:
1182 res += ', '
1183 res += format_bytes(fdict['filesize'])
1184 return res
1185
1186 def list_formats(self, info_dict):
1187 def line(format, idlen=20):
1188 return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1189 format['format_id'],
1190 format['ext'],
1191 self.format_resolution(format),
1192 self._format_note(format),
1193 ))
1194
1195 formats = info_dict.get('formats', [info_dict])
1196 idlen = max(len('format code'),
1197 max(len(f['format_id']) for f in formats))
1198 formats_s = [line(f, idlen) for f in formats]
1199 if len(formats) > 1:
1200 formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1201 formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1202
1203 header_line = line({
1204 'format_id': 'format code', 'ext': 'extension',
1205 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1206 self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1207 (info_dict['id'], header_line, '\n'.join(formats_s)))
1208
1209 def urlopen(self, req):
1210 """ Start an HTTP download """
1211 return self._opener.open(req, timeout=self._socket_timeout)
1212
1213 def print_debug_header(self):
1214 if not self.params.get('verbose'):
1215 return
1216
1217 write_string(
1218 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1219 locale.getpreferredencoding(),
1220 sys.getfilesystemencoding(),
1221 sys.stdout.encoding,
1222 self.get_encoding()),
1223 encoding=None
1224 )
1225
1226 self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1227 try:
1228 sp = subprocess.Popen(
1229 ['git', 'rev-parse', '--short', 'HEAD'],
1230 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1231 cwd=os.path.dirname(os.path.abspath(__file__)))
1232 out, err = sp.communicate()
1233 out = out.decode().strip()
1234 if re.match('[0-9a-f]+', out):
1235 self._write_string('[debug] Git HEAD: ' + out + '\n')
1236 except:
1237 try:
1238 sys.exc_clear()
1239 except:
1240 pass
1241 self._write_string('[debug] Python version %s - %s' %
1242 (platform.python_version(), platform_name()) + '\n')
1243
1244 proxy_map = {}
1245 for handler in self._opener.handlers:
1246 if hasattr(handler, 'proxies'):
1247 proxy_map.update(handler.proxies)
1248 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1249
1250 def _setup_opener(self):
1251 timeout_val = self.params.get('socket_timeout')
1252 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1253
1254 opts_cookiefile = self.params.get('cookiefile')
1255 opts_proxy = self.params.get('proxy')
1256
1257 if opts_cookiefile is None:
1258 self.cookiejar = compat_cookiejar.CookieJar()
1259 else:
1260 self.cookiejar = compat_cookiejar.MozillaCookieJar(
1261 opts_cookiefile)
1262 if os.access(opts_cookiefile, os.R_OK):
1263 self.cookiejar.load()
1264
1265 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1266 self.cookiejar)
1267 if opts_proxy is not None:
1268 if opts_proxy == '':
1269 proxies = {}
1270 else:
1271 proxies = {'http': opts_proxy, 'https': opts_proxy}
1272 else:
1273 proxies = compat_urllib_request.getproxies()
1274 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1275 if 'http' in proxies and 'https' not in proxies:
1276 proxies['https'] = proxies['http']
1277 proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1278
1279 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1280 https_handler = make_HTTPS_handler(
1281 self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
1282 ydlh = YoutubeDLHandler(debuglevel=debuglevel)
1283 opener = compat_urllib_request.build_opener(
1284 https_handler, proxy_handler, cookie_processor, ydlh)
1285 # Delete the default user-agent header, which would otherwise apply in
1286 # cases where our custom HTTP handler doesn't come into play
1287 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1288 opener.addheaders = []
1289 self._opener = opener
1290
1291 def encode(self, s):
1292 if isinstance(s, bytes):
1293 return s # Already encoded
1294
1295 try:
1296 return s.encode(self.get_encoding())
1297 except UnicodeEncodeError as err:
1298 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1299 raise
1300
1301 def get_encoding(self):
1302 encoding = self.params.get('encoding')
1303 if encoding is None:
1304 encoding = preferredencoding()
1305 return encoding