Changed repo name to yt-dlp

[yt-dlp.git] / youtube_dlc / YoutubeDL.py
diff --git a/youtube_dlc/YoutubeDL.py b/youtube_dlc/YoutubeDL.py

index 2cc02e46fad4e8f28447e789a6f3dc491e8c7d0a..2d3eacfebdbb29eea385c0e10cfb60a155355456 100644 (file)
--- a/youtube_dlc/YoutubeDL.py
+++ b/youtube_dlc/YoutubeDL.py
@@ -99,6 +99,7 @@
      YoutubeDLCookieProcessor,
      YoutubeDLHandler,
      YoutubeDLRedirectHandler,
+    process_communicate_or_kill,
  )
  from .cache import Cache
  from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
@@ -178,9 +179,11 @@ class YoutubeDL(object):
      outtmpl:           Template for output names.
      restrictfilenames: Do not allow "&" and spaces in file names.
      trim_file_name:    Limit length of filename (extension excluded).
-    ignoreerrors:      Do not stop on download errors.
+    ignoreerrors:      Do not stop on download errors. (Default True when running youtube-dlc, but False when directly accessing YoutubeDL class)
      force_generic_extractor: Force downloader to use the generic extractor
-    nooverwrites:      Prevent overwriting files.
+    overwrites:        Overwrite all video and metadata files if True,
+                       overwrite only non-video files if None
+                       and don't overwrite any file if False
      playliststart:     Playlist item to start at.
      playlistend:       Playlist item to end at.
      playlist_items:    Specific indices of playlist to download.
@@ -252,6 +255,9 @@ class YoutubeDL(object):
                                 youtube_dlc/postprocessor/__init__.py for a list.
                         as well as any further keyword arguments for the
                         postprocessor.
+    post_hooks:        A list of functions that get called as the final step
+                       for each video file, after all postprocessors have been
+                       called. The filename will be passed as the only argument.
      progress_hooks:    A list of functions that get called on download
                         progress, with a dictionary with the entries
                         * status: One of "downloading", "error", or "finished".
@@ -333,8 +339,9 @@ class YoutubeDL(object):
                         otherwise prefer ffmpeg.
      ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
                         to the binary or its containing directory.
-    postprocessor_args: A list of additional command-line arguments for the
-                        postprocessor.
+    postprocessor_args: A dictionary of postprocessor names (in lower case) and a list
+                        of additional command-line arguments for the postprocessor.
+                        Use 'default' as the name for arguments to be passed to all PP.
  
      The following options are used by the Youtube extractor:
      youtube_include_dash_manifest: If True (default), DASH manifests and related
@@ -368,6 +375,7 @@ def __init__(self, params=None, auto_init=True):
          self._ies = []
          self._ies_instances = {}
          self._pps = []
+        self._post_hooks = []
          self._progress_hooks = []
          self._download_retcode = 0
          self._num_downloads = 0
@@ -471,6 +479,9 @@ def check_deprecated(param, option, suggestion):
              pp = pp_class(self, **compat_kwargs(pp_def))
              self.add_post_processor(pp)
  
+        for ph in self.params.get('post_hooks', []):
+            self.add_post_hook(ph)
+
          for ph in self.params.get('progress_hooks', []):
              self.add_progress_hook(ph)
  
@@ -523,6 +534,10 @@ def add_post_processor(self, pp):
          self._pps.append(pp)
          pp.set_downloader(self)
  
+    def add_post_hook(self, ph):
+        """Add the post hook (called with the final filename once all postprocessors have run)"""
+        self._post_hooks.append(ph)
+
      def add_progress_hook(self, ph):
          """Add the progress hook (currently only for the file downloader)"""
          self._progress_hooks.append(ph)
@@ -577,7 +592,7 @@ def to_console_title(self, message):
                  # already of type unicode()
                  ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
          elif 'TERM' in os.environ:
-            self._write_string('\033]0;%s\007' % message, self._screen_file)
+            self._write_string('\033]0;%s\007' % message, self._screen_file)  # OSC 0 (ESC ]) sets the title; ESC [ is CSI and would not
  
      def save_console_title(self):
          if not self.params.get('consoletitle', False):
@@ -673,6 +688,13 @@ def report_file_already_downloaded(self, file_name):
          except UnicodeEncodeError:
              self.to_screen('[download] The file has already been downloaded')
  
+    def report_file_delete(self, file_name):
+        """Report that the already existing file is about to be deleted (falls back to a message without the name on encoding errors)."""
+        try:
+            self.to_screen('Deleting already existent file %s' % file_name)
+        except UnicodeEncodeError:
+            self.to_screen('Deleting already existent file')
+
      def prepare_filename(self, info_dict):
          """Generate the output filename."""
          try:
@@ -908,6 +930,10 @@ def add_default_extra_info(self, ie_result, ie, url):
          self.add_extra_info(ie_result, {
              'extractor': ie.IE_NAME,
              'webpage_url': url,
+            'duration_string': (
+                formatSeconds(ie_result['duration'], '-')
+                if ie_result.get('duration', None) is not None
+                else None),
              'webpage_url_basename': url_basename(url),
              'extractor_key': ie.ie_key(),
          })
@@ -1185,23 +1211,20 @@ def can_merge():
              merger = FFmpegMergerPP(self)
              return merger.available and merger.can_merge()
  
-        def prefer_best():
-            if self.params.get('simulate', False):
-                return False
-            if not download:
-                return False
-            if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
-                return True
-            if info_dict.get('is_live'):
-                return True
-            if not can_merge():
-                return True
-            return False
-
-        req_format_list = ['bestvideo+bestaudio', 'best']
-        if prefer_best():
-            req_format_list.reverse()
-        return '/'.join(req_format_list)
+        prefer_best = (
+            not self.params.get('simulate', False)
+            and download
+            and (
+                not can_merge()
+                or info_dict.get('is_live', False)
+                or self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-'))
+
+        return (
+            'best/bestvideo+bestaudio'
+            if prefer_best
+            else 'bestvideo*+bestaudio/best'
+            if not self.params.get('allow_multiple_audio_streams', False)
+            else 'bestvideo+bestaudio/best')
  
      def build_format_selector(self, format_spec):
          def syntax_error(note, start):
@@ -1216,8 +1239,8 @@ def syntax_error(note, start):
          GROUP = 'GROUP'
          FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
  
-        allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', True),
-                                  'video': self.params.get('allow_multiple_video_streams', True)}
+        allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
+                                  'video': self.params.get('allow_multiple_video_streams', False)}
  
          def _parse_filter(tokens):
              filter_parts = []
@@ -1705,7 +1728,7 @@ def is_wellformed(f):
          if req_format is None:
              req_format = self._default_format_spec(info_dict, download=download)
              if self.params.get('verbose'):
-                self.to_stdout('[debug] Default format spec: %s' % req_format)
+                self._write_string('[debug] Default format spec: %s\n' % req_format)
  
          format_selector = self.build_format_selector(req_format)
  
@@ -1884,7 +1907,7 @@ def ensure_dir_exists(path):
  
          if self.params.get('writedescription', False):
              descfn = replace_extension(filename, 'description', info_dict.get('ext'))
-            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
+            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
                  self.to_screen('[info] Video description is already present')
              elif info_dict.get('description') is None:
                  self.report_warning('There\'s no description to write.')
@@ -1899,7 +1922,7 @@ def ensure_dir_exists(path):
  
          if self.params.get('writeannotations', False):
              annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
-            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
+            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
                  self.to_screen('[info] Video annotations are already present')
              elif not info_dict.get('annotations'):
                  self.report_warning('There are no annotations to write.')
@@ -1919,7 +1942,7 @@ def dl(name, info, subtitle=False):
              for ph in self._progress_hooks:
                  fd.add_progress_hook(ph)
              if self.params.get('verbose'):
-                self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
+                self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
              return fd.download(name, info, subtitle)
  
          subtitles_are_requested = any([self.params.get('writesubtitles', False),
@@ -1933,7 +1956,7 @@ def dl(name, info, subtitle=False):
              for sub_lang, sub_info in subtitles.items():
                  sub_format = sub_info['ext']
                  sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
-                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
+                if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
                      self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
                  else:
                      self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
@@ -1988,7 +2011,7 @@ def dl(name, info, subtitle=False):
  
          if self.params.get('writeinfojson', False):
              infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
-            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
+            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
                  self.to_screen('[info] Video description metadata is already present')
              else:
                  self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
@@ -2096,11 +2119,15 @@ def compatible_formats(formats):
                              'Requested formats are incompatible for merge and will be merged into mkv.')
                      # Ensure filename always has a correct extension for successful merge
                      filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
-                    if os.path.exists(encodeFilename(filename)):
+                    file_exists = os.path.exists(encodeFilename(filename))
+                    if not self.params.get('overwrites', False) and file_exists:
                          self.to_screen(
                              '[download] %s has already been downloaded and '
                              'merged' % filename)
                      else:
+                        if file_exists:
+                            self.report_file_delete(filename)
+                            os.remove(encodeFilename(filename))
                          for f in requested_formats:
                              new_info = dict(info_dict)
                              new_info.update(f)
@@ -2117,6 +2144,11 @@ def compatible_formats(formats):
                          # Even if there were no downloads, it is being merged only now
                          info_dict['__real_download'] = True
                  else:
+                    # Delete existing file with --yes-overwrites
+                    if self.params.get('overwrites', False):
+                        if os.path.exists(encodeFilename(filename)):
+                            self.report_file_delete(filename)
+                            os.remove(encodeFilename(filename))
                      # Just a single file
                      success, real_download = dl(filename, info_dict)
                      info_dict['__real_download'] = real_download
@@ -2197,10 +2229,19 @@ def compatible_formats(formats):
                  except (PostProcessingError) as err:
                      self.report_error('postprocessing: %s' % str(err))
                      return
+                try:
+                    for ph in self._post_hooks:
+                        ph(filename)
+                except Exception as err:
+                    self.report_error('post hooks: %s' % str(err))
+                    return
                  must_record_download_archive = True
  
          if must_record_download_archive or self.params.get('force_write_download_archive', False):
              self.record_download_archive(info_dict)
+        max_downloads = self.params.get('max_downloads')
+        if max_downloads is not None and self._num_downloads >= int(max_downloads):
+            raise MaxDownloadsReached()
  
      def download(self, url_list):
          """Download a given list of URLs."""
@@ -2491,7 +2532,7 @@ def print_debug_header(self):
                  self.get_encoding()))
          write_string(encoding_str, encoding=None)
  
-        self._write_string('[debug] youtube-dlc version ' + __version__ + '\n')
+        self._write_string('[debug] yt-dlp version ' + __version__ + '\n')
          if _LAZY_LOADER:
              self._write_string('[debug] Lazy loading extractors enabled' + '\n')
          try:
@@ -2499,7 +2540,7 @@ def print_debug_header(self):
                  ['git', 'rev-parse', '--short', 'HEAD'],
                  stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                  cwd=os.path.dirname(os.path.abspath(__file__)))
-            out, err = sp.communicate()
+            out, err = process_communicate_or_kill(sp)
              out = out.decode().strip()
              if re.match('[0-9a-f]+', out):
                  self._write_string('[debug] Git HEAD: ' + out + '\n')
@@ -2540,6 +2581,7 @@ def python_implementation():
          if self.params.get('call_home', False):
              ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
              self._write_string('[debug] Public IP address: %s\n' % ipaddr)
+            return
              latest_version = self.urlopen(
                  'https://yt-dl.org/latest/version').read().decode('utf-8')
              if version_tuple(latest_version) > version_tuple(__version__):
@@ -2635,9 +2677,9 @@ def _write_thumbnails(self, info_dict, filename):
              thumb_ext = determine_ext(t['url'], 'jpg')
              suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
              thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
-            t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
+            t['filename'] = thumb_filename = replace_extension(filename + suffix, thumb_ext, info_dict.get('ext'))
  
-            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
+            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
                  self.to_screen('[%s] %s: Thumbnail %sis already present' %
                                 (info_dict['extractor'], info_dict['id'], thumb_display_id))
              else: