jfr.im git - yt-dlp.git/commitdiff
Allow running some `postprocessors` before actual download
author    pukkandan <redacted>
          Sat, 10 Apr 2021 22:18:07 +0000 (03:48 +0530)
committer pukkandan <redacted>
          Sun, 11 Apr 2021 20:42:46 +0000 (02:12 +0530)
yt_dlp/YoutubeDL.py
yt_dlp/__init__.py
yt_dlp/postprocessor/embedthumbnail.py
yt_dlp/postprocessor/movefilesafterdownload.py

index 600ba6ee1a729278a3f1dae69bc4db1b84bdac2f..e1ce8c9b30314b10c3ad54a1f239011784c550c7 100644 (file)
@@ -291,10 +291,9 @@ class YoutubeDL(object):
     postprocessors:    A list of dictionaries, each with an entry
                        * key:  The name of the postprocessor. See
                                yt_dlp/postprocessor/__init__.py for a list.
-                       * _after_move: Optional. If True, run this post_processor
-                               after 'MoveFilesAfterDownload'
-                       as well as any further keyword arguments for the
-                       postprocessor.
+                       * when: When to run the postprocessor. Can be one of
+                               pre_process|before_dl|post_process|after_move.
+                               Assumed to be 'post_process' if not given
     post_hooks:        A list of functions that get called as the final step
                        for each video file, after all postprocessors have been
                        called. The filename will be passed as the only argument.
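For reference, a minimal sketch of how a caller might use the new 'when' key through the existing postprocessors option (editor's example, not part of the commit; the URL and the exec command are placeholders, and the postprocessor-specific arguments are only illustrative):

    from yt_dlp import YoutubeDL

    ydl_opts = {
        'writesubtitles': True,
        'postprocessors': [
            # runs before the actual video download (new in this commit)
            {'key': 'FFmpegSubtitlesConvertor', 'format': 'srt', 'when': 'before_dl'},
            # 'when' omitted, so this defaults to 'post_process'
            {'key': 'FFmpegMetadata'},
            # runs only after the files have been moved to their final location
            {'key': 'ExecAfterDownload', 'exec_cmd': 'echo {}', 'when': 'after_move'},
        ],
    }
    with YoutubeDL(ydl_opts) as ydl:
        ydl.download(['https://example.com/watch?v=placeholder'])

Because 'before_dl' postprocessors run before the download step, the subtitle convertor above is also reached when 'skip_download' is set, which is what the removed skip_dl special case further down used to handle.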
@@ -423,7 +422,7 @@ class YoutubeDL(object):
 
     params = None
     _ies = []
-    _pps = {'beforedl': [], 'aftermove': [], 'normal': []}
+    _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
     __prepare_filename_warned = False
     _first_webpage_request = True
     _download_retcode = None
@@ -438,7 +437,7 @@ def __init__(self, params=None, auto_init=True):
             params = {}
         self._ies = []
         self._ies_instances = {}
-        self._pps = {'beforedl': [], 'aftermove': [], 'normal': []}
+        self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
         self.__prepare_filename_warned = False
         self._first_webpage_request = True
         self._post_hooks = []
@@ -551,7 +550,7 @@ def check_deprecated(param, option, suggestion):
                 when = pp_def['when']
                 del pp_def['when']
             else:
-                when = 'normal'
+                when = 'post_process'
             pp = pp_class(self, **compat_kwargs(pp_def))
             self.add_post_processor(pp, when=when)
 
@@ -605,7 +604,7 @@ def add_default_info_extractors(self):
         for ie in gen_extractor_classes():
             self.add_info_extractor(ie)
 
-    def add_post_processor(self, pp, when='normal'):
+    def add_post_processor(self, pp, when='post_process'):
         """Add a PostProcessor object to the end of the chain."""
         self._pps[when].append(pp)
         pp.set_downloader(self)
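A condensed sketch of how a postprocessors entry is turned into an instance and routed to a stage, paraphrasing the two hunks above (editor's sketch; PP_CLASSES is a stand-in for the real name-to-class lookup):

    PP_CLASSES = {}  # stand-in for yt_dlp.postprocessor's name -> class mapping

    def add_from_def(ydl, pp_def):
        pp_def = dict(pp_def)                      # keep the caller's dict intact
        when = pp_def.pop('when', 'post_process')  # default stage when 'when' is absent
        pp_class = PP_CLASSES[pp_def.pop('key')]
        ydl.add_post_processor(pp_class(ydl, **pp_def), when=when)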
@@ -2114,13 +2113,12 @@ def process_info(self, info_dict):
         self.post_extract(info_dict)
         self._num_downloads += 1
 
-        info_dict = self.pre_process(info_dict)
+        info_dict, _ = self.pre_process(info_dict)
 
         # info_dict['_filename'] needs to be set for backward compatibility
         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
         temp_filename = self.prepare_filename(info_dict, 'temp')
         files_to_move = {}
-        skip_dl = self.params.get('skip_download', False)
 
         # Forced printings
         self.__forced_printings(info_dict, full_filename, incomplete=False)
@@ -2197,11 +2195,9 @@ def dl(name, info, subtitle=False):
             # ie = self.get_info_extractor(info_dict['extractor_key'])
             for sub_lang, sub_info in subtitles.items():
                 sub_format = sub_info['ext']
-                sub_fn = self.prepare_filename(info_dict, 'subtitle')
-                sub_filename = subtitles_filename(
-                    temp_filename if not skip_dl else sub_fn,
-                    sub_lang, sub_format, info_dict.get('ext'))
-                sub_filename_final = subtitles_filename(sub_fn, sub_lang, sub_format, info_dict.get('ext'))
+                sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
+                sub_filename_final = subtitles_filename(
+                    self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))
                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
                     self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
                     sub_info['filepath'] = sub_filename
@@ -2229,28 +2225,6 @@ def dl(name, info, subtitle=False):
                                                 (sub_lang, error_to_compat_str(err)))
                             continue
 
-        if skip_dl:
-            if self.params.get('convertsubtitles', False):
-                # subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
-                filename_real_ext = os.path.splitext(full_filename)[1][1:]
-                filename_wo_ext = (
-                    os.path.splitext(full_filename)[0]
-                    if filename_real_ext == info_dict['ext']
-                    else full_filename)
-                afilename = '%s.%s' % (filename_wo_ext, self.params.get('convertsubtitles'))
-                # if subconv.available:
-                #     info_dict['__postprocessors'].append(subconv)
-                if os.path.exists(encodeFilename(afilename)):
-                    self.to_screen(
-                        '[download] %s has already been downloaded and '
-                        'converted' % afilename)
-                else:
-                    try:
-                        self.post_process(full_filename, info_dict, files_to_move)
-                    except PostProcessingError as err:
-                        self.report_error('Postprocessing: %s' % str(err))
-                        return
-
         if self.params.get('writeinfojson', False):
             infofn = self.prepare_filename(info_dict, 'infojson')
             if not self._ensure_dir_exists(encodeFilename(infofn)):
@@ -2266,11 +2240,10 @@ def dl(name, info, subtitle=False):
                     return
             info_dict['__infojson_filename'] = infofn
 
-        thumbfn = self.prepare_filename(info_dict, 'thumbnail')
-        thumb_fn_temp = temp_filename if not skip_dl else thumbfn
-        for thumb_ext in self._write_thumbnails(info_dict, thumb_fn_temp):
-            thumb_filename_temp = replace_extension(thumb_fn_temp, thumb_ext, info_dict.get('ext'))
-            thumb_filename = replace_extension(thumbfn, thumb_ext, info_dict.get('ext'))
+        for thumb_ext in self._write_thumbnails(info_dict, temp_filename):
+            thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))
+            thumb_filename = replace_extension(
+                self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))
             files_to_move[thumb_filename_temp] = thumb_filename
 
         # Write internet shortcut files
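By this point files_to_move maps side files written next to the temp filename to their final names, roughly like the following (editor's illustration; paths are purely hypothetical and assume separate temp and home paths):

    files_to_move = {
        '/tmp/yt-dlp-temp/My Video.webp': '/home/user/videos/My Video.webp',      # thumbnail
        '/tmp/yt-dlp-temp/My Video.en.vtt': '/home/user/videos/My Video.en.vtt',  # subtitle
    }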
@@ -2322,9 +2295,20 @@ def _write_link_file(extension, template, newline, embed_filename):
             if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
                 return
 
-        # Download
+        try:
+            info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
+        except PostProcessingError as err:
+            self.report_error('Preprocessing: %s' % str(err))
+            return
+
         must_record_download_archive = False
-        if not skip_dl:
+        if self.params.get('skip_download', False):
+            info_dict['filepath'] = temp_filename
+            info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
+            info_dict['__files_to_move'] = files_to_move
+            info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
+        else:
+            # Download
             try:
 
                 def existing_file(*filepaths):
@@ -2633,11 +2617,12 @@ def actual_post_extract(info_dict):
 
         actual_post_extract(info_dict or {})
 
-    def pre_process(self, ie_info):
+    def pre_process(self, ie_info, key='pre_process', files_to_move=None):
         info = dict(ie_info)
-        for pp in self._pps['beforedl']:
+        info['__files_to_move'] = files_to_move or {}
+        for pp in self._pps[key]:
             info = self.run_pp(pp, info)
-        return info
+        return info, info.pop('__files_to_move', None)
 
     def post_process(self, filename, ie_info, files_to_move=None):
         """Run all the postprocessors on the given file."""
@@ -2645,11 +2630,11 @@ def post_process(self, filename, ie_info, files_to_move=None):
         info['filepath'] = filename
         info['__files_to_move'] = files_to_move or {}
 
-        for pp in ie_info.get('__postprocessors', []) + self._pps['normal']:
+        for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
             info = self.run_pp(pp, info)
         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
         del info['__files_to_move']
-        for pp in self._pps['aftermove']:
+        for pp in self._pps['after_move']:
             info = self.run_pp(pp, info)
         return info
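Taken together, the per-video order after this commit is: 'pre_process' right after extraction, 'before_dl' just before the download, 'post_process' (plus any extractor-supplied postprocessors) after it, then MoveFilesAfterDownload, then 'after_move'. A toy, self-contained illustration of that ordering (editor's example; these are not the real classes):

    class Echo:
        def __init__(self, name):
            self.name = name
        def run(self, info):
            print('running', self.name)
            return info

    pps = {
        'pre_process': [Echo('MetadataFromField')],
        'before_dl': [Echo('FFmpegSubtitlesConvertor')],
        'post_process': [Echo('FFmpegMetadata')],
        'after_move': [Echo('ExecAfterDownload')],
    }

    info = {}
    for pp in pps['pre_process']:
        info = pp.run(info)
    print('... filenames prepared, subtitles/thumbnails written ...')
    for pp in pps['before_dl']:
        info = pp.run(info)
    print('... actual download happens here ...')
    for pp in pps['post_process']:
        info = pp.run(info)
    print('... MoveFilesAfterDownload runs here ...')
    for pp in pps['after_move']:
        info = pp.run(info)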
 
index bf728e6139dcb2f77e63e13199204dfe1d81e315..efb852891ef5f43a773260b686ea3b13c71739a1 100644 (file)
@@ -228,7 +228,7 @@ def parse_retries(retries, name=''):
         if not re.match(remux_regex, opts.remuxvideo):
             parser.error('invalid video remux format specified')
     if opts.convertsubtitles is not None:
-        if opts.convertsubtitles not in ['srt', 'vtt', 'ass', 'lrc']:
+        if opts.convertsubtitles not in ('srt', 'vtt', 'ass', 'lrc'):
             parser.error('invalid subtitle format specified')
 
     if opts.date is not None:
@@ -322,7 +322,15 @@ def report_conflict(arg1, arg2):
         postprocessors.append({
             'key': 'MetadataFromField',
             'formats': opts.metafromfield,
-            'when': 'beforedl'
+            # Run this immediately after extraction is complete
+            'when': 'pre_process'
+        })
+    if opts.convertsubtitles:
+        postprocessors.append({
+            'key': 'FFmpegSubtitlesConvertor',
+            'format': opts.convertsubtitles,
+            # Run this before the actual video download
+            'when': 'before_dl'
         })
     if opts.extractaudio:
         postprocessors.append({
@@ -351,15 +359,11 @@ def report_conflict(arg1, arg2):
     # so metadata can be added here.
     if opts.addmetadata:
         postprocessors.append({'key': 'FFmpegMetadata'})
-    if opts.convertsubtitles:
-        postprocessors.append({
-            'key': 'FFmpegSubtitlesConvertor',
-            'format': opts.convertsubtitles,
-        })
     if opts.embedsubtitles:
         already_have_subtitle = opts.writesubtitles
         postprocessors.append({
             'key': 'FFmpegEmbedSubtitle',
+            # already_have_subtitle = True prevents the file from being deleted after embedding
             'already_have_subtitle': already_have_subtitle
         })
         if not already_have_subtitle:
@@ -385,6 +389,7 @@ def report_conflict(arg1, arg2):
         already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails
         postprocessors.append({
             'key': 'EmbedThumbnail',
+            # already_have_thumbnail = True prevents the file from being deleted after embedding
             'already_have_thumbnail': already_have_thumbnail
         })
         if not already_have_thumbnail:
@@ -399,7 +404,8 @@ def report_conflict(arg1, arg2):
         postprocessors.append({
             'key': 'ExecAfterDownload',
             'exec_cmd': opts.exec_cmd,
-            'when': 'aftermove'
+            # Run this only after the files have been moved to their final locations
+            'when': 'after_move'
         })
 
     def report_args_compat(arg, name):
@@ -425,7 +431,6 @@ def report_args_compat(arg, name):
         else match_filter_func(opts.match_filter))
 
     ydl_opts = {
-        'convertsubtitles': opts.convertsubtitles,
         'usenetrc': opts.usenetrc,
         'username': opts.username,
         'password': opts.password,
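In effect, the option-parsing layer now expresses --convert-subs and --exec as staged postprocessor entries like the following, instead of passing convertsubtitles through to YoutubeDL (simplified; the exec command is a placeholder):

    postprocessors = [
        {'key': 'FFmpegSubtitlesConvertor', 'format': 'srt', 'when': 'before_dl'},
        {'key': 'ExecAfterDownload', 'exec_cmd': 'echo {}', 'when': 'after_move'},
    ]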
index 905fbeb31748966349698233dfcaf69b608fe2d1..25124161a77dd28541a80fff00c83195344f9651 100644 (file)
@@ -1,7 +1,6 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-
 import os
 import subprocess
 import struct
index 0ab7744ca6ead31fc1a046dc59ce794fc4232cdb..1064a8cb8d639fd870b90cddb2fb26bb48db1028 100644 (file)
 
 class MoveFilesAfterDownloadPP(PostProcessor):
 
+    def __init__(self, downloader=None, downloaded=True):
+        PostProcessor.__init__(self, downloader)
+        self._downloaded = downloaded
+
     @classmethod
     def pp_key(cls):
         return 'MoveFiles'
@@ -21,7 +25,8 @@ def run(self, info):
         dl_path, dl_name = os.path.split(encodeFilename(info['filepath']))
         finaldir = info.get('__finaldir', dl_path)
         finalpath = os.path.join(finaldir, dl_name)
-        info['__files_to_move'][info['filepath']] = decodeFilename(finalpath)
+        if self._downloaded:
+            info['__files_to_move'][info['filepath']] = decodeFilename(finalpath)
 
         make_newfilename = lambda old: decodeFilename(os.path.join(finaldir, os.path.basename(encodeFilename(old))))
         for oldfile, newfile in info['__files_to_move'].items():
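A self-contained toy version of the selection logic above, to show what the new downloaded flag controls (editor's sketch with illustrative paths; the real method goes on to perform the moves and the reporting):

    import os

    def files_to_actually_move(info, downloaded=True):
        dl_path = os.path.dirname(info['filepath'])
        finaldir = info.get('__finaldir', dl_path)
        moves = dict(info['__files_to_move'])
        if downloaded:
            # only a file that was really downloaded is itself queued for moving
            moves[info['filepath']] = os.path.join(finaldir, os.path.basename(info['filepath']))
        return moves

    info = {
        'filepath': '/tmp/yt-dlp-temp/My Video.mkv',
        '__finaldir': '/home/user/videos',
        '__files_to_move': {'/tmp/yt-dlp-temp/My Video.en.srt': '/home/user/videos/My Video.en.srt'},
    }
    print(files_to_actually_move(info, downloaded=False))  # --skip-download: only the subtitle moves
    print(files_to_actually_move(info, downloaded=True))   # normal run: the video file is queued too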