Added option `--break-on-reject`

author pukkandan <redacted>

Wed, 13 Jan 2021 01:01:01 +0000 (06:31 +0530)

committer pukkandan <redacted>

Wed, 13 Jan 2021 01:14:35 +0000 (06:44 +0530)
author pukkandan <redacted>
Wed, 13 Jan 2021 01:01:01 +0000 (06:31 +0530)
committer pukkandan <redacted>
Wed, 13 Jan 2021 01:14:35 +0000 (06:44 +0530)
diff --git a/README.md b/README.md

index 80fe87536833ad44474855d66d35ddae715badc2..ef70223cd76246defbb376fb544ec55078ba11e4 100644 (file)
--- a/README.md
+++ b/README.md
@@ -250,8 +250,10 @@ ## Video Selection:
      --download-archive FILE          Download only videos not listed in the
                                       archive file. Record the IDs of all
                                       downloaded videos in it.
-    --break-on-existing              Stop the download process after attempting
-                                     to download a file that's in the archive.
+    --break-on-existing              Stop the download process when encountering
+                                     a file that's in the archive.
+    --break-on-reject                Stop the download process when encountering
+                                     a file that has been filtered out.
      --no-download-archive            Do not use archive file (default)
      --include-ads                    Download advertisements as well
                                       (experimental)
diff --git a/youtube_dlc/YoutubeDL.py b/youtube_dlc/YoutubeDL.py

index 2d3eacfebdbb29eea385c0e10cfb60a155355456..dadf500c4d30bd1f63165876cda709881c47e3b7 100644 (file)
--- a/youtube_dlc/YoutubeDL.py
+++ b/youtube_dlc/YoutubeDL.py
@@ -58,6 +58,7 @@
      encode_compat_str,
      encodeFilename,
      error_to_compat_str,
+    ExistingVideoReached,
      expand_path,
      ExtractorError,
      format_bytes,
@@ -81,6 +82,7 @@
      register_socks_protocols,
      render_table,
      replace_extension,
+    RejectedVideoReached,
      SameFileError,
      sanitize_filename,
      sanitize_path,
@@ -232,6 +234,7 @@ class YoutubeDL(object):
                         again.
      break_on_existing: Stop the download process after attempting to download a file that's
                         in the archive.
+    break_on_reject:   Stop the download process when encountering a video that has been filtered out.
      cookiefile:        File name where cookies should be read from and dumped to.
      nocheckcertificate:Do not verify SSL certificates
      prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
@@ -797,44 +800,53 @@ def prepare_filename(self, info_dict):
      def _match_entry(self, info_dict, incomplete):
          """ Returns None if the file should be downloaded """
  
-        video_title = info_dict.get('title', info_dict.get('id', 'video'))
-        if 'title' in info_dict:
-            # This can happen when we're just evaluating the playlist
-            title = info_dict['title']
-            matchtitle = self.params.get('matchtitle', False)
-            if matchtitle:
-                if not re.search(matchtitle, title, re.IGNORECASE):
-                    return '"' + title + '" title did not match pattern "' + matchtitle + '"'
-            rejecttitle = self.params.get('rejecttitle', False)
-            if rejecttitle:
-                if re.search(rejecttitle, title, re.IGNORECASE):
-                    return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
-        date = info_dict.get('upload_date')
-        if date is not None:
-            dateRange = self.params.get('daterange', DateRange())
-            if date not in dateRange:
-                return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
-        view_count = info_dict.get('view_count')
-        if view_count is not None:
-            min_views = self.params.get('min_views')
-            if min_views is not None and view_count < min_views:
-                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
-            max_views = self.params.get('max_views')
-            if max_views is not None and view_count > max_views:
-                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
-        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
-            return 'Skipping "%s" because it is age restricted' % video_title
-        if self.in_download_archive(info_dict):
-            return '%s has already been recorded in archive' % video_title
-
-        if not incomplete:
-            match_filter = self.params.get('match_filter')
-            if match_filter is not None:
-                ret = match_filter(info_dict)
-                if ret is not None:
-                    return ret
-
-        return None
+        def check_filter():
+            video_title = info_dict.get('title', info_dict.get('id', 'video'))
+            if 'title' in info_dict:
+                # This can happen when we're just evaluating the playlist
+                title = info_dict['title']
+                matchtitle = self.params.get('matchtitle', False)
+                if matchtitle:
+                    if not re.search(matchtitle, title, re.IGNORECASE):
+                        return '"' + title + '" title did not match pattern "' + matchtitle + '"'
+                rejecttitle = self.params.get('rejecttitle', False)
+                if rejecttitle:
+                    if re.search(rejecttitle, title, re.IGNORECASE):
+                        return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
+            date = info_dict.get('upload_date')
+            if date is not None:
+                dateRange = self.params.get('daterange', DateRange())
+                if date not in dateRange:
+                    return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
+            view_count = info_dict.get('view_count')
+            if view_count is not None:
+                min_views = self.params.get('min_views')
+                if min_views is not None and view_count < min_views:
+                    return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
+                max_views = self.params.get('max_views')
+                if max_views is not None and view_count > max_views:
+                    return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
+            if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
+                return 'Skipping "%s" because it is age restricted' % video_title
+            if self.in_download_archive(info_dict):
+                return '%s has already been recorded in archive' % video_title
+
+            if not incomplete:
+                match_filter = self.params.get('match_filter')
+                if match_filter is not None:
+                    ret = match_filter(info_dict)
+                    if ret is not None:
+                        return ret
+            return None
+
+        reason = check_filter()
+        if reason is not None:
+            self.to_screen('[download] ' + reason)
+            if reason.endswith('has already been recorded in archive') and self.params.get('break_on_existing'):
+                raise ExistingVideoReached()
+            elif self.params.get('break_on_reject'):
+                raise RejectedVideoReached()
+        return reason
  
      @staticmethod
      def add_extra_info(info_dict, extra_info):
@@ -895,7 +907,7 @@ def wrapper(self, *args, **kwargs):
                  self.report_error(msg)
              except ExtractorError as e:  # An error we somewhat expected
                  self.report_error(compat_str(e), e.format_traceback())
-            except MaxDownloadsReached:
+            except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
                  raise
              except Exception as e:
                  if self.params.get('ignoreerrors', False):
@@ -1098,14 +1110,8 @@ def report_download(num_entries):
                      'extractor_key': ie_result['extractor_key'],
                  }
  
-                reason = self._match_entry(entry, incomplete=True)
-                if reason is not None:
-                    if reason.endswith('has already been recorded in the archive') and self.params.get('break_on_existing'):
-                        print('[download] tried downloading a file that\'s already in the archive, stopping since --break-on-existing is set.')
-                        break
-                    else:
-                        self.to_screen('[download] ' + reason)
-                        continue
+                if self._match_entry(entry, incomplete=True) is not None:
+                    continue
  
                  entry_result = self.__process_iterable_entry(entry, download, extra)
                  # TODO: skip failed (empty) entries?
@@ -1870,9 +1875,7 @@ def process_info(self, info_dict):
          if 'format' not in info_dict:
              info_dict['format'] = info_dict['ext']
  
-        reason = self._match_entry(info_dict, incomplete=False)
-        if reason is not None:
-            self.to_screen('[download] ' + reason)
+        if self._match_entry(info_dict, incomplete=False) is not None:
              return
  
          self._num_downloads += 1
@@ -2260,7 +2263,13 @@ def download(self, url_list):
              except UnavailableVideoError:
                  self.report_error('unable to download video')
              except MaxDownloadsReached:
-                self.to_screen('[info] Maximum number of downloaded files reached.')
+                self.to_screen('[info] Maximum number of downloaded files reached')
+                raise
+            except ExistingVideoReached:
+                self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
+                raise
+            except RejectedVideoReached:
+                self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
                  raise
              else:
                  if self.params.get('dump_single_json', False):
diff --git a/youtube_dlc/__init__.py b/youtube_dlc/__init__.py

index 9c32d98b954873c14d97496383526c8b9511e34a..1ba240c0df7285a2ea1b768a9b173d7705c81e7e 100644 (file)
--- a/youtube_dlc/__init__.py
+++ b/youtube_dlc/__init__.py
@@ -26,11 +26,13 @@
      decodeOption,
      DEFAULT_OUTTMPL,
      DownloadError,
+    ExistingVideoReached,
      expand_path,
      match_filter_func,
      MaxDownloadsReached,
      preferredencoding,
      read_batch_urls,
+    RejectedVideoReached,
      SameFileError,
      setproctitle,
      std_headers,
@@ -449,6 +451,7 @@ def parse_retries(retries):
          'age_limit': opts.age_limit,
          'download_archive': download_archive_fn,
          'break_on_existing': opts.break_on_existing,
+        'break_on_reject': opts.break_on_reject,
          'cookiefile': opts.cookiefile,
          'nocheckcertificate': opts.no_check_certificate,
          'prefer_insecure': opts.prefer_insecure,
@@ -519,8 +522,8 @@ def parse_retries(retries):
                  retcode = ydl.download_with_info_file(expand_path(opts.load_info_filename))
              else:
                  retcode = ydl.download(all_urls)
-        except MaxDownloadsReached:
-            ydl.to_screen('--max-download limit reached, aborting.')
+        except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
+            ydl.to_screen('Aborting remaining downloads')
              retcode = 101
  
      sys.exit(retcode)
diff --git a/youtube_dlc/options.py b/youtube_dlc/options.py

index 17429050767830c38d504f532ff186f282cb2194..c94e3abb429cd6bcbd16c5b3d0da84daa2eb7139 100644 (file)
--- a/youtube_dlc/options.py
+++ b/youtube_dlc/options.py
@@ -367,7 +367,11 @@ def _comma_separated_values_options_callback(option, opt_str, value, parser):
      selection.add_option(
          '--break-on-existing',
          action='store_true', dest='break_on_existing', default=False,
-        help="Stop the download process after attempting to download a file that's in the archive.")
+        help="Stop the download process when encountering a file that's in the archive.")
+    selection.add_option(
+        '--break-on-reject',
+        action='store_true', dest='break_on_reject', default=False,
+        help="Stop the download process when encountering a file that has been filtered out.")
      selection.add_option(
          '--no-download-archive',
          dest='download_archive', action="store_const", const=None,
diff --git a/youtube_dlc/utils.py b/youtube_dlc/utils.py

index c99b94423505e41873cd466950a08b671fabd4f8..cf9d8258a4c10ab0cb71cdc38ec03abac5644898 100644 (file)
--- a/youtube_dlc/utils.py
+++ b/youtube_dlc/utils.py
@@ -2433,6 +2433,16 @@ def __init__(self, msg):
          self.msg = msg
  
  
+class ExistingVideoReached(YoutubeDLError):
+    """ --break-on-existing triggered: a video already in the archive was reached. """
+    pass
+
+
+class RejectedVideoReached(YoutubeDLError):
+    """ --break-on-reject triggered: a video that was filtered out was reached. """
+    pass
+
+
  class MaxDownloadsReached(YoutubeDLError):
      """ --max-downloads limit has been reached. """
      pass
author	pukkandan <redacted>
	Wed, 13 Jan 2021 01:01:01 +0000 (06:31 +0530)
committer	pukkandan <redacted>
	Wed, 13 Jan 2021 01:14:35 +0000 (06:44 +0530)
README.md		patch \| blob \| blame \| history
youtube_dlc/YoutubeDL.py		patch \| blob \| blame \| history
youtube_dlc/__init__.py		patch \| blob \| blame \| history
youtube_dlc/options.py		patch \| blob \| blame \| history
youtube_dlc/utils.py		patch \| blob \| blame \| history