Let `--match-filter` reject entries early

author pukkandan <redacted>

Sun, 15 Aug 2021 08:12:23 +0000 (13:42 +0530)

committer pukkandan <redacted>

Mon, 16 Aug 2021 22:59:56 +0000 (04:29 +0530)
author pukkandan <redacted>
Sun, 15 Aug 2021 08:12:23 +0000 (13:42 +0530)
committer pukkandan <redacted>
Mon, 16 Aug 2021 22:59:56 +0000 (04:29 +0530)
diff --git a/README.md b/README.md

index 7877a4a274f95c67942f33bfba76881cd5509fa8..bd7d31c13f391f71d0694d93c72efd40cbc1b32d 100644 (file)
--- a/README.md
+++ b/README.md
@@ -1439,6 +1439,10 @@ #### Redundant options
      -e, --get-title                  --print title
      -g, --get-url                    --print urls
      -j, --dump-json                  --print "%()j"
+    --match-title REGEX              --match-filter "title ~= (?i)REGEX"
+    --reject-title REGEX             --match-filter "title !~= (?i)REGEX"
+    --min-views COUNT                --match-filter "view_count >=? COUNT"
+    --max-views COUNT                --match-filter "view_count <=? COUNT"
  
  
  #### Not recommended
diff --git a/test/test_utils.py b/test/test_utils.py

index aef59e49197ff090744bec2d0618877ccb808942..dedc598f7b9f37b9c99a834f8ceb98ea689032fd 100644 (file)
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -1285,9 +1285,15 @@ def test_match_str(self):
          self.assertTrue(match_str(r'x="foo \& bar" & x^=foo', {'x': 'foo & bar'}))
  
          # Example from docs
-        self.assertTrue(
-            r'!is_live & like_count>?100 & description~=\'(?i)\bcats \& dogs\b\'',
-            {'description': 'Raining Cats & Dogs'})
+        self.assertTrue(match_str(
+            r"!is_live & like_count>?100 & description~='(?i)\bcats \& dogs\b'",
+            {'description': 'Raining Cats & Dogs'}))
+
+        # Incomplete
+        self.assertFalse(match_str('id!=foo', {'id': 'foo'}, True))
+        self.assertTrue(match_str('x', {'id': 'foo'}, True))
+        self.assertTrue(match_str('!x', {'id': 'foo'}, True))
+        self.assertFalse(match_str('x', {'id': 'foo'}, False))
  
      def test_parse_dfxp_time_expr(self):
          self.assertEqual(parse_dfxp_time_expr(None), None)
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py

index d12131acd8c4abae40ee915fbdaf04b2b1af5262..eef3f8b4ca42b2021cc0aa288b9eae6066fe62e6 100644 (file)
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -1117,12 +1117,15 @@ def check_filter():
              if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
                  return 'Skipping "%s" because it is age restricted' % video_title
  
-            if not incomplete:
-                match_filter = self.params.get('match_filter')
-                if match_filter is not None:
-                    ret = match_filter(info_dict)
-                    if ret is not None:
-                        return ret
+            match_filter = self.params.get('match_filter')
+            if match_filter is not None:
+                try:
+                    ret = match_filter(info_dict, incomplete=incomplete)
+                except TypeError:
+                    # For backward compatibility
+                    ret = None if incomplete else match_filter(info_dict)
+                if ret is not None:
+                    return ret
              return None
  
          if self.in_download_archive(info_dict):
@@ -2873,13 +2876,13 @@ def download(self, url_list):
              except UnavailableVideoError:
                  self.report_error('unable to download video')
              except MaxDownloadsReached:
-                self.to_screen('[info] Maximum number of downloaded files reached')
+                self.to_screen('[info] Maximum number of downloads reached')
                  raise
              except ExistingVideoReached:
-                self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
+                self.to_screen('[info] Encountered a video that is already in the archive, stopping due to --break-on-existing')
                  raise
              except RejectedVideoReached:
-                self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
+                self.to_screen('[info] Encountered a video that did not match filter, stopping due to --break-on-reject')
                  raise
              else:
                  if self.params.get('dump_single_json', False):
diff --git a/yt_dlp/options.py b/yt_dlp/options.py

index 1499991a11cd9a4c9139a9adeec69a62de620d91..ef821eb11a8d9e2eaf63c6ede9c42960d563c6e4 100644 (file)
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -356,11 +356,11 @@ def _dict_from_options_callback(
      selection.add_option(
          '--match-title',
          dest='matchtitle', metavar='REGEX',
-        help='Download only matching titles (regex or caseless sub-string)')
+        help=optparse.SUPPRESS_HELP)
      selection.add_option(
          '--reject-title',
          dest='rejecttitle', metavar='REGEX',
-        help='Skip download for matching titles (regex or caseless sub-string)')
+        help=optparse.SUPPRESS_HELP)
      selection.add_option(
          '--max-downloads',
          dest='max_downloads', metavar='NUMBER', type=int, default=None,
@@ -395,11 +395,11 @@ def _dict_from_options_callback(
      selection.add_option(
          '--min-views',
          metavar='COUNT', dest='min_views', default=None, type=int,
-        help='Do not download any videos with less than COUNT views')
+        help=optparse.SUPPRESS_HELP)
      selection.add_option(
          '--max-views',
          metavar='COUNT', dest='max_views', default=None, type=int,
-        help='Do not download any videos with more than COUNT views')
+        help=optparse.SUPPRESS_HELP)
      selection.add_option(
          '--match-filter',
          metavar='FILTER', dest='match_filter', default=None,
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py

index 0e8392fdffe7d6679e146648284679981dfae487..6276ac726be379f628e056b8e76b5d7b8523d0b8 100644 (file)
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -4657,7 +4657,7 @@ def filter_using_list(row, filterArray):
      return '\n'.join(format_str % tuple(row) for row in table)
  
  
-def _match_one(filter_part, dct):
+def _match_one(filter_part, dct, incomplete):
      # TODO: Generalize code with YoutubeDL._build_format_filter
      STRING_OPERATORS = {
          '*=': operator.contains,
@@ -4718,7 +4718,7 @@ def _match_one(filter_part, dct):
                          'Invalid integer value %r in filter part %r' % (
                              m.group('intval'), filter_part))
          if actual_value is None:
-            return m.group('none_inclusive')
+            return incomplete or m.group('none_inclusive')
          return op(actual_value, comparison_value)
  
      UNARY_OPERATORS = {
@@ -4733,22 +4733,25 @@ def _match_one(filter_part, dct):
      if m:
          op = UNARY_OPERATORS[m.group('op')]
          actual_value = dct.get(m.group('key'))
+        if incomplete and actual_value is None:
+            return True
          return op(actual_value)
  
      raise ValueError('Invalid filter part %r' % filter_part)
  
  
-def match_str(filter_str, dct):
-    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
-
+def match_str(filter_str, dct, incomplete=False):
+    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or False
+        When incomplete, every condition on a missing field passes
+    """
      return all(
-        _match_one(filter_part.replace(r'\&', '&'), dct)
+        _match_one(filter_part.replace(r'\&', '&'), dct, incomplete)
          for filter_part in re.split(r'(?<!\\)&', filter_str))
  
  
  def match_filter_func(filter_str):
-    def _match_func(info_dict):
-        if match_str(filter_str, info_dict):
+    def _match_func(info_dict, *args, **kwargs):
+        if match_str(filter_str, info_dict, *args, **kwargs):
              return None
          else:
              video_title = info_dict.get('title', info_dict.get('id', 'video'))
author	pukkandan <redacted>
	Sun, 15 Aug 2021 08:12:23 +0000 (13:42 +0530)
committer	pukkandan <redacted>
	Mon, 16 Aug 2021 22:59:56 +0000 (04:29 +0530)
README.md		patch \| blob \| blame \| history
test/test_utils.py		patch \| blob \| blame \| history
yt_dlp/YoutubeDL.py		patch \| blob \| blame \| history
yt_dlp/options.py		patch \| blob \| blame \| history
yt_dlp/utils.py		patch \| blob \| blame \| history