COUNT views
--match-filter FILTER Generic video filter. Any field (see
"OUTPUT TEMPLATE") can be compared with a
- number or a quoted string using the
- operators defined in "Filtering formats".
- You can also simply specify a field to
- match if the field is present and "!field"
- to check if the field is not present.
- Multiple filters can be checked using "&".
- For example, to only match videos that are
- not live, has a like count more than 100, a
- dislike count less than 50 (or the dislike
+ number or a string using the operators
+ defined in "Filtering formats". You can
+ also simply specify a field to match if the
+ field is present and "!field" to check if
+ the field is not present. In addition,
+ Python style regular expression matching
+ can be done using "~=", and multiple
+ filters can be checked with "&". Use a "\"
+ to escape "&" or quotes if needed. Eg:
+ --match-filter "!is_live & like_count>?100
+ & description~='(?i)\bcats \& dogs\b'"
+ matches only videos that are not live, has
+ a like count more than 100 (or the like
field is not available), and also has a
- description that contains "python", use
- --match-filter "!is_live & like_count>100 &
- dislike_count<?50 & description*='python'"
+ description that contains the phrase "cats
+ & dogs" (ignoring case)
--no-match-filter Do not use generic video filter (default)
--no-playlist Download only the video, if the URL refers
to a video and a playlist
'9999 51')
def test_match_str(self):
    """Check match_str against each supported filter form.

    Covers unary presence/absence checks, numeric comparisons, string
    operators, "&"-joined filters, "~=" regex matching, quoted values,
    escaping of "&", and the example given in the --match-filter help.
    """
    # Unary
    self.assertFalse(match_str('xy', {'x': 1200}))
    self.assertTrue(match_str('!xy', {'x': 1200}))
    self.assertTrue(match_str('x', {'x': 1200}))
    self.assertFalse(match_str('!x', {'x': 1200}))
    self.assertTrue(match_str('x', {'x': 0}))
    self.assertTrue(match_str('is_live', {'is_live': True}))
    self.assertFalse(match_str('is_live', {'is_live': False}))
    self.assertFalse(match_str('is_live', {'is_live': None}))
    self.assertFalse(match_str('is_live', {}))
    self.assertFalse(match_str('!is_live', {'is_live': True}))
    self.assertTrue(match_str('!is_live', {'is_live': False}))
    self.assertTrue(match_str('!is_live', {'is_live': None}))
    self.assertTrue(match_str('!is_live', {}))
    self.assertTrue(match_str('title', {'title': 'abc'}))
    self.assertTrue(match_str('title', {'title': ''}))
    self.assertFalse(match_str('!title', {'title': 'abc'}))
    self.assertFalse(match_str('!title', {'title': ''}))

    # Numeric
    self.assertFalse(match_str('x>0', {'x': 0}))
    self.assertFalse(match_str('x>0', {}))
    self.assertTrue(match_str('x>?0', {}))
    self.assertFalse(match_str('x>2K', {'x': 1200}))
    self.assertTrue(match_str('x>=1200 & x < 1300', {'x': 1200}))
    self.assertFalse(match_str('x>=1100 & x < 1200', {'x': 1200}))

    # String
    self.assertFalse(match_str('y=a212', {'y': 'foobar42'}))
    self.assertTrue(match_str('y=foobar42', {'y': 'foobar42'}))
    self.assertFalse(match_str('y!=foobar42', {'y': 'foobar42'}))
    self.assertTrue(match_str('y!*=baz', {'y': 'foobar42'}))
    self.assertTrue(match_str('y$=42', {'y': 'foobar42'}))
    self.assertFalse(match_str('y$=43', {'y': 'foobar42'}))

    # And
    self.assertFalse(match_str(
        'like_count > 100 & dislike_count <? 50 & description',
        {'like_count': 90, 'description': 'foo'}))
    self.assertFalse(match_str(
        'like_count > 100 & dislike_count <? 50 & description',
        {'like_count': 190, 'dislike_count': 10}))

    # Regex
    self.assertTrue(match_str(r'x~=\bbar', {'x': 'foo bar'}))
    self.assertFalse(match_str(r'x~=\bbar.+', {'x': 'foo bar'}))
    self.assertFalse(match_str(r'x~=^FOO', {'x': 'foo bar'}))
    self.assertTrue(match_str(r'x~=(?i)^FOO', {'x': 'foo bar'}))

    # Quotes
    self.assertTrue(match_str(r'x^="foo"', {'x': 'foo "bar"'}))
    self.assertFalse(match_str(r'x^="foo "', {'x': 'foo "bar"'}))
    self.assertFalse(match_str(r'x$="bar"', {'x': 'foo "bar"'}))
    self.assertTrue(match_str(r'x$=" \"bar\""', {'x': 'foo "bar"'}))

    # Escaping &
    self.assertFalse(match_str(r'x=foo & bar', {'x': 'foo & bar'}))
    self.assertTrue(match_str(r'x=foo \& bar', {'x': 'foo & bar'}))
    self.assertTrue(match_str(r'x=foo \& bar & x^=foo', {'x': 'foo & bar'}))
    self.assertTrue(match_str(r'x="foo \& bar" & x^=foo', {'x': 'foo & bar'}))

    # Example from docs
    # The filter must actually be passed through match_str: handing the
    # string straight to assertTrue always passes, because a non-empty
    # string is truthy and the dict is taken as the failure message.
    # The outer Python string is double-quoted so that the single quotes
    # delimiting the regex reach match_str without a preceding backslash.
    self.assertTrue(match_str(
        r"!is_live & like_count>?100 & description~='(?i)\bcats \& dogs\b'",
        {'description': 'Raining Cats & Dogs'}))
def test_parse_dfxp_time_expr(self):
self.assertEqual(parse_dfxp_time_expr(None), None)
'Generic video filter. Any field (see "OUTPUT TEMPLATE") can be compared with a '
'number or a string using the operators defined in "Filtering formats". '
'You can also simply specify a field to match if the field is present '
- 'and "!field" to check if the field is not present. '
- 'Multiple filters can be checked using "&". '
- 'For example, to only match videos that are not live, '
- 'has a like count more than 100, a dislike count less than 50 '
- '(or the dislike field is not available), and also has a description '
- 'that contains "python", use --match-filter "!is_live & '
- 'like_count>100 & dislike_count<?50 & description*=\'python\'"'))
+ 'and "!field" to check if the field is not present. In addition, '
+ 'Python style regular expression matching can be done using "~=", '
+ 'and multiple filters can be checked with "&". '
+ 'Use a "\\" to escape "&" or quotes if needed. Eg: --match-filter '
+ r'"!is_live & like_count>?100 & description~=\'(?i)\bcats \& dogs\b\'" '
+ 'matches only videos that are not live, has a like count more than 100 '
+ '(or the like field is not available), and also has a description '
+ 'that contains the phrase "cats & dogs" (ignoring case)'))
selection.add_option(
'--no-match-filter',
metavar='FILTER', dest='match_filter', action='store_const', const=None,
def _match_one(filter_part, dct):
# TODO: Generalize code with YoutubeDL._build_format_filter
+ STRING_OPERATORS = {
+ '*=': operator.contains,
+ '^=': lambda attr, value: attr.startswith(value),
+ '$=': lambda attr, value: attr.endswith(value),
+ '~=': lambda attr, value: re.search(value, attr),
+ }
COMPARISON_OPERATORS = {
+ **STRING_OPERATORS,
+ '<=': operator.le, # "<=" must be defined above "<"
'<': operator.lt,
- '<=': operator.le,
- '>': operator.gt,
'>=': operator.ge,
+ '>': operator.gt,
'=': operator.eq,
- '*=': operator.contains,
- '^=': lambda attr, value: attr.startswith(value),
- '$=': lambda attr, value: attr.endswith(value),
}
+
operator_rex = re.compile(r'''(?x)\s*
(?P<key>[a-z_]+)
\s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
(?:
(?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
- (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
- (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
+ (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
+ (?P<strval>.+?)
)
\s*$
''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
if quote is not None:
comparison_value = comparison_value.replace(r'\%s' % quote, quote)
else:
- if m.group('op') in ('*=', '^=', '$='):
- raise ValueError(
- 'Operator %s only supports string values!' % m.group('op'))
+ if m.group('op') in STRING_OPERATORS:
+ raise ValueError('Operator %s only supports string values!' % m.group('op'))
try:
comparison_value = int(m.group('intval'))
except ValueError:
""" Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
return all(
- _match_one(filter_part, dct) for filter_part in filter_str.split('&'))
+ _match_one(filter_part.replace(r'\&', '&'), dct)
+ for filter_part in re.split(r'(?<!\\)&', filter_str))
def match_filter_func(filter_str):