mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-08-11 03:09:35 +00:00
Add regex to --match-filter
This does not fully deprecate `--match-title`/`--reject-title`: `--match-filter` is only checked after the extraction is complete, whereas `--match-title` can often be checked from the flat playlist. Fixes: https://github.com/ytdl-org/youtube-dl/issues/9092, https://github.com/ytdl-org/youtube-dl/issues/23035
This commit is contained in:
@@ -378,13 +378,14 @@ def parseOpts(overrideArguments=None):
|
||||
'Generic video filter. Any field (see "OUTPUT TEMPLATE") can be compared with a '
|
||||
'number or a string using the operators defined in "Filtering formats". '
|
||||
'You can also simply specify a field to match if the field is present '
|
||||
'and "!field" to check if the field is not present. '
|
||||
'Multiple filters can be checked using "&". '
|
||||
'For example, to only match videos that are not live, '
|
||||
'has a like count more than 100, a dislike count less than 50 '
|
||||
'(or the dislike field is not available), and also has a description '
|
||||
'that contains "python", use --match-filter "!is_live & '
|
||||
'like_count>100 & dislike_count<?50 & description*=\'python\'"'))
|
||||
'and "!field" to check if the field is not present. In addition, '
|
||||
'Python style regular expression matching can be done using "~=", '
|
||||
'and multiple filters can be checked with "&". '
|
||||
'Use a "\\" to escape "&" or quotes if needed. Eg: --match-filter '
|
||||
r'"!is_live & like_count>?100 & description~=\'(?i)\bcats \& dogs\b\'" '
|
||||
'matches only videos that are not live, has a like count more than 100 '
|
||||
'(or the like field is not available), and also has a description '
|
||||
'that contains the phrase "cats & dogs" (ignoring case)'))
|
||||
selection.add_option(
|
||||
'--no-match-filter',
|
||||
metavar='FILTER', dest='match_filter', action='store_const', const=None,
|
||||
|
@@ -4664,23 +4664,28 @@ def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
|
||||
|
||||
def _match_one(filter_part, dct):
|
||||
# TODO: Generalize code with YoutubeDL._build_format_filter
|
||||
COMPARISON_OPERATORS = {
|
||||
'<': operator.lt,
|
||||
'<=': operator.le,
|
||||
'>': operator.gt,
|
||||
'>=': operator.ge,
|
||||
'=': operator.eq,
|
||||
STRING_OPERATORS = {
|
||||
'*=': operator.contains,
|
||||
'^=': lambda attr, value: attr.startswith(value),
|
||||
'$=': lambda attr, value: attr.endswith(value),
|
||||
'~=': lambda attr, value: re.search(value, attr),
|
||||
}
|
||||
COMPARISON_OPERATORS = {
|
||||
**STRING_OPERATORS,
|
||||
'<=': operator.le, # "<=" must be defined above "<"
|
||||
'<': operator.lt,
|
||||
'>=': operator.ge,
|
||||
'>': operator.gt,
|
||||
'=': operator.eq,
|
||||
}
|
||||
|
||||
operator_rex = re.compile(r'''(?x)\s*
|
||||
(?P<key>[a-z_]+)
|
||||
\s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
|
||||
(?:
|
||||
(?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
|
||||
(?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
|
||||
(?P<strval>(?![0-9.])[a-z0-9A-Z]*)
|
||||
(?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
|
||||
(?P<strval>.+?)
|
||||
)
|
||||
\s*$
|
||||
''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
|
||||
@@ -4705,9 +4710,8 @@ def _match_one(filter_part, dct):
|
||||
if quote is not None:
|
||||
comparison_value = comparison_value.replace(r'\%s' % quote, quote)
|
||||
else:
|
||||
if m.group('op') in ('*=', '^=', '$='):
|
||||
raise ValueError(
|
||||
'Operator %s only supports string values!' % m.group('op'))
|
||||
if m.group('op') in STRING_OPERATORS:
|
||||
raise ValueError('Operator %s only supports string values!' % m.group('op'))
|
||||
try:
|
||||
comparison_value = int(m.group('intval'))
|
||||
except ValueError:
|
||||
@@ -4743,7 +4747,8 @@ def match_str(filter_str, dct):
|
||||
""" Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
|
||||
|
||||
return all(
|
||||
_match_one(filter_part, dct) for filter_part in filter_str.split('&'))
|
||||
_match_one(filter_part.replace(r'\&', '&'), dct)
|
||||
for filter_part in re.split(r'(?<!\\)&', filter_str))
|
||||
|
||||
|
||||
def match_filter_func(filter_str):
|
||||
|
Reference in New Issue
Block a user