mirror of
https://github.com/python/cpython
synced 2024-10-14 17:59:38 +00:00
bpo-47080: Use atomic groups to simplify fnmatch (GH-32029)
Use the re module's new atomic groups ("(?>...)", added in Python 3.11) to greatly simplify the construction of worst-case linear-time patterns.
This commit is contained in:
parent
345b390ed6
commit
5c3201e146
|
@ -16,12 +16,6 @@
|
|||
|
||||
__all__ = ["filter", "fnmatch", "fnmatchcase", "translate"]
|
||||
|
||||
# Build a thread-safe incrementing counter to help create unique regexp group
|
||||
# names across calls.
|
||||
from itertools import count
|
||||
_nextgroupnum = count().__next__
|
||||
del count
|
||||
|
||||
def fnmatch(name, pat):
|
||||
"""Test whether FILENAME matches PATTERN.
|
||||
|
||||
|
@ -149,17 +143,10 @@ def translate(pat):
|
|||
# Now deal with STAR fixed STAR fixed ...
|
||||
# For an interior `STAR fixed` pairing, we want to do a minimal
|
||||
# .*? match followed by `fixed`, with no possibility of backtracking.
|
||||
# We can't spell that directly, but can trick it into working by matching
|
||||
# .*?fixed
|
||||
# in a lookahead assertion, save the matched part in a group, then
|
||||
# consume that group via a backreference. If the overall match fails,
|
||||
# the lookahead assertion won't try alternatives. So the translation is:
|
||||
# (?=(?P<name>.*?fixed))(?P=name)
|
||||
# Group names are created as needed: g0, g1, g2, ...
|
||||
# The numbers are obtained from _nextgroupnum() to ensure they're unique
|
||||
# across calls and across threads. This is because people rely on the
|
||||
# undocumented ability to join multiple translate() results together via
|
||||
# "|" to build large regexps matching "one of many" shell patterns.
|
||||
# Atomic groups ("(?>...)") allow us to spell that directly.
|
||||
# Note: people rely on the undocumented ability to join multiple
|
||||
# translate() results together via "|" to build large regexps matching
|
||||
# "one of many" shell patterns.
|
||||
while i < n:
|
||||
assert inp[i] is STAR
|
||||
i += 1
|
||||
|
@ -176,8 +163,7 @@ def translate(pat):
|
|||
add(".*")
|
||||
add(fixed)
|
||||
else:
|
||||
groupnum = _nextgroupnum()
|
||||
add(f"(?=(?P<g{groupnum}>.*?{fixed}))(?P=g{groupnum})")
|
||||
add(f"(?>.*?{fixed})")
|
||||
assert i == n
|
||||
res = "".join(res)
|
||||
return fr'(?s:{res})\Z'
|
||||
|
|
|
@ -124,17 +124,9 @@ def test_translate(self):
|
|||
self.assertEqual(translate('A*********?[?]?'), r'(?s:A.*.[?].)\Z')
|
||||
# fancy translation to prevent exponential-time match failure
|
||||
t = translate('**a*a****a')
|
||||
digits = re.findall(r'\d+', t)
|
||||
self.assertEqual(len(digits), 4)
|
||||
self.assertEqual(digits[0], digits[1])
|
||||
self.assertEqual(digits[2], digits[3])
|
||||
g1 = f"g{digits[0]}" # e.g., group name "g4"
|
||||
g2 = f"g{digits[2]}" # e.g., group name "g5"
|
||||
self.assertEqual(t,
|
||||
fr'(?s:(?=(?P<{g1}>.*?a))(?P={g1})(?=(?P<{g2}>.*?a))(?P={g2}).*a)\Z')
|
||||
self.assertEqual(t, r'(?s:(?>.*?a)(?>.*?a).*a)\Z')
|
||||
# and try pasting multiple translate results - it's an undocumented
|
||||
# feature that this works; all the pain of generating unique group
|
||||
# names across calls exists to support this
|
||||
# feature that this works
|
||||
r1 = translate('**a**a**a*')
|
||||
r2 = translate('**b**b**b*')
|
||||
r3 = translate('*c*c*c*')
|
||||
|
|
Loading…
Reference in a new issue