From aa8e4a02904b3a1c4b3064eb7502d887f7de958b Mon Sep 17 00:00:00 2001
From: Peter Stephenson
Date: Tue, 1 Aug 2023 14:32:55 +0100
Subject: [PATCH] 52008: Pattern bug with branches + exclusion
Add tests.
---
ChangeLog | 5 +++++
Src/pattern.c | 22 ++++++++++++++++++++--
Test/D02glob.ztst | 26 ++++++++++++++++++++++++++
3 files changed, 51 insertions(+), 2 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 372092a32..8e6e3fb18 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2023-08-01 Peter Stephenson
+
+ * 52008: Src/pattern.c, Test/D02glob.ztst: Fix bug with branches
+ in patterns followed by an exclusion, and add tests.
+
2023-07-31 dana
* github #100: HexorCatZ: Completion/Unix/Command/_qemu:
diff --git a/Src/pattern.c b/Src/pattern.c
index 3edda1772..2a1a514fb 100644
--- a/Src/pattern.c
+++ b/Src/pattern.c
@@ -2987,14 +2987,15 @@ patmatch(Upat prog)
case P_EXCSYNC:
/* See the P_EXCLUDE code below for where syncptr comes from */
{
- unsigned char *syncptr;
+ unsigned char *syncstart, *syncptr, *ptr;
Upat after;
after = P_OPERAND(scan);
DPUTS(!P_ISEXCLUDE(after),
"BUG: EXCSYNC not followed by EXCLUDE.");
DPUTS(!P_OPERAND(after)->p,
"BUG: EXCSYNC not handled by EXCLUDE");
- syncptr = P_OPERAND(after)->p + (patinput - patinstart);
+ syncstart = P_OPERAND(after)->p;
+ syncptr = syncstart + (patinput - patinstart);
/*
* If we already matched from here, this time we fail.
* See WBRANCH code for story about error count.
@@ -3009,6 +3010,23 @@ patmatch(Upat prog)
* failed anyway.
*/
*syncptr = errsfound + 1;
+ /*
+ * Because of backtracking, any match before this point
+ * can't apply to the current branch we're on so is now
+ * a failure --- this can happen if, on a previous
+ * branch, we initially marked a success before failing
+ * on a later part of the pattern after marking up the
+ * P_EXCSYNC (even an end anchor will have this effect).
+ * To make sure we record the current match point
+ * correctly, mark those down now.
+ *
+ * This might have side effects on the efficiency of
+ * pathological cases involving nested branches. To
+ * fix that we'd probably need to record matches on
+ * different branches separately.
+ */
+ for (ptr = syncstart; ptr < syncptr; ++ptr)
+ *ptr = 0;
}
break;
case P_EXCEND:
diff --git a/Test/D02glob.ztst b/Test/D02glob.ztst
index 850a535e5..4d88e5c27 100644
--- a/Test/D02glob.ztst
+++ b/Test/D02glob.ztst
@@ -817,6 +817,32 @@
*>*/glob.tmp/(flip|flop)
*>*/glob.tmp/(flip|flop)/trailing/components
+# The following set tests an obscure problem with branches followed by
+# exclusions that shows up when the exclusion matches against
+# something other than the complete test string, hence the complicated
+# double negative.
+ [[ ab = (|a*)~^(*b) ]]
+0:Regression test for exclusion after branches: empty first alternative
+
+ [[ ab = (b|a*)~^(*b) ]]
+0:Regression test for exclusion after branches: non-empty first alternative
+
+ [[ ab = (b*|a*)~^(*b) ]]
+0:Regression test for exclusion after branches: full length first alternative
+
+# Corresponding tests where the exclusion should succeed, so the
+# match fails. It's hard to know how to provoke bugs here...
+ [[ abc = (|a*)~^(*b) ]]
+1:Regression test for exclusion after branches: failure case 1
+
+ [[ abc = (b|a*)~^(*b) ]]
+1:Regression test for exclusion after branches: failure case 2
+
+ [[ abc = (b*|a*)~^(*b) ]]
+1:Regression test for exclusion after branches: failure case 3
+
+# Careful: extendedglob off from this point.
+
unsetopt extendedglob
print -r -- ${(*)=${(@s.+.):-A+B}/(#b)(?)/-${(L)match[1]} ${match[1]}}
0:the '*' qualfier enables extended_glob for pattern matching