grep: fix null pattern and empty pattern file behavior

The null pattern semantics were terrible because I tried to match gnugrep,
but I got it wrong.  Let's unwind that:

- The null pattern should match every line if neither -w nor -x.
- The null pattern should match empty lines if -x.
- The null pattern should not match any lines if -w.

The first two will stop processing (shortcut) even if additional patterns
are specified. In any other case, we will continue processing other
patterns.  If no other patterns are specified beside a null pattern, then
we match if neither -w nor -x or set and do not match if either of those
are specified.

The justification for -w is that it should match on a whole word, but the
null pattern deos not have a whole word to match on.

Empty pattern files should never match anything, and more importantly, -v
should cause everything to be written.

PR:		253209
MFC-after:	4 days
This commit is contained in:
Kyle Evans 2021-02-04 15:26:45 -06:00
parent 856789c123
commit f823c6dc73
3 changed files with 35 additions and 33 deletions

View file

@ -489,11 +489,11 @@ wflag_emptypat_body()
atf_check -s exit:1 -o empty grep -w -e "" test1
atf_check -o file:test2 grep -w -e "" test2
atf_check -o file:test2 grep -vw -e "" test2
atf_check -s exit:1 -o empty grep -w -e "" test3
atf_check -o file:test4 grep -w -e "" test4
atf_check -o file:test4 grep -vw -e "" test4
}
atf_test_case xflag_emptypat
@ -504,7 +504,6 @@ xflag_emptypat_body()
printf "qaz" > test3
printf " qaz\n" > test4
# -x is whole-line, more strict than -w.
atf_check -s exit:1 -o empty grep -x -e "" test1
atf_check -o file:test2 grep -x -e "" test2
@ -550,6 +549,22 @@ xflag_emptypat_plus_body()
atf_check -o file:spacelines grep -Fxvf patlist1 target_spacelines
}
atf_test_case emptyfile
emptyfile_descr()
{
atf_set "descr" "Check for proper handling of empty pattern files (PR 253209)"
}
emptyfile_body()
{
:> epatfile
echo "blubb" > subj
# From PR 253209, bsdgrep was short-circuiting completely on an empty
# file, but we should have still been processing lines.
atf_check -s exit:1 -o empty fgrep -f epatfile subj
atf_check -o file:subj fgrep -vf epatfile subj
}
atf_test_case excessive_matches
excessive_matches_head()
{
@ -946,6 +961,7 @@ atf_init_test_cases()
atf_add_test_case wflag_emptypat
atf_add_test_case xflag_emptypat
atf_add_test_case xflag_emptypat_plus
atf_add_test_case emptyfile
atf_add_test_case excessive_matches
atf_add_test_case wv_combo_break
atf_add_test_case fgrep_sanity

View file

@ -69,13 +69,6 @@ const char *errstr[] = {
int cflags = REG_NOSUB | REG_NEWLINE;
int eflags = REG_STARTEND;
/* XXX TODO: Get rid of this flag.
* matchall is a gross hack that means that an empty pattern was passed to us.
* It is a necessary evil at the moment because our regex(3) implementation
* does not allow for empty patterns, as supported by POSIX's definition of
* grammar for BREs/EREs. When libregex becomes available, it would be wise
* to remove this and let regex(3) handle the dirty details of empty patterns.
*/
bool matchall;
/* Searching patterns */
@ -637,10 +630,6 @@ main(int argc, char *argv[])
aargc -= optind;
aargv += optind;
/* Empty pattern file matches nothing */
if (!needpattern && (patterns == 0) && !matchall)
exit(1);
/* Fail if we don't have any pattern */
if (aargc == 0 && needpattern)
usage();

View file

@ -471,31 +471,28 @@ procline(struct parsec *pc)
matchidx = pc->matchidx;
/*
* With matchall (empty pattern), we can try to take some shortcuts.
* Emtpy patterns trivially match every line except in the -w and -x
* cases. For -w (whole-word) cases, we only match if the first
* character isn't a word-character. For -x (whole-line) cases, we only
* match if the line is empty.
*/
/* Null pattern shortcuts. */
if (matchall) {
if (pc->ln.len == 0)
if (xflag && pc->ln.len == 0) {
/* Matches empty lines (-x). */
return (true);
if (wflag) {
wend = L' ';
if (sscanf(&pc->ln.dat[0], "%lc", &wend) == 1 &&
!iswword(wend))
return (true);
} else if (!xflag)
} else if (!wflag && !xflag) {
/* Matches every line (no -w or -x). */
return (true);
}
/*
* If we don't have any other patterns, we really don't match.
* If we do have other patterns, we must fall through and check
* them.
* If we only have the NULL pattern, whether we match or not
* depends on if we got here with -w or -x. If either is set,
* the answer is no. If we have other patterns, we'll defer
* to them.
*/
if (patterns == 0)
return (false);
if (patterns == 0) {
return (!(wflag || xflag));
}
} else if (patterns == 0) {
/* Pattern file with no patterns. */
return (false);
}
matched = false;