ota: Import 9f971fba471b from bsd-feature for 20240623

Jun 23, 2024
	Fix signal for system-status test. Thanks to Tim van der Molen.
	Rewrite if-else chain as switch. Thanks to Andrew Sukach.

May 27, 2024
	Spelling fixes and removal of unneeded prototypes and extern.
	Thanks to Jonathan Gray.

May 4, 2024
	Fixed a use-after-free bug with ARGV for "delete ARGV".
	Also ENVtab is no longer global. Thanks to Benjamin Sturz
	for spotting the ARGV issue and Todd Miller for the fix.

May 3, 2024
	Remove warnings when compiling with g++. Thanks to Arnold Robbins.
This commit is contained in:
Warner Losh 2024-07-23 15:11:33 -06:00
parent 887b27736b
commit 381c116afc
19 changed files with 143 additions and 71 deletions

26
FIXES
View file

@ -25,15 +25,31 @@ THIS SOFTWARE.
This file lists all bug fixes, changes, etc., made since the
second edition of the AWK book was published in September 2023.
Jun 23, 2024
Fix signal for system-status test. Thanks to Tim van der Molen.
Rewrite if-else chain as switch. Thanks to Andrew Sukach.
May 27, 2024
Spelling fixes and removal of unneeded prototypes and extern.
Thanks to Jonathan Gray.
May 4, 2024
Fixed a use-after-free bug with ARGV for "delete ARGV".
Also ENVtab is no longer global. Thanks to Benjamin Sturz
for spotting the ARGV issue and Todd Miller for the fix.
May 3, 2024:
Remove warnings when compiling with g++. Thanks to Arnold Robbins.
Apr 22, 2024:
fixed regex engine gototab reallocation issue that was
introduced during the Nov 24 rewrite. Thanks to Arnold Robbins.
Fixed regex engine gototab reallocation issue that was
introduced during the Nov 24 rewrite. Thanks to Arnold Robbins.
Fixed a scan bug in split in the case the separator is a single
character. thanks to Oguz Ismail for spotting the issue.
character. Thanks to Oguz Ismail for spotting the issue.
Mar 10, 2024:
fixed use-after-free bug in fnematch due to adjbuf invalidating
the pointers to buf. thanks to github user caffe3 for spotting
Fixed use-after-free bug in fnematch due to adjbuf invalidating
the pointers to buf. Thanks to github user caffe3 for spotting
the issue and providing a fix, and to Miguel Pineiro Jr.
for the alternative fix.
MAX_UTF_BYTES in fnematch has been replaced with awk_mb_cur_max.

View file

@ -224,7 +224,7 @@ January 9, 2020:
mere warnings. Thanks to Martijn Dekker <martijn@inlv.org>.
January 5, 2020:
Fix a bug in the concatentation of two string constants into
Fix a bug in the concatenation of two string constants into
one done in the grammar. Fixes GitHub issue #61. Thanks
to GitHub user awkfan77 for pointing out the direction for
the fix. New test T.concat added to the test suite.
@ -866,7 +866,7 @@ Jan 13, 1999:
added a few (int) casts to silence useless compiler warnings.
e.g., errorflag= in run.c jump().
added proctab.c to the bundle outout; one less thing
added proctab.c to the bundle output; one less thing
to have to compile out of the box.
added calls to _popen and _pclose to the win95 stub for

View file

@ -16,7 +16,7 @@ this affects `length`, `substr`, `index`, `match`, `split`,
points are not necessarily characters.
UTF-8 sequences may appear in literal strings and regular expressions.
Aribtrary characters may be included with `\u` followed by 1 to 8 hexadecimal digits.
Arbitrary characters may be included with `\u` followed by 1 to 8 hexadecimal digits.
### Regular expressions ###

2
TODO
View file

@ -14,6 +14,6 @@ and see exactly which tests fail:
The beebe.tar file appears to be from sometime in the 1990s.
3. Make the One True Awk valgrind clean. In particular add a
a test suite target that runs valgrind on all the tests and
test suite target that runs valgrind on all the tests and
reports if there are any definite losses or any invalid reads
or writes (similar to gawk's test of this nature).

1
awk.h
View file

@ -176,7 +176,6 @@ typedef struct Node {
#define NIL ((Node *) 0)
extern Node *winner;
extern Node *nullstat;
extern Node *nullnode;
/* ctypes */

55
b.c
View file

@ -369,36 +369,49 @@ int quoted(const uschar **pp) /* pick up next thing after a \\ */
/* BUG: should advance by utf-8 char even if makes no sense */
if ((c = *p++) == 't') {
switch ((c = *p++)) {
case 't':
c = '\t';
} else if (c == 'n') {
break;
case 'n':
c = '\n';
} else if (c == 'f') {
break;
case 'f':
c = '\f';
} else if (c == 'r') {
break;
case 'r':
c = '\r';
} else if (c == 'b') {
break;
case 'b':
c = '\b';
} else if (c == 'v') {
break;
case 'v':
c = '\v';
} else if (c == 'a') {
break;
case 'a':
c = '\a';
} else if (c == '\\') {
break;
case '\\':
c = '\\';
} else if (c == 'x') { /* 2 hex digits follow */
c = hexstr(&p, 2); /* this adds a null if number is invalid */
} else if (c == 'u') { /* unicode char number up to 8 hex digits */
break;
case 'x': /* 2 hex digits follow */
c = hexstr(&p, 2); /* this adds a null if number is invalid */
break;
case 'u': /* unicode char number up to 8 hex digits */
c = hexstr(&p, 8);
} else if (isoctdigit(c)) { /* \d \dd \ddd */
int n = c - '0';
if (isoctdigit(*p)) {
n = 8 * n + *p++ - '0';
if (isoctdigit(*p))
break;
default:
if (isoctdigit(c)) { /* \d \dd \ddd */
int n = c - '0';
if (isoctdigit(*p)) {
n = 8 * n + *p++ - '0';
if (isoctdigit(*p))
n = 8 * n + *p++ - '0';
}
c = n;
}
c = n;
} /* else */
/* c = c; */
}
*pp = p;
return c;
}
@ -645,7 +658,7 @@ static int set_gototab(fa *f, int state, int ch, int val) /* hide gototab implem
f->gototab[state].entries[0].state = val;
f->gototab[state].inuse++;
return val;
} else if (ch > f->gototab[state].entries[f->gototab[state].inuse-1].ch) {
} else if ((unsigned)ch > f->gototab[state].entries[f->gototab[state].inuse-1].ch) {
// not seen yet, insert and return
gtt *tab = & f->gototab[state];
if (tab->inuse + 1 >= tab->allocated)
@ -869,7 +882,7 @@ bool fnematch(fa *pfa, FILE *f, char **pbuf, int *pbufsize, int quantum)
* Call u8_rune with at least awk_mb_cur_max ahead in
* the buffer until EOF interferes.
*/
if (k - j < awk_mb_cur_max) {
if (k - j < (int)awk_mb_cur_max) {
if (k + awk_mb_cur_max > buf + bufsize) {
char *obuf = buf;
adjbuf((char **) &buf, &bufsize,

View file

@ -10,7 +10,7 @@ BEGIN {
# Change OFS after (conceptually) rebuilding the record
OFS = "<>"
# Unmodifed nawk prints "a<>b<>3333<>d<>e<>f<>g" because
# Unmodified nawk prints "a<>b<>3333<>d<>e<>f<>g" because
# it delays rebuilding $0 until it's needed, and then it uses
# the current value of OFS. Oops.
print

View file

@ -9,7 +9,7 @@ BEGIN {
status = system("exit 42")
print "normal status", status
status = system("kill -HUP $$")
status = system("kill -KILL $$")
print "death by signal status", status
status = system("kill -ABRT $$")

View file

@ -1,3 +1,3 @@
normal status 42
death by signal status 257
death by signal status 265
death by signal with core dump status 518

View file

@ -1,3 +1,3 @@
normal status 42
death by signal status 257
death by signal status 265
death by signal with core dump status 262

2
lex.c
View file

@ -225,7 +225,7 @@ int yylex(void)
;
unput(c);
/*
* Next line is a hack, itcompensates for
* Next line is a hack, it compensates for
* unput's treatment of \n.
*/
lineno++;

8
lib.c
View file

@ -335,14 +335,16 @@ int readcsvrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag) /* csv can h
char *getargv(int n) /* get ARGV[n] */
{
Array *ap;
Cell *x;
char *s, temp[50];
extern Array *ARGVtab;
extern Cell *ARGVcell;
ap = (Array *)ARGVcell->sval;
snprintf(temp, sizeof(temp), "%d", n);
if (lookup(temp, ARGVtab) == NULL)
if (lookup(temp, ap) == NULL)
return NULL;
x = setsymtab(temp, "", 0.0, STR, ARGVtab);
x = setsymtab(temp, "", 0.0, STR, ap);
s = getsval(x);
DPRINTF("getargv(%d) returns |%s|\n", n, s);
return s;

48
main.c
View file

@ -22,7 +22,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/
const char *version = "version 20240422";
const char *version = "version 20240623";
#define DEBUG
#include <stdio.h>
@ -62,22 +62,42 @@ static noreturn void fpecatch(int n
)
{
#ifdef SA_SIGINFO
static const char *emsg[] = {
[0] = "Unknown error",
[FPE_INTDIV] = "Integer divide by zero",
[FPE_INTOVF] = "Integer overflow",
[FPE_FLTDIV] = "Floating point divide by zero",
[FPE_FLTOVF] = "Floating point overflow",
[FPE_FLTUND] = "Floating point underflow",
[FPE_FLTRES] = "Floating point inexact result",
[FPE_FLTINV] = "Invalid Floating point operation",
[FPE_FLTSUB] = "Subscript out of range",
};
const char *mesg = NULL;
switch (si->si_code) {
case FPE_INTDIV:
mesg = "Integer divide by zero";
break;
case FPE_INTOVF:
mesg = "Integer overflow";
break;
case FPE_FLTDIV:
mesg = "Floating point divide by zero";
break;
case FPE_FLTOVF:
mesg = "Floating point overflow";
break;
case FPE_FLTUND:
mesg = "Floating point underflow";
break;
case FPE_FLTRES:
mesg = "Floating point inexact result";
break;
case FPE_FLTINV:
mesg = "Invalid Floating point operation";
break;
case FPE_FLTSUB:
mesg = "Subscript out of range";
break;
case 0:
default:
mesg = "Unknown error";
break;
}
#endif
FATAL("floating point exception"
#ifdef SA_SIGINFO
": %s", (size_t)si->si_code < sizeof(emsg) / sizeof(emsg[0]) &&
emsg[si->si_code] ? emsg[si->si_code] : emsg[0]
": %s", mesg
#endif
);
}

View file

@ -32,6 +32,7 @@ CFLAGS = -O2
#CC = cc -O4 -Wall -pedantic -fno-strict-aliasing
#CC = cc -fprofile-arcs -ftest-coverage # then gcov f1.c; cat f1.c.gcov
HOSTCC = cc -g -Wall -pedantic -Wcast-qual
# HOSTCC = g++ -g -Wall -pedantic -Wcast-qual
CC = $(HOSTCC) # change this if cross-compiling.
# By fiat, to make our lives easier, yacc is now defined to be bison.

View file

@ -34,9 +34,6 @@ extern void startreg(void);
extern int input(void);
extern void unput(int);
extern void unputstr(const char *);
extern int yylook(void);
extern int yyback(int *, int);
extern int yyinput(void);
extern fa *makedfa(const char *, bool);
extern fa *mkdfa(const char *, bool);
@ -169,7 +166,6 @@ extern Cell *boolop(Node **, int);
extern Cell *relop(Node **, int);
extern void tfree(Cell *);
extern Cell *gettemp(void);
extern Cell *field(Node **, int);
extern Cell *indirect(Node **, int);
extern Cell *substr(Node **, int);
extern Cell *sindex(Node **, int);

6
run.c
View file

@ -724,7 +724,7 @@ int u8_byte2char(const char *s, int bytenum)
return charnum;
}
/* runetochar() adapted from rune.c in the Plan 9 distributione */
/* runetochar() adapted from rune.c in the Plan 9 distribution */
enum
{
@ -2061,7 +2061,7 @@ static char *nawk_tolower(const char *s)
Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */
{
Cell *x, *y;
Awkfloat u;
Awkfloat u = 0;
int t, sz;
Awkfloat tmp;
char *buf, *fmt;
@ -2513,7 +2513,7 @@ Cell *dosub(Node **a, int subop) /* sub and gsub */
const char *start;
const char *noempty = NULL; /* empty match disallowed here */
size_t m = 0; /* match count */
size_t whichm; /* which match to select, 0 = global */
size_t whichm = 0; /* which match to select, 0 = global */
int mtype; /* match type */
if (a[0] == NULL) { /* 0 => a[1] is already-compiled regexpr */

View file

@ -148,3 +148,26 @@ END {
printf("ARGV[%d] is %s\n", i, ARGV[i])
}' >foo2
diff foo1 foo2 || echo 'BAD: T.argv delete ARGV[2]'
# deleting ARGV used to trigger a use-after-free crash when awk
# iterates over it to read files.
echo >foo1
echo >foo2
echo >foo3
$awk 'BEGIN {
delete ARGV
ARGV[0] = "awk"
ARGV[1] = "/dev/null"
ARGC = 2
} {
# this should not be executed
print "FILENAME: " FILENAME
fflush()
}' foo1 foo2 foo3 >foo4
awkstatus=$?
diff /dev/null foo4
if [ $? -ne 0 ] || [ $awkstatus -ne 0 ]; then
echo 'BAD: T.argv delete ARGV'
fi

View file

@ -1,4 +1,4 @@
echo T.csconcat: test constant string concatentation
echo T.csconcat: test constant string concatenation
awk=${awk-../a.out}

22
tran.c
View file

@ -57,8 +57,7 @@ Cell *fnrloc; /* FNR */
Cell *ofsloc; /* OFS */
Cell *orsloc; /* ORS */
Cell *rsloc; /* RS */
Array *ARGVtab; /* symbol table containing ARGV[...] */
Array *ENVtab; /* symbol table containing ENVIRON[...] */
Cell *ARGVcell; /* cell with symbol table containing ARGV[...] */
Cell *rstartloc; /* RSTART */
Cell *rlengthloc; /* RLENGTH */
Cell *subseploc; /* SUBSEP */
@ -107,36 +106,39 @@ void syminit(void) /* initialize symbol table with builtin vars */
void arginit(int ac, char **av) /* set up ARGV and ARGC */
{
Array *ap;
Cell *cp;
int i;
char temp[50];
ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
cp = setsymtab("ARGV", "", 0.0, ARR, symtab);
ARGVtab = makesymtab(NSYMTAB); /* could be (int) ARGC as well */
ap = makesymtab(NSYMTAB); /* could be (int) ARGC as well */
free(cp->sval);
cp->sval = (char *) ARGVtab;
cp->sval = (char *) ap;
for (i = 0; i < ac; i++) {
double result;
sprintf(temp, "%d", i);
if (is_number(*av, & result))
setsymtab(temp, *av, result, STR|NUM, ARGVtab);
setsymtab(temp, *av, result, STR|NUM, ap);
else
setsymtab(temp, *av, 0.0, STR, ARGVtab);
setsymtab(temp, *av, 0.0, STR, ap);
av++;
}
ARGVcell = cp;
}
void envinit(char **envp) /* set up ENVIRON variable */
{
Array *ap;
Cell *cp;
char *p;
cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab);
ENVtab = makesymtab(NSYMTAB);
ap = makesymtab(NSYMTAB);
free(cp->sval);
cp->sval = (char *) ENVtab;
cp->sval = (char *) ap;
for ( ; *envp; envp++) {
double result;
@ -146,9 +148,9 @@ void envinit(char **envp) /* set up ENVIRON variable */
continue;
*p++ = 0; /* split into two strings at = */
if (is_number(p, & result))
setsymtab(*envp, p, result, STR|NUM, ENVtab);
setsymtab(*envp, p, result, STR|NUM, ap);
else
setsymtab(*envp, p, 0.0, STR, ENVtab);
setsymtab(*envp, p, 0.0, STR, ap);
p[-1] = '='; /* restore in case env is passed down to a shell */
}
}