mirror of
https://github.com/freebsd/freebsd-src
synced 2024-07-21 10:19:04 +00:00
ota: Import One True Awk from 20240122 (6a07a6d3bb63)
Jan 22, 2024: Restore the ability to compile with g++. Thanks to Arnold Robbins. Dec 24, 2023: Matchop dereference after free problem fix when the first argument is a function call. Thanks to Oguz Ismail Uysal. Fix inconsistent handling of --csv and FS set in the command line. Thanks to Wilbert van der Poel. Casting changes to int for is* functions. Nov 27, 2023: Fix exit status of system on MacOS. Update to REGRESS. Thanks to Arnold Robbins. Fix inconsistent handling of -F and --csv, and loss of csv mode when FS is set. Sponsored by: Netflix
This commit is contained in:
parent
18df98168f
commit
e8a605e129
48
FIXES
48
FIXES
|
@ -25,10 +25,27 @@ THIS SOFTWARE.
|
|||
This file lists all bug fixes, changes, etc., made since the
|
||||
second edition of the AWK book was published in September 2023.
|
||||
|
||||
Jan 22, 2024:
|
||||
Restore the ability to compile with g++. Thanks to
|
||||
Arnold Robbins.
|
||||
|
||||
Dec 24, 2023:
|
||||
Matchop dereference after free problem fix when the first
|
||||
argument is a function call. Thanks to Oguz Ismail Uysal.
|
||||
Fix inconsistent handling of --csv and FS set in the
|
||||
command line. Thanks to Wilbert van der Poel.
|
||||
Casting changes to int for is* functions.
|
||||
|
||||
Nov 27, 2023:
|
||||
Fix exit status of system on MacOS. Update to REGRESS.
|
||||
Thanks to Arnold Robbins.
|
||||
Fix inconsistent handling of -F and --csv, and loss of csv
|
||||
mode when FS is set.
|
||||
|
||||
Nov 24, 2023:
|
||||
Fix issue #199: gototab improvements to dynamically resize the
|
||||
table, qsort and bsearch to improve the lookup speed as the
|
||||
table gets larger for multibyte input. thanks to Arnold Robbins.
|
||||
table gets larger for multibyte input. Thanks to Arnold Robbins.
|
||||
|
||||
Nov 23, 2023:
|
||||
Fix Issue #169, related to escape sequences in strings.
|
||||
|
@ -37,29 +54,29 @@ Nov 23, 2023:
|
|||
by Miguel Pineiro Jr.
|
||||
|
||||
Nov 20, 2023:
|
||||
rewrite of fnematch to fix a number of issues, including
|
||||
Rewrite of fnematch to fix a number of issues, including
|
||||
extraneous output, out-of-bounds access, number of bytes
|
||||
to push back after a failed match, etc.
|
||||
thanks to Miguel Pineiro Jr.
|
||||
Thanks to Miguel Pineiro Jr.
|
||||
|
||||
Nov 15, 2023:
|
||||
Man page edit, regression test fixes. thanks to Arnold Robbins
|
||||
consolidation of sub and gsub into dosub, removing duplicate
|
||||
code. thanks to Miguel Pineiro Jr.
|
||||
Man page edit, regression test fixes. Thanks to Arnold Robbins.
|
||||
Consolidation of sub and gsub into dosub, removing duplicate
|
||||
code. Thanks to Miguel Pineiro Jr.
|
||||
gcc replaced with cc everywhere.
|
||||
|
||||
Oct 30, 2023:
|
||||
multiple fixes and a minor code cleanup.
|
||||
disabled utf-8 for non-multibyte locales, such as C or POSIX.
|
||||
fixed a bad char * cast that causes incorrect results on big-endian
|
||||
systems. also fixed an out-of-bounds read for empty CCL.
|
||||
fixed a buffer overflow in substr with utf-8 strings.
|
||||
many thanks to Todd C Miller.
|
||||
Multiple fixes and a minor code cleanup.
|
||||
Disabled utf-8 for non-multibyte locales, such as C or POSIX.
|
||||
Fixed a bad char * cast that causes incorrect results on big-endian
|
||||
systems. Also fixed an out-of-bounds read for empty CCL.
|
||||
Fixed a buffer overflow in substr with utf-8 strings.
|
||||
Many thanks to Todd C Miller.
|
||||
|
||||
Sep 24, 2023:
|
||||
fnematch and getrune have been overhauled to solve issues around
|
||||
unicode FS and RS. also fixed gsub null match issue with unicode.
|
||||
big thanks to Arnold Robbins.
|
||||
unicode FS and RS. Also fixed gsub null match issue with unicode.
|
||||
Big thanks to Arnold Robbins.
|
||||
|
||||
Sep 12, 2023:
|
||||
Fixed a length error in u8_byte2char that set RSTART to
|
||||
|
@ -84,9 +101,8 @@ Sep 12, 2023:
|
|||
of a string of 3 emojis is 3, not 12 as it would be if bytes
|
||||
were counted.
|
||||
|
||||
Regular expressions are processes as UTF-8.
|
||||
Regular expressions are processed as UTF-8.
|
||||
|
||||
Unicode literals can be written as \u followed by one
|
||||
to eight hexadecimal digits. These may appear in strings and
|
||||
regular expressions.
|
||||
|
||||
|
|
|
@ -27,6 +27,7 @@ Regular expressions may include UTF-8 code points, including `\u`.
|
|||
The option `--csv` turns on CSV processing of input:
|
||||
fields are separated by commas, fields may be quoted with
|
||||
double-quote (`"`) characters, quoted fields may contain embedded newlines.
|
||||
Double-quotes in fields have to be doubled and enclosed in quoted fields.
|
||||
In CSV mode, `FS` is ignored.
|
||||
|
||||
If no explicit separator argument is provided,
|
||||
|
@ -117,6 +118,8 @@ move this to some place like `/usr/bin/awk`.
|
|||
|
||||
If your system does not have `yacc` or `bison` (the GNU
|
||||
equivalent), you need to install one of them first.
|
||||
The default in the `makefile` is `bison`; you will have
|
||||
to edit the `makefile` to use `yacc`.
|
||||
|
||||
NOTE: This version uses ISO/IEC C99, as you should also. We have
|
||||
compiled this without any changes using `gcc -Wall` and/or local C
|
||||
|
@ -143,4 +146,4 @@ is not at the top of our priority list.
|
|||
|
||||
#### Last Updated
|
||||
|
||||
Mon 16 Oct 2023 11:23:08 IDT
|
||||
Mon 05 Feb 2024 08:46:55 IST
|
||||
|
|
38
b.c
38
b.c
|
@ -116,7 +116,7 @@ static int entry_cmp(const void *l, const void *r);
|
|||
static int get_gototab(fa*, int, int);
|
||||
static int set_gototab(fa*, int, int, int);
|
||||
static void clear_gototab(fa*, int);
|
||||
extern int u8_rune(int *, const uschar *);
|
||||
extern int u8_rune(int *, const char *);
|
||||
|
||||
static int *
|
||||
intalloc(size_t n, const char *f)
|
||||
|
@ -346,7 +346,7 @@ int hexstr(const uschar **pp, int max) /* find and eval hex string at pp, return
|
|||
int i;
|
||||
|
||||
for (i = 0, p = *pp; i < max && isxdigit(*p); i++, p++) {
|
||||
if (isdigit(*p))
|
||||
if (isdigit((int) *p))
|
||||
n = 16 * n + *p - '0';
|
||||
else if (*p >= 'a' && *p <= 'f')
|
||||
n = 16 * n + *p - 'a' + 10;
|
||||
|
@ -416,7 +416,7 @@ int *cclenter(const char *argp) /* add a character class */
|
|||
FATAL("out of space for character class [%.10s...] 1", p);
|
||||
bp = buf;
|
||||
for (i = 0; *p != 0; ) {
|
||||
n = u8_rune(&c, p);
|
||||
n = u8_rune(&c, (const char *) p);
|
||||
p += n;
|
||||
if (c == '\\') {
|
||||
c = quoted(&p);
|
||||
|
@ -424,7 +424,7 @@ int *cclenter(const char *argp) /* add a character class */
|
|||
if (*p != 0) {
|
||||
c = bp[-1];
|
||||
/* c2 = *p++; */
|
||||
n = u8_rune(&c2, p);
|
||||
n = u8_rune(&c2, (const char *) p);
|
||||
p += n;
|
||||
if (c2 == '\\')
|
||||
c2 = quoted(&p); /* BUG: sets p, has to be u8 size */
|
||||
|
@ -607,18 +607,18 @@ static void resize_gototab(fa *f, int state)
|
|||
size_t orig_size = f->gototab[state].allocated; // 2nd half of new mem is this size
|
||||
memset(p + orig_size, 0, orig_size * sizeof(gtte)); // clean it out
|
||||
|
||||
f->gototab[state].allocated = new_size; // update gotottab info
|
||||
f->gototab[state].allocated = new_size; // update gototab info
|
||||
f->gototab[state].entries = p;
|
||||
}
|
||||
|
||||
static int get_gototab(fa *f, int state, int ch) /* hide gototab inplementation */
|
||||
static int get_gototab(fa *f, int state, int ch) /* hide gototab implementation */
|
||||
{
|
||||
gtte key;
|
||||
gtte *item;
|
||||
|
||||
key.ch = ch;
|
||||
key.state = 0; /* irrelevant */
|
||||
item = bsearch(& key, f->gototab[state].entries,
|
||||
item = (gtte *) bsearch(& key, f->gototab[state].entries,
|
||||
f->gototab[state].inuse, sizeof(gtte),
|
||||
entry_cmp);
|
||||
|
||||
|
@ -638,7 +638,7 @@ static int entry_cmp(const void *l, const void *r)
|
|||
return left->ch - right->ch;
|
||||
}
|
||||
|
||||
static int set_gototab(fa *f, int state, int ch, int val) /* hide gototab inplementation */
|
||||
static int set_gototab(fa *f, int state, int ch, int val) /* hide gototab implementation */
|
||||
{
|
||||
if (f->gototab[state].inuse == 0) {
|
||||
f->gototab[state].entries[0].ch = ch;
|
||||
|
@ -662,7 +662,7 @@ static int set_gototab(fa *f, int state, int ch, int val) /* hide gototab inplem
|
|||
|
||||
key.ch = ch;
|
||||
key.state = 0; /* irrelevant */
|
||||
item = bsearch(& key, f->gototab[state].entries,
|
||||
item = (gtte *) bsearch(& key, f->gototab[state].entries,
|
||||
f->gototab[state].inuse, sizeof(gtte),
|
||||
entry_cmp);
|
||||
|
||||
|
@ -710,7 +710,7 @@ int match(fa *f, const char *p0) /* shortest match ? */
|
|||
return(1);
|
||||
do {
|
||||
/* assert(*p < NCHARS); */
|
||||
n = u8_rune(&rune, p);
|
||||
n = u8_rune(&rune, (const char *) p);
|
||||
if ((ns = get_gototab(f, s, rune)) != 0)
|
||||
s = ns;
|
||||
else
|
||||
|
@ -743,7 +743,7 @@ int pmatch(fa *f, const char *p0) /* longest match, for sub */
|
|||
if (f->out[s]) /* final state */
|
||||
patlen = q-p;
|
||||
/* assert(*q < NCHARS); */
|
||||
n = u8_rune(&rune, q);
|
||||
n = u8_rune(&rune, (const char *) q);
|
||||
if ((ns = get_gototab(f, s, rune)) != 0)
|
||||
s = ns;
|
||||
else
|
||||
|
@ -774,7 +774,7 @@ int pmatch(fa *f, const char *p0) /* longest match, for sub */
|
|||
s = 2;
|
||||
if (*p == 0)
|
||||
break;
|
||||
n = u8_rune(&rune, p);
|
||||
n = u8_rune(&rune, (const char *) p);
|
||||
p += n;
|
||||
} while (1); /* was *p++ */
|
||||
return (0);
|
||||
|
@ -799,7 +799,7 @@ int nematch(fa *f, const char *p0) /* non-empty match, for sub */
|
|||
if (f->out[s]) /* final state */
|
||||
patlen = q-p;
|
||||
/* assert(*q < NCHARS); */
|
||||
n = u8_rune(&rune, q);
|
||||
n = u8_rune(&rune, (const char *) q);
|
||||
if ((ns = get_gototab(f, s, rune)) != 0)
|
||||
s = ns;
|
||||
else
|
||||
|
@ -887,7 +887,7 @@ bool fnematch(fa *pfa, FILE *f, char **pbuf, int *pbufsize, int quantum)
|
|||
}
|
||||
}
|
||||
|
||||
j += u8_rune(&c, (uschar *)j);
|
||||
j += u8_rune(&c, j);
|
||||
|
||||
if ((ns = get_gototab(pfa, s, c)) != 0)
|
||||
s = ns;
|
||||
|
@ -907,7 +907,7 @@ bool fnematch(fa *pfa, FILE *f, char **pbuf, int *pbufsize, int quantum)
|
|||
break; /* best match found */
|
||||
|
||||
/* no match at origin i, next i and start over */
|
||||
i += u8_rune(&c, (uschar *)i);
|
||||
i += u8_rune(&c, i);
|
||||
if (c == 0)
|
||||
break; /* no match */
|
||||
j = i;
|
||||
|
@ -1229,8 +1229,6 @@ static int repeat(const uschar *reptok, int reptoklen, const uschar *atom,
|
|||
return 0;
|
||||
}
|
||||
|
||||
extern int u8_rune(int *, const uschar *); /* run.c; should be in header file */
|
||||
|
||||
int relex(void) /* lexical analyzer for reparse */
|
||||
{
|
||||
int c, n;
|
||||
|
@ -1248,7 +1246,7 @@ int relex(void) /* lexical analyzer for reparse */
|
|||
rescan:
|
||||
starttok = prestr;
|
||||
|
||||
if ((n = u8_rune(&rlxval, prestr)) > 1) {
|
||||
if ((n = u8_rune(&rlxval, (const char *) prestr)) > 1) {
|
||||
prestr += n;
|
||||
starttok = prestr;
|
||||
return CHAR;
|
||||
|
@ -1295,7 +1293,7 @@ int relex(void) /* lexical analyzer for reparse */
|
|||
if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, "relex1"))
|
||||
FATAL("out of space for reg expr %.10s...", lastre);
|
||||
for (; ; ) {
|
||||
if ((n = u8_rune(&rlxval, prestr)) > 1) {
|
||||
if ((n = u8_rune(&rlxval, (const char *) prestr)) > 1) {
|
||||
for (i = 0; i < n; i++)
|
||||
*bp++ = *prestr++;
|
||||
continue;
|
||||
|
@ -1389,7 +1387,7 @@ int relex(void) /* lexical analyzer for reparse */
|
|||
}
|
||||
break;
|
||||
case '{':
|
||||
if (isdigit(*(prestr))) {
|
||||
if (isdigit((int) *(prestr))) {
|
||||
num = 0; /* Process as a repetition */
|
||||
n = -1; m = -1;
|
||||
commafound = false;
|
||||
|
|
|
@ -11,6 +11,7 @@ do
|
|||
echo === $i
|
||||
OUT=${i%.awk}.OUT
|
||||
OK=${i%.awk}.ok
|
||||
OK2=${i%.awk}.ok2
|
||||
IN=${i%.awk}.in
|
||||
input=
|
||||
if [ -f $IN ]
|
||||
|
@ -20,9 +21,12 @@ do
|
|||
|
||||
../a.out -f $i $input > $OUT 2>&1
|
||||
if cmp -s $OK $OUT
|
||||
then
|
||||
rm -f $OUT
|
||||
elif [ -f $OK2 ] && cmp -s $OK2 $OUT
|
||||
then
|
||||
rm -f $OUT
|
||||
else
|
||||
echo ++++ $i failed!
|
||||
echo '++++ $i failed!'
|
||||
fi
|
||||
done
|
||||
|
|
12
lib.c
12
lib.c
|
@ -399,7 +399,7 @@ void fldbld(void) /* create fields from current record */
|
|||
i = 0; /* number of fields accumulated here */
|
||||
if (inputFS == NULL) /* make sure we have a copy of FS */
|
||||
savefs();
|
||||
if (strlen(inputFS) > 1) { /* it's a regular expression */
|
||||
if (!CSV && strlen(inputFS) > 1) { /* it's a regular expression */
|
||||
i = refldbld(r, inputFS);
|
||||
} else if (!CSV && (sep = *inputFS) == ' ') { /* default whitespace */
|
||||
for (i = 0; ; ) {
|
||||
|
@ -845,10 +845,10 @@ int isclvar(const char *s) /* is s of form var=something ? */
|
|||
{
|
||||
const char *os = s;
|
||||
|
||||
if (!isalpha((uschar) *s) && *s != '_')
|
||||
if (!isalpha((int) *s) && *s != '_')
|
||||
return 0;
|
||||
for ( ; *s; s++)
|
||||
if (!(isalnum((uschar) *s) || *s == '_'))
|
||||
if (!(isalnum((int) *s) || *s == '_'))
|
||||
break;
|
||||
return *s == '=' && s > os;
|
||||
}
|
||||
|
@ -883,7 +883,7 @@ bool is_valid_number(const char *s, bool trailing_stuff_ok,
|
|||
if (no_trailing)
|
||||
*no_trailing = false;
|
||||
|
||||
while (isspace(*s))
|
||||
while (isspace((int) *s))
|
||||
s++;
|
||||
|
||||
/* no hex floating point, sorry */
|
||||
|
@ -895,7 +895,7 @@ bool is_valid_number(const char *s, bool trailing_stuff_ok,
|
|||
is_nan = (strncasecmp(s+1, "nan", 3) == 0);
|
||||
is_inf = (strncasecmp(s+1, "inf", 3) == 0);
|
||||
if ((is_nan || is_inf)
|
||||
&& (isspace(s[4]) || s[4] == '\0'))
|
||||
&& (isspace((int) s[4]) || s[4] == '\0'))
|
||||
goto convert;
|
||||
else if (! isdigit(s[1]) && s[1] != '.')
|
||||
return false;
|
||||
|
@ -918,7 +918,7 @@ bool is_valid_number(const char *s, bool trailing_stuff_ok,
|
|||
/*
|
||||
* check for trailing stuff
|
||||
*/
|
||||
while (isspace(*ep))
|
||||
while (isspace((int) *ep))
|
||||
ep++;
|
||||
|
||||
if (no_trailing != NULL)
|
||||
|
|
6
main.c
6
main.c
|
@ -22,7 +22,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
|
|||
THIS SOFTWARE.
|
||||
****************************************************************/
|
||||
|
||||
const char *version = "version 20231124";
|
||||
const char *version = "version 20240122";
|
||||
|
||||
#define DEBUG
|
||||
#include <stdio.h>
|
||||
|
@ -199,6 +199,10 @@ int main(int argc, char *argv[])
|
|||
argc--;
|
||||
argv++;
|
||||
}
|
||||
|
||||
if (CSV && (fs != NULL || lookup("FS", symtab) != NULL))
|
||||
WARNING("danger: don't set FS when --csv is in effect");
|
||||
|
||||
/* argv[1] is now the first argument */
|
||||
if (npfile == 0) { /* no -f; first argument is program */
|
||||
if (argc <= 1) {
|
||||
|
|
31
run.c
31
run.c
|
@ -795,7 +795,7 @@ int runetochar(char *str, int c)
|
|||
|
||||
Cell *matchop(Node **a, int n) /* ~ and match() */
|
||||
{
|
||||
Cell *x, *y;
|
||||
Cell *x, *y, *z;
|
||||
char *s, *t;
|
||||
int i;
|
||||
int cstart, cpatlen, len;
|
||||
|
@ -817,7 +817,7 @@ Cell *matchop(Node **a, int n) /* ~ and match() */
|
|||
i = (*mf)(pfa, s);
|
||||
tempfree(y);
|
||||
}
|
||||
tempfree(x);
|
||||
z = x;
|
||||
if (n == MATCHFCN) {
|
||||
int start = patbeg - s + 1; /* origin 1 */
|
||||
if (patlen < 0) {
|
||||
|
@ -839,11 +839,13 @@ Cell *matchop(Node **a, int n) /* ~ and match() */
|
|||
x = gettemp();
|
||||
x->tval = NUM;
|
||||
x->fval = start;
|
||||
return x;
|
||||
} else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0))
|
||||
return(True);
|
||||
x = True;
|
||||
else
|
||||
return(False);
|
||||
x = False;
|
||||
|
||||
tempfree(z);
|
||||
return x;
|
||||
}
|
||||
|
||||
|
||||
|
@ -1298,7 +1300,8 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co
|
|||
|
||||
if (bs == NULL) { // invalid character
|
||||
// use unicode invalid character, 0xFFFD
|
||||
bs = "\357\277\275";
|
||||
static char invalid_char[] = "\357\277\275";
|
||||
bs = invalid_char;
|
||||
count = 3;
|
||||
}
|
||||
t = bs;
|
||||
|
@ -2067,6 +2070,7 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
|
|||
int status = 0;
|
||||
time_t tv;
|
||||
struct tm *tm;
|
||||
int estatus = 0;
|
||||
|
||||
t = ptoi(a[0]);
|
||||
x = execute(a[1]);
|
||||
|
@ -2167,20 +2171,21 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
|
|||
break;
|
||||
case FSYSTEM:
|
||||
fflush(stdout); /* in case something is buffered already */
|
||||
status = system(getsval(x));
|
||||
u = status;
|
||||
estatus = status = system(getsval(x));
|
||||
if (status != -1) {
|
||||
if (WIFEXITED(status)) {
|
||||
u = WEXITSTATUS(status);
|
||||
estatus = WEXITSTATUS(status);
|
||||
} else if (WIFSIGNALED(status)) {
|
||||
u = WTERMSIG(status) + 256;
|
||||
estatus = WTERMSIG(status) + 256;
|
||||
#ifdef WCOREDUMP
|
||||
if (WCOREDUMP(status))
|
||||
u += 256;
|
||||
estatus += 256;
|
||||
#endif
|
||||
} else /* something else?!? */
|
||||
u = 0;
|
||||
estatus = 0;
|
||||
}
|
||||
/* else estatus was set to -1 */
|
||||
u = estatus;
|
||||
break;
|
||||
case FRAND:
|
||||
/* random() returns numbers in [0..2^31-1]
|
||||
|
@ -2539,7 +2544,7 @@ Cell *dosub(Node **a, int subop) /* sub and gsub */
|
|||
start = getsval(x);
|
||||
while (pmatch(pfa, start)) {
|
||||
if (buf == NULL) {
|
||||
if ((pb = buf = malloc(bufsz)) == NULL)
|
||||
if ((pb = buf = (char *) malloc(bufsz)) == NULL)
|
||||
FATAL("out of memory in dosub");
|
||||
tempstat = pfa->initstat;
|
||||
pfa->initstat = 2;
|
||||
|
|
|
@ -17,7 +17,7 @@ $1 ~ /try/ { # new test
|
|||
sub(/try /, "")
|
||||
prog = $0
|
||||
printf("%3d %s\n", nt, prog)
|
||||
prog = sprintf("%s -F\"\\t\" '"'"'%s'"'"'", awk, prog)
|
||||
prog = sprintf("%s '"'"'%s'"'"'", awk, prog)
|
||||
# print "prog is", prog
|
||||
nt2 = 0
|
||||
while (getline > 0) {
|
||||
|
|
Can't render this file because it contains an unexpected character in line 9 and column 7.
|
|
@ -84,3 +84,5 @@ grep "out of range field" foo >/dev/null || echo 1>&2 "BAD: T.overflow \$400000"
|
|||
rm -rf /tmp/awktestfoo*
|
||||
$awk 'BEGIN { for (i=1; i <= 1000; i++) print i >("/tmp/awktestfoo" i) }'
|
||||
ls /tmp/awktestfoo* | grep '1000' >/dev/null || echo 1>&2 "BAD: T.overflow openfiles"
|
||||
rm -rf /tmp/awktestfoo*
|
||||
exit 0
|
||||
|
|
|
@ -220,5 +220,6 @@ $awk 'BEGIN {
|
|||
echo 'cat dog' > $TEMP2
|
||||
diff $TEMP1 $TEMP2 || fail 'BAD: T.split(a, b, "[\r\n]+")'
|
||||
|
||||
rm -rf $WORKDIR
|
||||
|
||||
exit $RESULT
|
||||
|
|
Loading…
Reference in a new issue