34587: ensure multibyte characters don't overflow.

Without this check, a multibyte character could absorb the token bytes that follow it, with bad results.

Add test.
This commit is contained in:
Peter Stephenson 2015-02-20 16:25:47 +00:00
parent df7a657b10
commit b237ba0a8e
3 changed files with 28 additions and 5 deletions

View file

@ -1,3 +1,8 @@
2015-02-20 Peter Stephenson <p.stephenson@samsung.com>
* 34587: Src/utils.c, Test/D07multibyte.ztst: ensure multibyte
characters don't overflow into tokens and add test.
2015-02-19 Barton E. Schaefer <schaefer@zsh.org>
* 34568: Src/Module.c: use META_HEAPDUP when passing dlerror()

View file

@ -4797,6 +4797,14 @@ mb_metacharlenconv_r(const char *s, wint_t *wcp, mbstate_t *mbsp)
inchar = *++ptr ^ 32;
DPUTS(!*ptr,
"BUG: unexpected end of string in mb_metacharlen()\n");
} else if (imeta(*ptr)) {
/*
* As this is metafied input, this is a token --- this
* can't be a part of the string. It might be
* something on the end of an unbracketed parameter
* reference, for example.
*/
break;
} else
inchar = *ptr;
ptr++;

View file

@ -448,20 +448,30 @@
0:read passes through invalid multibyte characters
>0xC5
word=abcま
word=abcま
word[-1]=
print $word
word=abcま
word=abcま
word[-2]=
print $word
word=abcま
word=abcま
word[4]=d
print $word
word=abcま
word=abcま
word[3]=not_c
print $word
print $word
0:assignment with negative indices
>abc
>abま
>abcd
>abnot_cま
# The following doesn't necessarily need UTF-8, but this gives
# us the full effect --- if we parse this wrongly the \xe9
# in combination with the tokenized input afterwards looks like a
# valid UTF-8 character. But it isn't.
print $'$\xe9#``' >test_bad_param
(setopt nonomatch
. ./test_bad_param)
127:Invalid parameter name with following tokenized input
?./test_bad_param:1: command not found: $\M-i#