24275: fixes for multibyte characters on Solaris

This commit is contained in:
Peter Stephenson 2007-12-17 17:11:29 +00:00
parent 1ac4f6a77f
commit 20607774dc
4 changed files with 43 additions and 9 deletions

View file

@ -1,5 +1,10 @@
2007-12-17 Peter Stephenson <pws@csr.com>
* 24275: Src/builtin.c, Src/Zle/zle_utils.c,
Test/D07multibyte.ztst: Solaris returns the full character
length from mbrlen() etc. even if the call started in the
middle of a character; bad characters are silently converted
to a question mark.
* unposted: Config/version.mk: 4.3.4-dev-5.
* unposted: Src/lex.c: minor typo

View file

@ -294,6 +294,16 @@ stringaszleline(char *instr, int incs, int *outll, int *outsz, int *outcs)
* (certainly true for Unicode and unlikely to be false
* in any non-pathological multibyte representation). */
cnt = 1;
} else if (cnt > ll) {
/*
* Some multibyte implementations return the
* full length of a previous incomplete character
* instead of the remaining length.
* This is paranoia: it only applies if we start
* midway through a multibyte character, which
* presumably can't happen.
*/
cnt = ll;
}
if (outcs) {
@ -843,6 +853,12 @@ showmsg(char const *msg)
cnt = 1;
/* FALL THROUGH */
default:
/*
* Paranoia: only needed if we start in the middle
* of a multibyte character, and only in some implementations.
*/
if (cnt > ulen)
cnt = ulen;
n = wcs_nicechar(c, &width, NULL);
break;
}

View file

@ -4927,7 +4927,7 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
break;
}
*bptr = (char) val;
#ifdef MULTIBYTE_SUPPORT
#ifdef MULTIBYTE_SUPPORT
if (isset(MULTIBYTE)) {
ret = mbrlen(bptr++, 1, &mbs);
if (ret == MB_INVALID)
@ -4954,8 +4954,8 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
eof = 1;
break;
}
#ifdef MULTIBYTE_SUPPORT
#ifdef MULTIBYTE_SUPPORT
if (isset(MULTIBYTE)) {
while (val > 0) {
ret = mbrlen(bptr, val, &mbs);
@ -4970,6 +4970,10 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
}
else if (ret == 0) /* handle null as normal char */
ret = 1;
else if (ret > val) {
/* Some mbrlen()s return the full char len, not the remaining len */
ret = val;
}
nchars--;
val -= ret;
bptr += ret;

View file

@ -388,9 +388,18 @@
# This also isn't strictly multibyte and is here to reduce the
# likelihood of a "can't do character set conversion" error.
testfn() { (LC_ALL=C; print $'\u00e9') }
repeat 4 testfn
1:error handling in Unicode quoting
?testfn: character not in range
?testfn: character not in range
?testfn: character not in range
?testfn: character not in range
repeat 4 testfn 2>&1 | while read line; do
if [[ $line = *"character not in range"* ]]; then
print OK
elif [[ $line = "?" ]]; then
print OK
else
print Failed: no error message and no question mark
fi
done
true
0:error handling in Unicode quoting
>OK
>OK
>OK
>OK