mirror of
https://github.com/zsh-users/zsh
synced 2024-10-06 16:09:31 +00:00
36415: remap bytes from invalid multibyte characters.
These now go to 0xdc00 + index. If wchar_t is a Unicode code point, this is by construction an invalid character within the Unicode range. If it isn't, we would hope the result was no worse than the current fudge.
This commit is contained in:
parent
32f5d3d8c1
commit
f52795ea3e
|
@ -1,3 +1,11 @@
|
|||
2015-09-04 Peter Stephenson <p.stephenson@samsung.com>
|
||||
|
||||
* 36415: Src/pattern.c, Test/D07multibyte.ztst: remap bytes from
|
||||
invalid multibyte characters to 0xDC00 + index which is invalid
|
||||
in Unicode. Strictly this only works if wchar_t is
|
||||
ISO-10646-compliant, however it ought to be at least as good as
|
||||
the current fudge in any case.
|
||||
|
||||
2015-09-03 Peter Stephenson <p.stephenson@samsung.com>
|
||||
|
||||
* 36416: Src/Zle/zle_refresh.c, Src/Zle/zle_utils.c: If
|
||||
|
|
|
@ -224,6 +224,22 @@ typedef zlong zrange_t;
|
|||
typedef unsigned long zrange_t;
|
||||
#endif
|
||||
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
/*
|
||||
* Handle a byte that's not part of a valid character.
|
||||
*
|
||||
* This range in Unicode is recommended for purposes of this
|
||||
* kind as it corresponds to invalid characters.
|
||||
*
|
||||
* Note that this strictly only works if wchar_t represents
|
||||
* Unicode code points, which isn't necessarily true; however,
|
||||
* converting an invalid character into an unknown format is
|
||||
* a bit tricky...
|
||||
*/
|
||||
#define WCHAR_INVALID(ch) \
|
||||
((wchar_t) (0xDC00 + STOUC(ch)))
|
||||
#endif /* MULTIBYTE_SUPPORT */
|
||||
|
||||
/*
|
||||
* Array of characters corresponding to zpc_chars enum, which it must match.
|
||||
*/
|
||||
|
@ -353,10 +369,10 @@ metacharinc(char **x)
|
|||
return wc;
|
||||
}
|
||||
|
||||
/* Error. Treat as single byte. */
|
||||
/* Error. */
|
||||
/* Reset the shift state for next time. */
|
||||
memset(&shiftstate, 0, sizeof(shiftstate));
|
||||
return (wchar_t) STOUC(*(*x)++);
|
||||
return WCHAR_INVALID(*(*x)++);
|
||||
}
|
||||
|
||||
#else
|
||||
|
@ -1867,10 +1883,10 @@ charref(char *x, char *y)
|
|||
ret = mbrtowc(&wc, x, y-x, &shiftstate);
|
||||
|
||||
if (ret == MB_INVALID || ret == MB_INCOMPLETE) {
|
||||
/* Error. Treat as single byte. */
|
||||
/* Error. */
|
||||
/* Reset the shift state for next time. */
|
||||
memset(&shiftstate, 0, sizeof(shiftstate));
|
||||
return (wchar_t) STOUC(*x);
|
||||
return WCHAR_INVALID(*x);
|
||||
}
|
||||
|
||||
return wc;
|
||||
|
@ -1913,7 +1929,7 @@ charrefinc(char **x, char *y, int *z)
|
|||
size_t ret;
|
||||
|
||||
if (!(patglobflags & GF_MULTIBYTE) || !(STOUC(**x) & 0x80))
|
||||
return (wchar_t) STOUC(*(*x)++);
|
||||
return WCHAR_INVALID(*(*x)++);
|
||||
|
||||
ret = mbrtowc(&wc, *x, y-*x, &shiftstate);
|
||||
|
||||
|
@ -1922,7 +1938,7 @@ charrefinc(char **x, char *y, int *z)
|
|||
*z = 1;
|
||||
/* Reset the shift state for next time. */
|
||||
memset(&shiftstate, 0, sizeof(shiftstate));
|
||||
return (wchar_t) STOUC(*(*x)++);
|
||||
return WCHAR_INVALID(*(*x)++);
|
||||
}
|
||||
|
||||
/* Nulls here are normal characters */
|
||||
|
|
|
@ -508,3 +508,20 @@
|
|||
cd ..
|
||||
}
|
||||
0:cd with special characters
|
||||
|
||||
test_array=(
|
||||
'[[ \xcc = \xcc ]]'
|
||||
'[[ \xcc != \xcd ]]'
|
||||
'[[ \xcc != \ucc ]]'
|
||||
'[[ \ucc = \ucc ]]'
|
||||
'[[ \ucc = [\ucc] ]]'
|
||||
'[[ \xcc != [\ucc] ]]'
|
||||
# Not clear how useful the following is...
|
||||
'[[ \xcc = [\xcc] ]]'
|
||||
)
|
||||
for test in $test_array; do
|
||||
if ! eval ${(g::)test} ; then
|
||||
print -rl "Test $test failed" >&2
|
||||
fi
|
||||
done
|
||||
0:Invalid characters in pattern matching
|
||||
|
|
Loading…
Reference in a new issue