mirror of
https://github.com/zsh-users/zsh
synced 2024-07-21 10:14:19 +00:00
22556: Multibyte separators and delimiters
This commit is contained in:
parent
6ca7b6abdf
commit
50e9ab122b
|
@ -1,3 +1,11 @@
|
|||
2006-07-24 Peter Stephenson <p.w.stephenson@ntlworld.com>
|
||||
|
||||
* 22556: Doc/Zsh/builtins.yo, Functions/Zle/insert-composed-char,
|
||||
Src/builtin.c, Src/pattern.c, Src/subst.c, Src/utils.c, Src/zsh.h,
|
||||
Src/ztype.h, Src/Zle/zle.h, Src/Zle/zle_main.c,
|
||||
Test/D04parameter.ztst, Test/D07multibyte.ztst: Multibyte
|
||||
separators and delimiters.
|
||||
|
||||
2006-07-18 Clint Adams <clint@zsh.org>
|
||||
|
||||
* 22554: Jesse Weinstein: Completion/Unix/Command/_vorbiscomment:
|
||||
|
|
|
@ -1003,6 +1003,10 @@ Read only one (or var(num)) characters. All are assigned to the first
|
|||
var(name), without word splitting. This flag is ignored when tt(-q) is
|
||||
present. Input is read from the terminal unless one of tt(-u) or tt(-p)
|
||||
is present. This option may also be used within zle widgets.
|
||||
|
||||
Note that despite the mnemonic `key' this option does read full
|
||||
characters, which may consist of multiple bytes if the option
|
||||
tt(MULTIBYTE) is set.
|
||||
)
|
||||
item(tt(-z))(
|
||||
Read one entry from the editor buffer stack and assign it to the first
|
||||
|
|
|
@ -128,7 +128,7 @@
|
|||
# 'm Macron
|
||||
# '' Acute
|
||||
|
||||
emulate -LR zsh
|
||||
emulate -L zsh
|
||||
setopt cbases extendedglob printeightbit
|
||||
|
||||
local accent basechar ochar error
|
||||
|
@ -165,7 +165,8 @@ else
|
|||
fi
|
||||
|
||||
local -A charmap
|
||||
charmap=(${=zsh_accented_chars[$accent]})
|
||||
# just in case someone is monkeying with IFS...
|
||||
charmap=(${(s. .)zsh_accented_chars[$accent]})
|
||||
|
||||
if [[ ${#charmap} -eq 0 || -z $charmap[$basechar] ]]; then
|
||||
$error "Combination ${basechar}${accent} is not available."
|
||||
|
|
|
@ -62,11 +62,11 @@ typedef wint_t ZLE_INT_T;
|
|||
#define ZC_iblank wcsiblank
|
||||
#define ZC_icntrl iswcntrl
|
||||
#define ZC_idigit iswdigit
|
||||
#define ZC_iident wcsiident
|
||||
#define ZC_iident(x) wcsitype((x), IIDENT)
|
||||
#define ZC_ilower iswlower
|
||||
#define ZC_inblank iswspace
|
||||
#define ZC_iupper iswupper
|
||||
#define ZC_iword wcsiword
|
||||
#define ZC_iword(x) wcsitype((x), IWORD)
|
||||
|
||||
#define ZC_tolower towlower
|
||||
#define ZC_toupper towupper
|
||||
|
|
|
@ -1290,32 +1290,40 @@ bin_vared(char *name, char **args, Options ops, UNUSED(int func))
|
|||
char **arr = getarrvalue(v), **aptr, **tmparr, **tptr;
|
||||
tptr = tmparr = (char **)zhalloc(sizeof(char *)*(arrlen(arr)+1));
|
||||
for (aptr = arr; *aptr; aptr++) {
|
||||
int sepcount = 0;
|
||||
int sepcount = 0, clen;
|
||||
convchar_t c;
|
||||
/*
|
||||
* See if this word contains a separator character
|
||||
* or backslash
|
||||
*/
|
||||
for (t = *aptr; *t; t++) {
|
||||
if (*t == Meta) {
|
||||
if (isep(t[1] ^ 32))
|
||||
sepcount++;
|
||||
MB_METACHARINIT();
|
||||
for (t = *aptr; *t; ) {
|
||||
if (*t == '\\') {
|
||||
t++;
|
||||
} else if (isep(*t) || *t == '\\')
|
||||
sepcount++;
|
||||
} else {
|
||||
t += MB_METACHARLENCONV(t, &c);
|
||||
if (MB_ZISTYPE(c, ISEP))
|
||||
sepcount++;
|
||||
}
|
||||
}
|
||||
if (sepcount) {
|
||||
/* Yes, so allocate enough space to quote it. */
|
||||
char *newstr, *nptr;
|
||||
newstr = zhalloc(strlen(*aptr)+sepcount+1);
|
||||
/* Go through string quoting separators */
|
||||
MB_METACHARINIT();
|
||||
for (t = *aptr, nptr = newstr; *t; ) {
|
||||
if (*t == Meta) {
|
||||
if (isep(t[1] ^ 32))
|
||||
*nptr++ = '\\';
|
||||
*nptr++ = *t++;
|
||||
} else if (isep(*t) || *t == '\\')
|
||||
if (*t == '\\') {
|
||||
*nptr++ = '\\';
|
||||
*nptr++ = *t++;
|
||||
*nptr++ = *t++;
|
||||
} else {
|
||||
clen = MB_METACHARLENCONV(t, &c);
|
||||
if (MB_ZISTYPE(c, ISEP))
|
||||
*nptr++ = '\\';
|
||||
while (clen--)
|
||||
*nptr++ = *t++;
|
||||
}
|
||||
}
|
||||
*nptr = '\0';
|
||||
/* Stick this into the array of words to join up */
|
||||
|
|
264
Src/builtin.c
264
Src/builtin.c
|
@ -4266,7 +4266,7 @@ bin_break(char *name, char **argv, UNUSED(Options ops), int func)
|
|||
zerrnam(name, "not in while, until, select, or repeat loop");
|
||||
return 1;
|
||||
}
|
||||
contflag = 1; /* ARE WE SUPPOSED TO FALL THROUGH HERE? */
|
||||
contflag = 1; /* FALLTHROUGH */
|
||||
case BIN_BREAK:
|
||||
if (!loops) { /* break is only permitted in loops */
|
||||
zerrnam(name, "not in while, until, select, or repeat loop");
|
||||
|
@ -4560,7 +4560,14 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
|
|||
int readchar = -1, val, resettty = 0;
|
||||
struct ttyinfo saveti;
|
||||
char d;
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
wchar_t delim = L'\n', wc;
|
||||
mbstate_t mbs;
|
||||
char *laststart;
|
||||
size_t ret;
|
||||
#else
|
||||
char delim = '\n';
|
||||
#endif
|
||||
|
||||
if (OPT_HASARG(ops,c='k')) {
|
||||
char *eptr, *optarg = OPT_ARG(ops,c);
|
||||
|
@ -4666,7 +4673,23 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
|
|||
}
|
||||
if (OPT_ISSET(ops,'d')) {
|
||||
char *delimstr = OPT_ARG(ops,'d');
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
wint_t wc;
|
||||
|
||||
if (isset(MULTIBYTE)) {
|
||||
mb_metacharinit();
|
||||
(void)mb_metacharlenconv(delimstr, &wc);
|
||||
}
|
||||
else
|
||||
wc = WEOF;
|
||||
if (wc != WEOF)
|
||||
delim = (wchar_t)wc;
|
||||
else
|
||||
delim = (wchar_t)((delimstr[0] == Meta) ?
|
||||
delimstr[1] ^ 32 : delimstr[0]);
|
||||
#else
|
||||
delim = (delimstr[0] == Meta) ? delimstr[1] ^ 32 : delimstr[0];
|
||||
#endif
|
||||
if (SHTTY != -1) {
|
||||
struct ttyinfo ti;
|
||||
gettyinfo(&ti);
|
||||
|
@ -4710,26 +4733,74 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
|
|||
}
|
||||
}
|
||||
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
memset(&mbs, 0, sizeof(mbs));
|
||||
#endif
|
||||
|
||||
/* option -k means read only a given number of characters (default 1) */
|
||||
if (OPT_ISSET(ops,'k')) {
|
||||
int eof = 0;
|
||||
/* allocate buffer space for result */
|
||||
bptr = buf = (char *)zalloc(nchars+1);
|
||||
|
||||
do {
|
||||
if (izle) {
|
||||
if ((val = getkeyptr(0, NULL)) < 0)
|
||||
if ((val = getkeyptr(0, NULL)) < 0) {
|
||||
eof = 1;
|
||||
break;
|
||||
*bptr++ = (char) val;
|
||||
}
|
||||
*bptr = (char) val;
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
if (isset(MULTIBYTE)) {
|
||||
ret = mbrlen(bptr++, 1, &mbs);
|
||||
if (ret == MB_INVALID)
|
||||
memset(&mbs, 0, sizeof(mbs));
|
||||
/* treat invalid as single character */
|
||||
if (ret != MB_INCOMPLETE)
|
||||
nchars--;
|
||||
continue;
|
||||
} else {
|
||||
bptr++;
|
||||
nchars--;
|
||||
}
|
||||
#else
|
||||
bptr++;
|
||||
nchars--;
|
||||
#endif
|
||||
} else {
|
||||
/* If read returns 0, is end of file */
|
||||
if (readchar >= 0) {
|
||||
*bptr = readchar;
|
||||
val = 1;
|
||||
readchar = -1;
|
||||
} else if ((val = read(readfd, bptr, nchars)) <= 0)
|
||||
} else if ((val = read(readfd, bptr, nchars)) <= 0) {
|
||||
eof = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
if (isset(MULTIBYTE)) {
|
||||
while (val > 0) {
|
||||
ret = mbrlen(bptr, val, &mbs);
|
||||
if (ret == MB_INCOMPLETE) {
|
||||
bptr += val;
|
||||
break;
|
||||
} else {
|
||||
if (ret == MB_INVALID) {
|
||||
memset(&mbs, 0, sizeof(mbs));
|
||||
/* treat as single byte */
|
||||
ret = 1;
|
||||
}
|
||||
else if (ret == 0) /* handle null as normal char */
|
||||
ret = 1;
|
||||
nchars--;
|
||||
val -= ret;
|
||||
bptr += ret;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
/* decrement number of characters read from number required */
|
||||
nchars -= val;
|
||||
|
||||
|
@ -4761,7 +4832,7 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
|
|||
zfree(buf, bptr - buf + 1);
|
||||
if (resettty && SHTTY != -1)
|
||||
settyinfo(&saveti);
|
||||
return val <= 0;
|
||||
return eof;
|
||||
}
|
||||
|
||||
/* option -q means get one character, and interpret it as a Y or N */
|
||||
|
@ -4770,10 +4841,25 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
|
|||
|
||||
/* set up the buffer */
|
||||
readbuf[1] = '\0';
|
||||
|
||||
|
||||
/* get, and store, reply */
|
||||
if (izle) {
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
int key;
|
||||
|
||||
while ((key = getkeyptr(0, NULL)) >= 0) {
|
||||
char c = (char)key;
|
||||
/*
|
||||
* If multibyte, it can't be y, so we don't care
|
||||
* what key gets set to; just read to end of character.
|
||||
*/
|
||||
if (!isset(MULTIBYTE) ||
|
||||
mbrlen(&c, 1, &mbs) != MB_INCOMPLETE)
|
||||
break;
|
||||
}
|
||||
#else
|
||||
int key = getkeyptr(0, NULL);
|
||||
#endif
|
||||
|
||||
readbuf[0] = (key == 'y' ? 'y' : 'n');
|
||||
} else {
|
||||
|
@ -4786,6 +4872,7 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
|
|||
SHTTY = -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (OPT_ISSET(ops,'e') || OPT_ISSET(ops,'E'))
|
||||
printf("%s\n", readbuf);
|
||||
if (!OPT_ISSET(ops,'e'))
|
||||
|
@ -4808,16 +4895,79 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
|
|||
while (*args || (OPT_ISSET(ops,'A') && !gotnl)) {
|
||||
sigset_t s = child_unblock();
|
||||
buf = bptr = (char *)zalloc(bsiz = 64);
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
laststart = buf;
|
||||
ret = MB_INCOMPLETE;
|
||||
#endif
|
||||
/* get input, a character at a time */
|
||||
while (!gotnl) {
|
||||
c = zread(izle, &readchar);
|
||||
/* \ at the end of a line indicates a continuation *
|
||||
* line, except in raw mode (-r option) */
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
if (c == EOF) {
|
||||
/* not waiting to be completed any more */
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
*bptr = (char)c;
|
||||
if (isset(MULTIBYTE)) {
|
||||
ret = mbrtowc(&wc, bptr, 1, &mbs);
|
||||
if (!ret) /* NULL */
|
||||
ret = 1;
|
||||
} else {
|
||||
ret = 1;
|
||||
wc = (wchar_t)c;
|
||||
}
|
||||
if (ret != MB_INCOMPLETE) {
|
||||
if (ret == MB_INVALID)
|
||||
memset(&mbs, 0, sizeof(mbs));
|
||||
if (bslash && wc == delim) {
|
||||
bslash = 0;
|
||||
continue;
|
||||
}
|
||||
if (wc == delim)
|
||||
break;
|
||||
/*
|
||||
* `first' is non-zero if any separator we encounter is a
|
||||
* non-whitespace separator, which means that anything
|
||||
* (even an empty string) between, before or after separators
|
||||
* is significant. If it is zero, we have a whitespace
|
||||
* separator, which shouldn't cause extra empty strings to
|
||||
* be emitted. Hence the test for (*buf || first) when
|
||||
* we assign the result of reading a word.
|
||||
*/
|
||||
if (!bslash && wcsitype(wc, ISEP)) {
|
||||
if (bptr != buf ||
|
||||
(!(c < 128 && iwsep(c)) && first)) {
|
||||
first |= !(c < 128 && iwsep(c));
|
||||
break;
|
||||
}
|
||||
first |= !(c < 128 && iwsep(c));
|
||||
continue;
|
||||
}
|
||||
bslash = (wc == L'\\' && !bslash && !OPT_ISSET(ops,'r'));
|
||||
if (bslash)
|
||||
continue;
|
||||
first = 0;
|
||||
}
|
||||
if (imeta(STOUC(*bptr))) {
|
||||
bptr[1] = bptr[0] ^ 32;
|
||||
bptr[0] = Meta;
|
||||
bptr += 2;
|
||||
}
|
||||
else
|
||||
bptr++;
|
||||
if (ret != MB_INCOMPLETE)
|
||||
laststart = bptr;
|
||||
#else
|
||||
if (c == EOF)
|
||||
break;
|
||||
if (bslash && c == delim) {
|
||||
bslash = 0;
|
||||
continue;
|
||||
}
|
||||
if (c == EOF || c == delim)
|
||||
if (c == delim)
|
||||
break;
|
||||
/*
|
||||
* `first' is non-zero if any separator we encounter is a
|
||||
|
@ -4845,18 +4995,42 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
|
|||
*bptr++ = c ^ 32;
|
||||
} else
|
||||
*bptr++ = c;
|
||||
#endif
|
||||
/* increase the buffer size, if necessary */
|
||||
if (bptr >= buf + bsiz - 1) {
|
||||
int blen = bptr - buf;
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
int llen = laststart - buf;
|
||||
#endif
|
||||
|
||||
buf = realloc(buf, bsiz *= 2);
|
||||
bptr = buf + blen;
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
laststart = buf + llen;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
signal_setmask(s);
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
if (c == EOF)
|
||||
gotnl = 1;
|
||||
if (ret == MB_INCOMPLETE) {
|
||||
/*
|
||||
* We can only get here if there is an EOF in the
|
||||
* middle of a character... safest to keep the debris,
|
||||
* I suppose.
|
||||
*/
|
||||
*bptr = '\0';
|
||||
} else {
|
||||
if (wc == delim)
|
||||
gotnl = 1;
|
||||
*laststart = '\0';
|
||||
}
|
||||
#else
|
||||
if (c == delim || c == EOF)
|
||||
gotnl = 1;
|
||||
*bptr = '\0';
|
||||
#endif
|
||||
/* dispose of word appropriately */
|
||||
if (OPT_ISSET(ops,'e') || OPT_ISSET(ops,'E')) {
|
||||
zputs(buf, stdout);
|
||||
|
@ -4908,12 +5082,66 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
|
|||
return c == EOF;
|
||||
}
|
||||
buf = bptr = (char *)zalloc(bsiz = 64);
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
laststart = buf;
|
||||
ret = MB_INCOMPLETE;
|
||||
#endif
|
||||
/* any remaining part of the line goes into one parameter */
|
||||
bslash = 0;
|
||||
if (!gotnl) {
|
||||
sigset_t s = child_unblock();
|
||||
for (;;) {
|
||||
c = zread(izle, &readchar);
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
if (c == EOF) {
|
||||
/* not waiting to be completed any more */
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
*bptr = (char)c;
|
||||
if (isset(MULTIBYTE)) {
|
||||
ret = mbrtowc(&wc, bptr, 1, &mbs);
|
||||
if (!ret) /* NULL */
|
||||
ret = 1;
|
||||
} else {
|
||||
ret = 1;
|
||||
wc = (wchar_t)c;
|
||||
}
|
||||
if (ret != MB_INCOMPLETE) {
|
||||
if (ret == MB_INVALID)
|
||||
memset(&mbs, 0, sizeof(mbs));
|
||||
/*
|
||||
* \ at the end of a line introduces a continuation line,
|
||||
* except in raw mode (-r option)
|
||||
*/
|
||||
if (bslash && wc == delim) {
|
||||
bslash = 0;
|
||||
continue;
|
||||
}
|
||||
if (wc == delim && !zbuf)
|
||||
break;
|
||||
if (!bslash && bptr == buf && wcsitype(wc, ISEP)) {
|
||||
if (c < 128 && iwsep(c))
|
||||
continue;
|
||||
else if (!first) {
|
||||
first = 1;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
bslash = (wc == L'\\' && !bslash && !OPT_ISSET(ops,'r'));
|
||||
if (bslash)
|
||||
continue;
|
||||
}
|
||||
if (imeta(STOUC(*bptr))) {
|
||||
bptr[1] = bptr[0] ^ 32;
|
||||
bptr[0] = Meta;
|
||||
bptr += 2;
|
||||
}
|
||||
else
|
||||
bptr++;
|
||||
if (ret != MB_INCOMPLETE)
|
||||
laststart = bptr;
|
||||
#else
|
||||
/* \ at the end of a line introduces a continuation line, except in
|
||||
raw mode (-r option) */
|
||||
if (bslash && c == delim) {
|
||||
|
@ -4938,22 +5166,36 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
|
|||
*bptr++ = c ^ 32;
|
||||
} else
|
||||
*bptr++ = c;
|
||||
#endif
|
||||
/* increase the buffer size, if necessary */
|
||||
if (bptr >= buf + bsiz - 1) {
|
||||
int blen = bptr - buf;
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
int llen = laststart - buf;
|
||||
#endif
|
||||
|
||||
buf = realloc(buf, bsiz *= 2);
|
||||
bptr = buf + blen;
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
laststart = buf + llen;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
signal_setmask(s);
|
||||
}
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
if (ret != MB_INCOMPLETE)
|
||||
bptr = laststart;
|
||||
#endif
|
||||
/*
|
||||
* Strip trailing IFS whitespace.
|
||||
* iwsep can only be certain single-byte ASCII bytes, but we
|
||||
* must check the byte isn't metafied.
|
||||
*/
|
||||
while (bptr > buf) {
|
||||
if (bptr > buf + 1 && bptr[-2] == Meta) {
|
||||
if (iwsep(bptr[-1] ^ 32))
|
||||
bptr -= 2;
|
||||
else
|
||||
break;
|
||||
/* non-ASCII, can't be IWSEP */
|
||||
break;
|
||||
} else if (iwsep(bptr[-1]))
|
||||
bptr--;
|
||||
else
|
||||
|
|
101
Src/pattern.c
101
Src/pattern.c
|
@ -318,7 +318,7 @@ metacharinc(char **x)
|
|||
inchar = *inptr++;
|
||||
}
|
||||
*x = inptr;
|
||||
return (wchar_t)inchar;
|
||||
return (wchar_t)STOUC(inchar);
|
||||
}
|
||||
|
||||
while (*inptr) {
|
||||
|
@ -352,12 +352,14 @@ typedef int patint_t;
|
|||
#define PEOF EOF
|
||||
|
||||
#define METACHARINC(x) ((void)((x) += (*(x) == Meta) ? 2 : 1))
|
||||
/*
|
||||
* Return unmetafied char from string (x is any char *)
|
||||
*/
|
||||
#define UNMETA(x) (*(x) == Meta ? (x)[1] ^ 32 : *(x))
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Return unmetafied char from string (x is any char *).
|
||||
* Used with MULTIBYTE_SUPPORT if the GF_MULTIBYTE is not
|
||||
* in effect.
|
||||
*/
|
||||
#define UNMETA(x) (*(x) == Meta ? (x)[1] ^ 32 : *(x))
|
||||
|
||||
/* Add n more characters, ensuring there is enough space. */
|
||||
|
||||
|
@ -1575,7 +1577,7 @@ charref(char *x, char *y)
|
|||
size_t ret;
|
||||
|
||||
if (!(patglobflags & GF_MULTIBYTE) || !(STOUC(*x) & 0x80))
|
||||
return (wchar_t) *x;
|
||||
return (wchar_t) STOUC(*x);
|
||||
|
||||
ret = mbrtowc(&wc, x, y-x, &shiftstate);
|
||||
|
||||
|
@ -1583,7 +1585,7 @@ charref(char *x, char *y)
|
|||
/* Error. Treat as single byte. */
|
||||
/* Reset the shift state for next time. */
|
||||
memset(&shiftstate, 0, sizeof(shiftstate));
|
||||
return (wchar_t) *x;
|
||||
return (wchar_t) STOUC(*x);
|
||||
}
|
||||
|
||||
return wc;
|
||||
|
@ -1626,7 +1628,7 @@ charrefinc(char **x, char *y)
|
|||
size_t ret;
|
||||
|
||||
if (!(patglobflags & GF_MULTIBYTE) || !(STOUC(**x) & 0x80))
|
||||
return (wchar_t) *(*x)++;
|
||||
return (wchar_t) STOUC(*(*x)++);
|
||||
|
||||
ret = mbrtowc(&wc, *x, y-*x, &shiftstate);
|
||||
|
||||
|
@ -1634,7 +1636,7 @@ charrefinc(char **x, char *y)
|
|||
/* Error. Treat as single byte. */
|
||||
/* Reset the shift state for next time. */
|
||||
memset(&shiftstate, 0, sizeof(shiftstate));
|
||||
return (wchar_t) *(*x)++;
|
||||
return (wchar_t) STOUC(*(*x)++);
|
||||
}
|
||||
|
||||
/* Nulls here are normal characters */
|
||||
|
@ -2222,20 +2224,33 @@ patmatch(Upat prog)
|
|||
}
|
||||
break;
|
||||
case P_ANYOF:
|
||||
if (patinput == patinend ||
|
||||
!patmatchrange((char *)P_OPERAND(scan),
|
||||
CHARREF(patinput, patinend)))
|
||||
fail = 1;
|
||||
else
|
||||
CHARINC(patinput, patinend);
|
||||
break;
|
||||
case P_ANYBUT:
|
||||
if (patinput == patinend ||
|
||||
patmatchrange((char *)P_OPERAND(scan),
|
||||
CHARREF(patinput, patinend)))
|
||||
if (patinput == patinend)
|
||||
fail = 1;
|
||||
else
|
||||
CHARINC(patinput, patinend);
|
||||
else {
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
wchar_t cr = CHARREF(patinput, patinend);
|
||||
char *scanop = (char *)P_OPERAND(scan);
|
||||
if (patglobflags & GF_MULTIBYTE) {
|
||||
if (mb_patmatchrange(scanop, cr) ^
|
||||
(P_OP(scan) == P_ANYOF))
|
||||
fail = 1;
|
||||
else
|
||||
CHARINC(patinput, patinend);
|
||||
} else if (patmatchrange(scanop, (int)cr) ^
|
||||
(P_OP(scan) == P_ANYOF))
|
||||
fail = 1;
|
||||
else
|
||||
CHARINC(patinput, patinend);
|
||||
#else
|
||||
if (patmatchrange((char *)P_OPERAND(scan),
|
||||
CHARREF(patinput, patinend)) ^
|
||||
(P_OP(scan) == P_ANYOF))
|
||||
fail = 1;
|
||||
else
|
||||
CHARINC(patinput, patinend);
|
||||
#endif
|
||||
}
|
||||
break;
|
||||
case P_NUMRNG:
|
||||
case P_NUMFROM:
|
||||
|
@ -2923,7 +2938,7 @@ patmatch(Upat prog)
|
|||
|
||||
/**/
|
||||
static int
|
||||
patmatchrange(char *range, wchar_t ch)
|
||||
mb_patmatchrange(char *range, wchar_t ch)
|
||||
{
|
||||
wchar_t r1, r2;
|
||||
|
||||
|
@ -2994,21 +3009,20 @@ patmatchrange(char *range, wchar_t ch)
|
|||
return 1;
|
||||
break;
|
||||
case PP_IDENT:
|
||||
if (wcsiident(ch))
|
||||
if (wcsitype(ch, IIDENT))
|
||||
return 1;
|
||||
break;
|
||||
case PP_IFS:
|
||||
/* TODO */
|
||||
if (isep(ch))
|
||||
if (wcsitype(ch, ISEP))
|
||||
return 1;
|
||||
break;
|
||||
case PP_IFSSPACE:
|
||||
/* TODO */
|
||||
if (iwsep(ch))
|
||||
/* must be ASCII space character */
|
||||
if (ch < 128 && iwsep((int)ch))
|
||||
return 1;
|
||||
break;
|
||||
case PP_WORD:
|
||||
if (wcsiword(ch))
|
||||
if (wcsitype(ch, IWORD))
|
||||
return 1;
|
||||
break;
|
||||
case PP_RANGE:
|
||||
|
@ -3031,7 +3045,7 @@ patmatchrange(char *range, wchar_t ch)
|
|||
}
|
||||
|
||||
/**/
|
||||
#else
|
||||
#endif
|
||||
|
||||
/**/
|
||||
static int
|
||||
|
@ -3142,9 +3156,6 @@ patmatchrange(char *range, int ch)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/**/
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Repeatedly match something simple and say how many times.
|
||||
* charstart is an array parallel to that starting at patinput
|
||||
|
@ -3180,20 +3191,26 @@ static int patrepeat(Upat p, char *charstart)
|
|||
}
|
||||
break;
|
||||
case P_ANYOF:
|
||||
while (scan < patinend &&
|
||||
patmatchrange(opnd, CHARREF(scan, patinend))) {
|
||||
charstart[scan-patinput] = 1;
|
||||
count++;
|
||||
CHARINC(scan, patinend);
|
||||
}
|
||||
break;
|
||||
case P_ANYBUT:
|
||||
while (scan < patinend &&
|
||||
!patmatchrange(opnd, CHARREF(scan, patinend))) {
|
||||
while (scan < patinend) {
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
wchar_t cr = CHARREF(scan, patinend);
|
||||
if (patglobflags & GF_MULTIBYTE) {
|
||||
if (mb_patmatchrange(opnd, cr) ^
|
||||
(P_OP(p) == P_ANYOF))
|
||||
break;
|
||||
} else if (patmatchrange(opnd, (int)cr) ^
|
||||
(P_OP(p) == P_ANYOF))
|
||||
break;
|
||||
#else
|
||||
if (patmatchrange(opnd, CHARREF(scan, patinend)) ^
|
||||
P_OP(p) == P_ANYOF)
|
||||
break;
|
||||
#endif
|
||||
charstart[scan-patinput] = 1;
|
||||
count++;
|
||||
CHARINC(scan, patinend);
|
||||
}
|
||||
}
|
||||
break;
|
||||
#ifdef DEBUG
|
||||
default:
|
||||
|
|
60
Src/subst.c
60
Src/subst.c
|
@ -316,9 +316,14 @@ multsub(char **s, int split, char ***a, int *isarr, char *sep)
|
|||
local_list1(foo);
|
||||
|
||||
if (split) {
|
||||
for ( ; *x; x += l+1) {
|
||||
/*
|
||||
* This doesn't handle multibyte characters, but we're
|
||||
* looking for whitespace separators which must be ASCII.
|
||||
*/
|
||||
for ( ; *x; x += l) {
|
||||
char c = (l = *x == Meta) ? x[1] ^ 32 : *x;
|
||||
if (!iwsep(c))
|
||||
l++;
|
||||
if (!iwsep(STOUC(c)))
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -328,20 +333,35 @@ multsub(char **s, int split, char ***a, int *isarr, char *sep)
|
|||
if (split) {
|
||||
LinkNode n = firstnode(&foo);
|
||||
int inq = 0, inp = 0;
|
||||
for ( ; *x; x += l+1) {
|
||||
char c = (l = *x == Meta) ? x[1] ^ 32 : *x;
|
||||
if (!inq && !inp && isep(c)) {
|
||||
*x = '\0';
|
||||
for (x += l+1; *x; x += l+1) {
|
||||
c = (l = *x == Meta) ? x[1] ^ 32 : *x;
|
||||
if (!isep(c))
|
||||
MB_METACHARINIT();
|
||||
for ( ; *x; x += l) {
|
||||
int rawc = -1;
|
||||
convchar_t c;
|
||||
if (itok(STOUC(*x))) {
|
||||
/* token, can't be separator, must be single byte */
|
||||
rawc = *x;
|
||||
l = 1;
|
||||
} else {
|
||||
l = MB_METACHARLENCONV(x, &c);
|
||||
if (!inq && !inp && MB_ZISTYPE(c, ISEP)) {
|
||||
*x = '\0';
|
||||
for (x += l; *x; x += l) {
|
||||
if (itok(STOUC(*x))) {
|
||||
/* as above */
|
||||
rawc = *x;
|
||||
l = 1;
|
||||
break;
|
||||
}
|
||||
l = MB_METACHARLENCONV(x, &c);
|
||||
if (!MB_ZISTYPE(c, ISEP))
|
||||
break;
|
||||
}
|
||||
if (!*x)
|
||||
break;
|
||||
insertlinknode(&foo, n, (void *)x), incnode(n);
|
||||
}
|
||||
if (!*x)
|
||||
break;
|
||||
insertlinknode(&foo, n, (void *)x), incnode(n);
|
||||
}
|
||||
switch (c) {
|
||||
switch (rawc) {
|
||||
case Dnull: /* " */
|
||||
case Snull: /* ' */
|
||||
case Tick: /* ` (note: no Qtick!) */
|
||||
|
@ -357,8 +377,8 @@ multsub(char **s, int split, char ***a, int *isarr, char *sep)
|
|||
case Bnull: /* \ */
|
||||
case Bnullkeep:
|
||||
/* The parser verified the following char's existence. */
|
||||
x += l+1;
|
||||
l = *x == Meta;
|
||||
x += l;
|
||||
l = MB_METACHARLEN(x);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -685,12 +705,14 @@ invinstrpcmp(const void *a, const void *b)
|
|||
static char *
|
||||
dopadding(char *str, int prenum, int postnum, char *preone, char *postone, char *premul, char *postmul)
|
||||
{
|
||||
char def[3], *ret, *t, *r;
|
||||
char *def, *ret, *t, *r;
|
||||
int ls, ls2, lpreone, lpostone, lpremul, lpostmul, lr, f, m, c, cc;
|
||||
|
||||
def[0] = *ifs ? *ifs : ' ';
|
||||
def[1] = *ifs == Meta ? ifs[1] ^ 32 : '\0';
|
||||
def[2] = '\0';
|
||||
MB_METACHARINIT();
|
||||
if (*ifs)
|
||||
def = dupstrpfx(ifs, MB_METACHARLEN(ifs));
|
||||
else
|
||||
def = "";
|
||||
if (preone && !*preone)
|
||||
preone = def;
|
||||
if (postone && !*postone)
|
||||
|
|
406
Src/utils.c
406
Src/utils.c
|
@ -35,16 +35,65 @@
|
|||
/**/
|
||||
char *scriptname;
|
||||
|
||||
/**/
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
struct widechar_array {
|
||||
wchar_t *chars;
|
||||
size_t len;
|
||||
};
|
||||
typedef struct widechar_array *Widechar_array;
|
||||
|
||||
/*
|
||||
* The wordchars variable turned into a wide character array.
|
||||
* This is much more convenient for testing.
|
||||
*/
|
||||
struct widechar_array wordchars_wide;
|
||||
|
||||
/**/
|
||||
mod_export wchar_t *wordchars_wide;
|
||||
/**/
|
||||
/*
|
||||
* The same for the separators (IFS) array.
|
||||
*/
|
||||
struct widechar_array ifs_wide;
|
||||
|
||||
/* Function to set one of the above from the multibyte array */
|
||||
|
||||
static void
|
||||
set_widearray(char *mb_array, Widechar_array wca)
|
||||
{
|
||||
if (wca->chars) {
|
||||
free(wca->chars);
|
||||
wca->chars = NULL;
|
||||
}
|
||||
wca->len = 0;
|
||||
|
||||
if (!isset(MULTIBYTE))
|
||||
return;
|
||||
|
||||
if (mb_array) {
|
||||
VARARR(wchar_t, tmpwcs, strlen(mb_array));
|
||||
wchar_t *wcptr = tmpwcs;
|
||||
wint_t wci;
|
||||
|
||||
mb_metacharinit();
|
||||
while (*mb_array) {
|
||||
int mblen = mb_metacharlenconv(mb_array, &wci);
|
||||
|
||||
if (!mblen)
|
||||
break;
|
||||
/* No good unless all characters are convertible */
|
||||
if (*wcptr == WEOF)
|
||||
return;
|
||||
*wcptr++ = (wchar_t)wci;
|
||||
#ifdef DEBUG
|
||||
if (wcptr[-1] < 0)
|
||||
fprintf(stderr, "BUG: Bad cast to wchar_t\n");
|
||||
#endif
|
||||
mb_array += mblen;
|
||||
}
|
||||
|
||||
wca->len = wcptr - tmpwcs;
|
||||
wca->chars = (wchar_t *)zalloc(wca->len * sizeof(wchar_t));
|
||||
wmemcpy(wca->chars, tmpwcs, wca->len);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
@ -1853,9 +1902,34 @@ getquery(char *valid_chars, int purge)
|
|||
if (c != '\n')
|
||||
while ((d = read1char()) >= 0 && d != '\n');
|
||||
} else {
|
||||
settyinfo(&shttyinfo);
|
||||
if (c != '\n' && !valid_chars)
|
||||
if (c != '\n' && !valid_chars) {
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
if (isset(MULTIBYTE) && c >= 0) {
|
||||
/*
|
||||
* No waiting for a valid character, and no draining;
|
||||
* we should ensure we haven't stopped in the middle
|
||||
* of a multibyte character.
|
||||
*/
|
||||
mbstate_t mbs;
|
||||
char cc = (char)c;
|
||||
memset(&mbs, 0, sizeof(mbs));
|
||||
for (;;) {
|
||||
size_t ret = mbrlen(&cc, 1, &mbs);
|
||||
|
||||
if (ret != MB_INCOMPLETE)
|
||||
break;
|
||||
c = read1char();
|
||||
if (c < 0)
|
||||
break;
|
||||
cc = (char)c;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
settyinfo(&shttyinfo);
|
||||
write(SHTTY, "\n", 1);
|
||||
}
|
||||
else
|
||||
settyinfo(&shttyinfo);
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
@ -2253,6 +2327,10 @@ skipwsep(char **s)
|
|||
char *t = *s;
|
||||
int i = 0;
|
||||
|
||||
/*
|
||||
* Don't need to handle multibyte characters, they can't
|
||||
* be IWSEP. Do need to check for metafication.
|
||||
*/
|
||||
while (*t && iwsep(*t == Meta ? t[1] ^ 32 : *t)) {
|
||||
if (*t == Meta)
|
||||
t++;
|
||||
|
@ -2293,19 +2371,23 @@ spacesplit(char *s, int allownull, int heap, int quote)
|
|||
|
||||
t = s;
|
||||
skipwsep(&s);
|
||||
if (*s && isep(*s == Meta ? s[1] ^ 32 : *s))
|
||||
MB_METACHARINIT();
|
||||
if (*s && itype_end(s, ISEP, 1) != s)
|
||||
*ptr++ = dup(allownull ? "" : nulstring);
|
||||
else if (!allownull && t != s)
|
||||
*ptr++ = dup("");
|
||||
while (*s) {
|
||||
if (isep(*s == Meta ? s[1] ^ 32 : *s) || (quote && *s == '\\')) {
|
||||
if (*s == Meta)
|
||||
s++;
|
||||
char *iend = itype_end(s, ISEP, 1);
|
||||
if (iend != s) {
|
||||
s = iend;
|
||||
skipwsep(&s);
|
||||
}
|
||||
else if (quote && *s == '\\') {
|
||||
s++;
|
||||
skipwsep(&s);
|
||||
}
|
||||
t = s;
|
||||
findsep(&s, NULL, quote);
|
||||
(void)findsep(&s, NULL, quote);
|
||||
if (s > t || allownull) {
|
||||
*ptr = (heap ? (char *) hcalloc((s - t) + 1) :
|
||||
(char *) zshcalloc((s - t) + 1));
|
||||
|
@ -2321,68 +2403,87 @@ spacesplit(char *s, int allownull, int heap, int quote)
|
|||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find a separator. Return 0 if already at separator, 1 if separator
|
||||
* found later, else -1. (Historical note: used to return length into
|
||||
* string but this is all that is necessary and is less ambiguous with
|
||||
* multibyte characters around.)
|
||||
*
|
||||
* *s is the string we are looking along, which will be updated
|
||||
* to the point we have got to.
|
||||
*
|
||||
* sep is a possibly multicharacter separator to look for. If NULL,
|
||||
* use normal separator characters. If sep is an empty string, split on individual
|
||||
* characters.
|
||||
*
|
||||
* quote is a flag that '\<sep>' should not be treated as a separator.
|
||||
* in this case we need to be able to strip the backslash directly
|
||||
* in the string, so the calling function must have sent us something
|
||||
* modifiable. currently this only works for sep == NULL. also
|
||||
* in this case only, we need to turn \\ into \.
|
||||
*/
|
||||
|
||||
/**/
|
||||
static int
|
||||
findsep(char **s, char *sep, int quote)
|
||||
{
|
||||
/*
|
||||
* *s is the string we are looking along, which will be updated
|
||||
* to the point we have got to.
|
||||
*
|
||||
* sep is a possibly multicharacter separator to look for. If NULL,
|
||||
* use normal separator characters.
|
||||
*
|
||||
* quote is a flag that '\<sep>' should not be treated as a separator.
|
||||
* in this case we need to be able to strip the backslash directly
|
||||
* in the string, so the calling function must have sent us something
|
||||
* modifiable. currently this only works for sep == NULL. also in
|
||||
* in this case only, we need to turn \\ into \.
|
||||
*/
|
||||
int i;
|
||||
int i, ilen;
|
||||
char *t, *tt;
|
||||
convchar_t c;
|
||||
|
||||
MB_METACHARINIT();
|
||||
if (!sep) {
|
||||
for (t = *s; *t; t++) {
|
||||
if (quote && *t == '\\' &&
|
||||
(isep(t[1] == Meta ? (t[2] ^ 32) : t[1]) || t[1] == '\\')) {
|
||||
chuck(t);
|
||||
if (*t == Meta)
|
||||
t++;
|
||||
continue;
|
||||
}
|
||||
if (*t == Meta) {
|
||||
if (isep(t[1] ^ 32))
|
||||
for (t = *s; *t; t += ilen) {
|
||||
if (quote && *t == '\\') {
|
||||
if (t[1] == '\\') {
|
||||
chuck(t);
|
||||
ilen = 1;
|
||||
continue;
|
||||
} else {
|
||||
ilen = MB_METACHARLENCONV(t+1, &c);
|
||||
if (MB_ZISTYPE(c, ISEP)) {
|
||||
chuck(t);
|
||||
/* then advance over new character, length ilen */
|
||||
} else {
|
||||
/* treat *t (backslash) as normal byte */
|
||||
if (isep(*t))
|
||||
break;
|
||||
ilen = 1;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
ilen = MB_METACHARLENCONV(t, &c);
|
||||
if (MB_ZISTYPE(c, ISEP))
|
||||
break;
|
||||
t++;
|
||||
} else if (isep(*t))
|
||||
break;
|
||||
}
|
||||
}
|
||||
i = t - *s;
|
||||
i = (t > *s);
|
||||
*s = t;
|
||||
return i;
|
||||
}
|
||||
if (!sep[0]) {
|
||||
/*
|
||||
* Empty separator string just means advance past first character,
|
||||
* if any.
|
||||
*/
|
||||
if (**s) {
|
||||
if (**s == Meta)
|
||||
*s += 2;
|
||||
else
|
||||
++*s;
|
||||
*s += MB_METACHARLEN(*s);
|
||||
return 1;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
for (i = 0; **s; i++) {
|
||||
/*
|
||||
* The following works for multibyte characters by virtue of
|
||||
* the fact that sep may be a string (and we don't care how
|
||||
* it divides up, we need to match all of it).
|
||||
*/
|
||||
for (t = sep, tt = *s; *t && *tt && *t == *tt; t++, tt++);
|
||||
if (!*t)
|
||||
return i;
|
||||
if (*(*s)++ == Meta) {
|
||||
#ifdef DEBUG
|
||||
if (! *(*s)++)
|
||||
fprintf(stderr, "BUG: unexpected end of string in findsep()\n");
|
||||
#else
|
||||
(*s)++;
|
||||
#endif
|
||||
}
|
||||
return (i > 0);
|
||||
*s += MB_METACHARLEN(*s);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
@ -2405,16 +2506,15 @@ findword(char **s, char *sep)
|
|||
}
|
||||
return r;
|
||||
}
|
||||
for (t = *s; *t; t++) {
|
||||
if (*t == Meta) {
|
||||
if (! isep(t[1] ^ 32))
|
||||
break;
|
||||
t++;
|
||||
} else if (! isep(*t))
|
||||
MB_METACHARINIT();
|
||||
for (t = *s; *t; t += sl) {
|
||||
convchar_t c;
|
||||
sl = MB_METACHARLENCONV(t, &c);
|
||||
if (!MB_ZISTYPE(c, ISEP))
|
||||
break;
|
||||
}
|
||||
*s = t;
|
||||
findsep(s, sep, 0);
|
||||
(void)findsep(s, sep, 0);
|
||||
return t;
|
||||
}
|
||||
|
||||
|
@ -2436,18 +2536,17 @@ wordcount(char *s, char *sep, int mul)
|
|||
r = 0;
|
||||
if (mul <= 0)
|
||||
skipwsep(&s);
|
||||
if ((*s && isep(*s == Meta ? s[1] ^ 32 : *s)) ||
|
||||
if ((*s && itype_end(s, ISEP, 1) != s) ||
|
||||
(mul < 0 && t != s))
|
||||
r++;
|
||||
for (; *s; r++) {
|
||||
if (isep(*s == Meta ? s[1] ^ 32 : *s)) {
|
||||
if (*s == Meta)
|
||||
s++;
|
||||
s++;
|
||||
char *ie = itype_end(s, ISEP, 1);
|
||||
if (ie != s) {
|
||||
s = ie;
|
||||
if (mul <= 0)
|
||||
skipwsep(&s);
|
||||
}
|
||||
findsep(&s, NULL, 0);
|
||||
(void)findsep(&s, NULL, 0);
|
||||
t = s;
|
||||
if (mul <= 0)
|
||||
skipwsep(&s);
|
||||
|
@ -2464,19 +2563,20 @@ sepjoin(char **s, char *sep, int heap)
|
|||
{
|
||||
char *r, *p, **t;
|
||||
int l, sl;
|
||||
char sepbuf[3];
|
||||
char sepbuf[2];
|
||||
|
||||
if (!*s)
|
||||
return heap ? "" : ztrdup("");
|
||||
if (!sep) {
|
||||
p = sep = sepbuf;
|
||||
if (ifs) {
|
||||
*p++ = *ifs;
|
||||
*p++ = *ifs == Meta ? ifs[1] ^ 32 : '\0';
|
||||
/* optimise common case that ifs[0] is space */
|
||||
if (ifs && *ifs != ' ') {
|
||||
MB_METACHARINIT();
|
||||
sep = dupstrpfx(ifs, MB_METACHARLEN(ifs));
|
||||
} else {
|
||||
p = sep = sepbuf;
|
||||
*p++ = ' ';
|
||||
*p = '\0';
|
||||
}
|
||||
*p = '\0';
|
||||
}
|
||||
sl = strlen(sep);
|
||||
for (t = s, l = 1 - sl; *t; l += strlen(*t) + sl, t++);
|
||||
|
@ -2508,7 +2608,7 @@ sepsplit(char *s, char *sep, int allownull, int heap)
|
|||
|
||||
for (t = s; n--;) {
|
||||
tt = t;
|
||||
findsep(&t, sep, 0);
|
||||
(void)findsep(&t, sep, 0);
|
||||
*p = (heap ? (char *) hcalloc(t - tt + 1) :
|
||||
(char *) zshcalloc(t - tt + 1));
|
||||
strncpy(*p, tt, t - tt);
|
||||
|
@ -2637,39 +2737,21 @@ inittyptab(void)
|
|||
for (t0 = (int)STOUC(Snull); t0 <= (int)STOUC(Nularg); t0++)
|
||||
typtab[t0] |= ITOK | IMETA | INULL;
|
||||
for (s = ifs ? ifs : DEFAULT_IFS; *s; s++) {
|
||||
if (inblank(*s)) {
|
||||
if (s[1] == *s)
|
||||
int c = STOUC(*s == Meta ? *++s ^ 32 : *s);
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
if (!isascii(c)) {
|
||||
/* see comment for wordchars below */
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
if (inblank(c)) {
|
||||
if (s[1] == c)
|
||||
s++;
|
||||
else
|
||||
typtab[STOUC(*s)] |= IWSEP;
|
||||
typtab[c] |= IWSEP;
|
||||
}
|
||||
typtab[STOUC(*s == Meta ? *++s ^ 32 : *s)] |= ISEP;
|
||||
typtab[c] |= ISEP;
|
||||
}
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
if (wordchars) {
|
||||
char *wordchars_unmeta;
|
||||
const char *wordchars_ptr;
|
||||
mbstate_t mbs;
|
||||
size_t nchars;
|
||||
int unmetalen;
|
||||
|
||||
wordchars_unmeta = dupstring(wordchars);
|
||||
wordchars_ptr = unmetafy(wordchars_unmeta, &unmetalen);
|
||||
|
||||
memset(&mbs, 0, sizeof(mbs));
|
||||
wordchars_wide = (wchar_t *)
|
||||
zrealloc(wordchars_wide, (unmetalen+1)*sizeof(wchar_t));
|
||||
nchars = mbsrtowcs(wordchars_wide, &wordchars_ptr, unmetalen, &mbs);
|
||||
if (nchars == MB_INVALID || nchars == MB_INCOMPLETE) {
|
||||
/* Conversion state is undefined: better just set to null */
|
||||
nchars = 0;
|
||||
}
|
||||
wordchars_wide[nchars] = L'\0';
|
||||
} else {
|
||||
wordchars_wide = zrealloc(wordchars_wide, sizeof(wchar_t));
|
||||
*wordchars_wide = L'\0';
|
||||
}
|
||||
#endif
|
||||
for (s = wordchars ? wordchars : DEFAULT_WORDCHARS; *s; s++) {
|
||||
int c = STOUC(*s == Meta ? *++s ^ 32 : *s);
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
|
@ -2686,6 +2768,10 @@ inittyptab(void)
|
|||
#endif
|
||||
typtab[c] |= IWORD;
|
||||
}
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
set_widearray(wordchars, &wordchars_wide);
|
||||
set_widearray(ifs, &ifs_wide);
|
||||
#endif
|
||||
for (s = SPECCHARS; *s; s++)
|
||||
typtab[STOUC(*s)] |= ISPECIAL;
|
||||
if (specialcomma)
|
||||
|
@ -2718,62 +2804,60 @@ wcsiblank(wint_t wc)
|
|||
}
|
||||
|
||||
/*
|
||||
* iword() macro extended to support wide characters.
|
||||
* zistype macro extended to support wide characters.
|
||||
* Works for IIDENT, IWORD, IALNUM, ISEP.
|
||||
* We don't need this for IWSEP because that only applies to
|
||||
* a fixed set of ASCII characters.
|
||||
* Note here that use of multibyte mode is not tested:
|
||||
* that's because for ZLE this is unconditional,
|
||||
* not dependent on the option. The caller must decide.
|
||||
*/
|
||||
|
||||
/**/
|
||||
mod_export int
|
||||
wcsiword(wchar_t c)
|
||||
wcsitype(wchar_t c, int itype)
|
||||
{
|
||||
int len;
|
||||
VARARR(char, outstr, MB_CUR_MAX);
|
||||
|
||||
if (!isset(MULTIBYTE))
|
||||
return zistype(c, itype);
|
||||
|
||||
/*
|
||||
* Strategy: the shell requires that the multibyte representation
|
||||
* be an extension of ASCII. So see if converting the character
|
||||
* produces an ASCII character. If it does, use iword on that.
|
||||
* If it doesn't, use iswalnum on the original character. This
|
||||
* is pretty good most of the time.
|
||||
* produces an ASCII character. If it does, use zistype on that.
|
||||
* If it doesn't, use iswalnum on the original character.
|
||||
* If that fails, resort to the appropriate wide character array.
|
||||
*/
|
||||
len = wctomb(outstr, c);
|
||||
|
||||
if (len == 0) {
|
||||
/* NULL is special */
|
||||
return iword(0);
|
||||
return zistype(0, itype);
|
||||
} else if (len == 1 && iascii(*outstr)) {
|
||||
return iword(*outstr);
|
||||
return zistype(*outstr, itype);
|
||||
} else {
|
||||
return iswalnum(c) || wcschr(wordchars_wide, c);
|
||||
switch (itype) {
|
||||
case IIDENT:
|
||||
if (!isset(POSIXIDENTIFIERS))
|
||||
return 0;
|
||||
return iswalnum(c);
|
||||
|
||||
case IWORD:
|
||||
if (iswalnum(c))
|
||||
return 1;
|
||||
return !!wmemchr(wordchars_wide.chars, c, wordchars_wide.len);
|
||||
|
||||
case ISEP:
|
||||
return !!wmemchr(ifs_wide.chars, c, ifs_wide.len);
|
||||
|
||||
default:
|
||||
return iswalnum(c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* iident() macro extended to support wide characters.
|
||||
*
|
||||
* The macro is intended to test if a character is allowed in an
|
||||
* internal zsh identifier. We allow all alphanumerics outside
|
||||
* the ASCII range unless POSIXIDENTIFIERS is set.
|
||||
*
|
||||
* Otherwise similar to wcsiword.
|
||||
*/
|
||||
|
||||
/**/
|
||||
mod_export int
|
||||
wcsiident(wchar_t c)
|
||||
{
|
||||
int len;
|
||||
VARARR(char, outstr, MB_CUR_MAX);
|
||||
|
||||
len = wctomb(outstr, c);
|
||||
|
||||
if (len == 0) {
|
||||
/* NULL is special */
|
||||
return 0;
|
||||
} else if (len == 1 && iascii(*outstr)) {
|
||||
return iident(*outstr);
|
||||
} else {
|
||||
return !isset(POSIXIDENTIFIERS) && iswalnum(c);
|
||||
}
|
||||
}
|
||||
/**/
|
||||
#endif
|
||||
|
||||
|
@ -2789,7 +2873,7 @@ wcsiident(wchar_t c)
|
|||
* If "once" is set, just test the first character, i.e. (outptr !=
|
||||
* inptr) tests whether the first character is valid in an identifier.
|
||||
*
|
||||
* Currently this is only called with itype IIDENT or IUSER.
|
||||
* Currently this is only called with itype IIDENT, IUSER or ISEP.
|
||||
*/
|
||||
|
||||
/**/
|
||||
|
@ -2819,12 +2903,25 @@ itype_end(const char *ptr, int itype, int once)
|
|||
break;
|
||||
} else {
|
||||
/*
|
||||
* Valid non-ASCII character. Allow all alphanumerics;
|
||||
* if testing for words, allow all wordchars.
|
||||
* Valid non-ASCII character.
|
||||
*/
|
||||
if (!(iswalnum(wc) ||
|
||||
(itype == IWORD && wcschr(wordchars_wide, wc))))
|
||||
switch (itype) {
|
||||
case IWORD:
|
||||
if (!iswalnum(wc) &&
|
||||
!wmemchr(wordchars_wide.chars, wc,
|
||||
wordchars_wide.len))
|
||||
return (char *)ptr;
|
||||
break;
|
||||
|
||||
case ISEP:
|
||||
if (!wmemchr(ifs_wide.chars, wc, ifs_wide.len))
|
||||
return (char *)ptr;
|
||||
break;
|
||||
|
||||
default:
|
||||
if (!iswalnum(wc))
|
||||
return (char *)ptr;
|
||||
}
|
||||
}
|
||||
ptr += len;
|
||||
|
||||
|
@ -3791,16 +3888,22 @@ mb_metacharlenconv(const char *s, wint_t *wcp)
|
|||
wchar_t wc;
|
||||
|
||||
if (!isset(MULTIBYTE)) {
|
||||
/* treat as single byte, possibly metafied */
|
||||
if (wcp)
|
||||
*wcp = WEOF;
|
||||
*wcp = (wint_t)(*s == Meta ? s[1] ^ 32 : *s);
|
||||
return 1 + (*s == Meta);
|
||||
}
|
||||
|
||||
ret = MB_INVALID;
|
||||
for (ptr = s; *ptr; ) {
|
||||
if (*ptr == Meta)
|
||||
if (*ptr == Meta) {
|
||||
inchar = *++ptr ^ 32;
|
||||
else
|
||||
#ifdef DEBUG
|
||||
if (!*ptr)
|
||||
fprintf(stderr,
|
||||
"BUG: unexpected end of string in mb_metacharlen()\n");
|
||||
#endif
|
||||
} else
|
||||
inchar = *ptr;
|
||||
ptr++;
|
||||
ret = mbrtowc(&wc, &inchar, 1, &mb_shiftstate);
|
||||
|
@ -3873,6 +3976,23 @@ mb_metastrlen(char *ptr)
|
|||
return num + num_in_char;
|
||||
}
|
||||
|
||||
/**/
|
||||
#else
|
||||
|
||||
/* Simple replacement for mb_metacharlenconv */
|
||||
int
|
||||
metacharlenconv(char *x, int *c)
|
||||
{
|
||||
if (*x == Meta) {
|
||||
if (c)
|
||||
*c == STOUC(x[1]);
|
||||
return 2;
|
||||
}
|
||||
if (c)
|
||||
*c = STOUC(*x);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**/
|
||||
#endif /* MULTIBYTE_SUPPORT */
|
||||
|
||||
|
|
|
@ -1925,6 +1925,8 @@ typedef char *(*ZleGetLineFn) _((int *, int *));
|
|||
#ifdef MULTIBYTE_SUPPORT
|
||||
#define nicezputs(str, outs) (void)mb_niceformat((str), (outs), NULL, 0)
|
||||
#define MB_METACHARINIT() mb_metacharinit()
|
||||
typedef wint_t convchar_t;
|
||||
#define MB_METACHARLENCONV(str, cp) mb_metacharlenconv((str), (cp))
|
||||
#define MB_METACHARLEN(str) mb_metacharlenconv(str, NULL)
|
||||
#define MB_METASTRLEN(str) mb_metastrlen(str)
|
||||
|
||||
|
@ -1948,6 +1950,8 @@ typedef char *(*ZleGetLineFn) _((int *, int *));
|
|||
|
||||
#else
|
||||
#define MB_METACHARINIT()
|
||||
typedef int convchar_t;
|
||||
#define MB_METACHARLENCONV(str, cp) metacharlenconv((str), (cp))
|
||||
#define MB_METACHARLEN(str) (*(str) == Meta ? 2 : 1)
|
||||
#define MB_METASTRLEN(str) ztrlen(str)
|
||||
|
||||
|
|
|
@ -59,6 +59,12 @@
|
|||
#define iwsep(X) zistype(X,IWSEP)
|
||||
#define inull(X) zistype(X,INULL)
|
||||
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
#define MB_ZISTYPE(X,Y) wcsitype((X),(Y))
|
||||
#else
|
||||
#define MB_ZISTYPE(X,Y) zistype((X),(Y))
|
||||
#endif
|
||||
|
||||
#define iascii(X) isascii(STOUC(X))
|
||||
#define ilower(X) islower(STOUC(X))
|
||||
#define iprint(X) isprint(STOUC(X))
|
||||
|
|
|
@ -725,6 +725,29 @@
|
|||
>7
|
||||
>8
|
||||
|
||||
# Tests a long-standing bug with joining on metafied characters in IFS
|
||||
(array=(one two three)
|
||||
IFS=$'\0'
|
||||
foo="$array"
|
||||
for (( i = 1; i <= ${#foo}; i++ )); do
|
||||
char=${foo[i]}
|
||||
print $(( #char ))
|
||||
done)
|
||||
0:Joining with NULL character from IFS
|
||||
>111
|
||||
>110
|
||||
>101
|
||||
>0
|
||||
>116
|
||||
>119
|
||||
>111
|
||||
>0
|
||||
>116
|
||||
>104
|
||||
>114
|
||||
>101
|
||||
>101
|
||||
|
||||
unset SHLVL
|
||||
(( SHLVL++ ))
|
||||
print $SHLVL
|
||||
|
|
|
@ -174,3 +174,57 @@
|
|||
1:POSIX_IDENTIFIERS option
|
||||
>3
|
||||
?(eval):1: command not found: hähä=3
|
||||
|
||||
foo="Ølaf«Ødd«øpénëd«ån«àpple"
|
||||
print -l ${(s.«.)foo}
|
||||
ioh="Ἐν ἀρχῇ ἦν ὁ λόγος, καὶ ὁ λόγος ἦν πρὸς τὸν θεόν, καὶ θεὸς ἦν ὁ λόγος."
|
||||
print -l ${=ioh}
|
||||
print ${(w)#ioh}
|
||||
0:Splitting with multibyte characters
|
||||
>Ølaf
|
||||
>Ødd
|
||||
>øpénëd
|
||||
>ån
|
||||
>àpple
|
||||
>Ἐν
|
||||
>ἀρχῇ
|
||||
>ἦν
|
||||
>ὁ
|
||||
>λόγος,
|
||||
>καὶ
|
||||
>ὁ
|
||||
>λόγος
|
||||
>ἦν
|
||||
>πρὸς
|
||||
>τὸν
|
||||
>θεόν,
|
||||
>καὶ
|
||||
>θεὸς
|
||||
>ἦν
|
||||
>ὁ
|
||||
>λόγος.
|
||||
>17
|
||||
|
||||
read -d £ one
|
||||
read -d £ two
|
||||
print $one
|
||||
print $two
|
||||
0:read with multibyte delimiter
|
||||
<first£second£
|
||||
>first
|
||||
>second
|
||||
|
||||
(IFS=«
|
||||
read -d » -A array
|
||||
print -l $array)
|
||||
0:read -A with multibyte IFS
|
||||
<dominus«illuminatio«mea»ignored
|
||||
>dominus
|
||||
>illuminatio
|
||||
>mea
|
||||
|
||||
read -k2 -u0 twochars
|
||||
print $twochars
|
||||
0:read multibyte characters
|
||||
<«»ignored
|
||||
>«»
|
||||
|
|
Loading…
Reference in a new issue