mirror of
https://github.com/zsh-users/zsh
synced 2024-10-15 04:13:34 +00:00
Add non-metafied character length handling.
Use this in regex module and add test using $'\ua0'. Rename mb_metacharinit() to mb_charinit() as it does not involve metafied characters.
This commit is contained in:
parent
370e7f73f6
commit
f1923bdfa6
10
ChangeLog
10
ChangeLog
|
@ -1,3 +1,13 @@
|
|||
2015-06-12 Peter Stephenson <p.stephenson@samsung.com>
|
||||
|
||||
* 35448: Src/Modules/curses.c, Src/Modules/regex.c,
|
||||
Src/Zle/complist.c, Src/Zle/zle_utils.c, Src/builtin.c,
|
||||
Src/glob.c, Src/hist.c, Src/prompt.c, Src/utils.c, Src/zsh.h,
|
||||
Test/D07multibyte.ztst: Add non-metafied character length
|
||||
handling and use this for regex module. Add test.
|
||||
Rename mb_metacharinit() to mb_charinit() since it doesn't
|
||||
involve metafied characters.
|
||||
|
||||
2015-06-11 Peter Stephenson <p.stephenson@samsung.com>
|
||||
|
||||
* 35442: Doc/Zsh/options.yo: multibyte option now on
|
||||
|
|
|
@ -765,7 +765,7 @@ zccmd_string(const char *nam, char **args)
|
|||
w = (ZCWin)getdata(node);
|
||||
|
||||
#ifdef HAVE_WADDWSTR
|
||||
mb_metacharinit();
|
||||
mb_charinit();
|
||||
wptr = wstr = zhalloc((strlen(str)+1) * sizeof(wchar_t));
|
||||
|
||||
while (*str && (clen = mb_metacharlenconv(str, &wc))) {
|
||||
|
|
|
@ -115,6 +115,7 @@ zcond_regex_match(char **a, int id)
|
|||
} else {
|
||||
zlong offs;
|
||||
char *ptr;
|
||||
int clen, leftlen;
|
||||
|
||||
m = matches;
|
||||
s = metafy(lhstr + m->rm_so, m->rm_eo - m->rm_so, META_DUP);
|
||||
|
@ -123,19 +124,25 @@ zcond_regex_match(char **a, int id)
|
|||
* Count the characters before the match.
|
||||
*/
|
||||
ptr = lhstr;
|
||||
leftlen = m->rm_so;
|
||||
offs = 0;
|
||||
MB_METACHARINIT();
|
||||
while (ptr < lhstr + m->rm_so) {
|
||||
MB_CHARINIT();
|
||||
while (leftlen) {
|
||||
offs++;
|
||||
ptr += MB_METACHARLEN(ptr);
|
||||
clen = MB_CHARLEN(ptr, leftlen);
|
||||
ptr += clen;
|
||||
leftlen -= clen;
|
||||
}
|
||||
setiparam("MBEGIN", offs + !isset(KSHARRAYS));
|
||||
/*
|
||||
* Add on the characters in the match.
|
||||
*/
|
||||
while (ptr < lhstr + m->rm_eo) {
|
||||
leftlen = m->rm_eo - m->rm_so;
|
||||
while (leftlen) {
|
||||
offs++;
|
||||
ptr += MB_METACHARLEN(ptr);
|
||||
clen = MB_CHARLEN(ptr, leftlen);
|
||||
ptr += clen;
|
||||
leftlen -= clen;
|
||||
}
|
||||
setiparam("MEND", offs + !isset(KSHARRAYS) - 1);
|
||||
if (nelem) {
|
||||
|
@ -149,19 +156,25 @@ zcond_regex_match(char **a, int id)
|
|||
{
|
||||
char buf[DIGBUFSIZE];
|
||||
ptr = lhstr;
|
||||
leftlen = m->rm_so;
|
||||
offs = 0;
|
||||
/* Find the start offset */
|
||||
MB_METACHARINIT();
|
||||
while (ptr < lhstr + m->rm_so) {
|
||||
MB_CHARINIT();
|
||||
while (leftlen) {
|
||||
offs++;
|
||||
ptr += MB_METACHARLEN(ptr);
|
||||
clen = MB_CHARLEN(ptr, leftlen);
|
||||
ptr += clen;
|
||||
leftlen -= clen;
|
||||
}
|
||||
convbase(buf, offs + !isset(KSHARRAYS), 10);
|
||||
*bptr = ztrdup(buf);
|
||||
/* Continue to the end offset */
|
||||
while (ptr < lhstr + m->rm_eo) {
|
||||
leftlen = m->rm_eo - m->rm_so;
|
||||
while (leftlen ) {
|
||||
offs++;
|
||||
ptr += MB_METACHARLEN(ptr);
|
||||
clen = MB_CHARLEN(ptr, leftlen);
|
||||
ptr += clen;
|
||||
leftlen -= clen;
|
||||
}
|
||||
convbase(buf, offs + !isset(KSHARRAYS) - 1, 10);
|
||||
*eptr = ztrdup(buf);
|
||||
|
|
|
@ -728,7 +728,7 @@ clnicezputs(int do_colors, char *s, int ml)
|
|||
if (do_colors)
|
||||
initiscol();
|
||||
|
||||
mb_metacharinit();
|
||||
mb_charinit();
|
||||
while (umleft > 0) {
|
||||
size_t cnt = eol ? MB_INVALID : mbrtowc(&cc, uptr, umleft, &mbs);
|
||||
|
||||
|
|
|
@ -1288,7 +1288,7 @@ showmsg(char const *msg)
|
|||
p = unmetafy(umsg, &ulen);
|
||||
memset(&mbs, 0, sizeof mbs);
|
||||
|
||||
mb_metacharinit();
|
||||
mb_charinit();
|
||||
while (ulen > 0) {
|
||||
char const *n;
|
||||
if (*p == '\n') {
|
||||
|
|
|
@ -4582,7 +4582,7 @@ bin_print(char *name, char **args, Options ops, int func)
|
|||
convchar_t cc;
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
if (isset(MULTIBYTE)) {
|
||||
mb_metacharinit();
|
||||
mb_charinit();
|
||||
(void)mb_metacharlenconv(metafy(curarg+1, curlen-1,
|
||||
META_USEHEAP), &cc);
|
||||
}
|
||||
|
@ -5557,7 +5557,7 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
|
|||
wint_t wi;
|
||||
|
||||
if (isset(MULTIBYTE)) {
|
||||
mb_metacharinit();
|
||||
mb_charinit();
|
||||
(void)mb_metacharlenconv(delimstr, &wi);
|
||||
}
|
||||
else
|
||||
|
|
14
Src/glob.c
14
Src/glob.c
|
@ -2237,7 +2237,7 @@ xpandbraces(LinkList list, LinkNode *np)
|
|||
#ifdef MULTIBYTE_SUPPORT
|
||||
char *ncptr;
|
||||
int nclen;
|
||||
mb_metacharinit();
|
||||
mb_charinit();
|
||||
ncptr = wcs_nicechar(cend, NULL, NULL);
|
||||
nclen = strlen(ncptr);
|
||||
p = zhalloc(lenalloc + nclen);
|
||||
|
@ -2805,7 +2805,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
|
|||
* ... now we know whether it's worth looking for the
|
||||
* shortest, which we do by brute force.
|
||||
*/
|
||||
mb_metacharinit();
|
||||
mb_charinit();
|
||||
for (t = s, umlen = 0; t < s + mlen; ) {
|
||||
set_pat_end(p, *t);
|
||||
if (pattrylen(p, s, t - s, umlen, 0)) {
|
||||
|
@ -2831,7 +2831,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
|
|||
* so that match, mbegin, mend and MATCH, MBEGIN, MEND are
|
||||
* correct.
|
||||
*/
|
||||
mb_metacharinit();
|
||||
mb_charinit();
|
||||
tmatch = NULL;
|
||||
for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) {
|
||||
set_pat_start(p, t-s);
|
||||
|
@ -2855,7 +2855,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
|
|||
/* Largest possible match at tail of string: *
|
||||
* move forward along string until we get a match. *
|
||||
* Again there's no optimisation. */
|
||||
mb_metacharinit();
|
||||
mb_charinit();
|
||||
for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) {
|
||||
set_pat_start(p, t-s);
|
||||
if (pattrylen(p, t, s + l - t, umlen, ioff)) {
|
||||
|
@ -2889,7 +2889,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
|
|||
}
|
||||
ioff = 0; /* offset into string */
|
||||
umlen = umltot;
|
||||
mb_metacharinit();
|
||||
mb_charinit();
|
||||
do {
|
||||
/* loop over all matches for global substitution */
|
||||
matched = 0;
|
||||
|
@ -2986,7 +2986,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
|
|||
*/
|
||||
nmatches = 0;
|
||||
tmatch = NULL;
|
||||
mb_metacharinit();
|
||||
mb_charinit();
|
||||
for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) {
|
||||
set_pat_start(p, t-s);
|
||||
if (pattrylen(p, t, s + l - t, umlen, ioff)) {
|
||||
|
@ -3002,7 +3002,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
|
|||
* We need to find the n'th last match.
|
||||
*/
|
||||
n = nmatches - n;
|
||||
mb_metacharinit();
|
||||
mb_charinit();
|
||||
for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) {
|
||||
set_pat_start(p, t-s);
|
||||
if (pattrylen(p, t, s + l - t, umlen, ioff) &&
|
||||
|
|
|
@ -2000,7 +2000,7 @@ casemodify(char *str, int how)
|
|||
VARARR(char, mbstr, MB_CUR_MAX);
|
||||
mbstate_t ps;
|
||||
|
||||
mb_metacharinit();
|
||||
mb_charinit();
|
||||
memset(&ps, 0, sizeof(ps));
|
||||
while (*str) {
|
||||
wint_t wc;
|
||||
|
|
|
@ -964,7 +964,7 @@ stradd(char *d)
|
|||
/* FALL THROUGH */
|
||||
default:
|
||||
/* Take full wide character in one go */
|
||||
mb_metacharinit();
|
||||
mb_charinit();
|
||||
pc = wcs_nicechar(cc, NULL, NULL);
|
||||
break;
|
||||
}
|
||||
|
|
89
Src/utils.c
89
Src/utils.c
|
@ -82,7 +82,7 @@ set_widearray(char *mb_array, Widechar_array wca)
|
|||
wchar_t *wcptr = tmpwcs;
|
||||
wint_t wci;
|
||||
|
||||
mb_metacharinit();
|
||||
mb_charinit();
|
||||
while (*mb_array) {
|
||||
int mblen = mb_metacharlenconv(mb_array, &wci);
|
||||
|
||||
|
@ -332,7 +332,7 @@ zerrmsg(FILE *file, const char *fmt, va_list ap)
|
|||
case 'c':
|
||||
num = va_arg(ap, int);
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
mb_metacharinit();
|
||||
mb_charinit();
|
||||
zputs(wcs_nicechar(num, NULL, NULL), file);
|
||||
#else
|
||||
zputs(nicechar(num), file);
|
||||
|
@ -461,12 +461,13 @@ static mbstate_t mb_shiftstate;
|
|||
|
||||
/*
|
||||
* Initialise multibyte state: called before a sequence of
|
||||
* wcs_nicechar() or mb_metacharlenconv().
|
||||
* wcs_nicechar(), mb_metacharlenconv(), or
|
||||
* mb_charlenconv().
|
||||
*/
|
||||
|
||||
/**/
|
||||
mod_export void
|
||||
mb_metacharinit(void)
|
||||
mb_charinit(void)
|
||||
{
|
||||
memset(&mb_shiftstate, 0, sizeof(mb_shiftstate));
|
||||
}
|
||||
|
@ -500,7 +501,7 @@ mb_metacharinit(void)
|
|||
* (but not both). (Note the complication that the wide character
|
||||
* part may contain metafied characters.)
|
||||
*
|
||||
* The caller needs to call mb_metacharinit() before the first call, to
|
||||
* The caller needs to call mb_charinit() before the first call, to
|
||||
* set up the multibyte shift state for a range of characters.
|
||||
*/
|
||||
|
||||
|
@ -3832,7 +3833,7 @@ itype_end(const char *ptr, int itype, int once)
|
|||
#ifdef MULTIBYTE_SUPPORT
|
||||
if (isset(MULTIBYTE) &&
|
||||
(itype != IIDENT || !isset(POSIXIDENTIFIERS))) {
|
||||
mb_metacharinit();
|
||||
mb_charinit();
|
||||
while (*ptr) {
|
||||
wint_t wc;
|
||||
int len = mb_metacharlenconv(ptr, &wc);
|
||||
|
@ -4972,6 +4973,65 @@ mb_metastrlenend(char *ptr, int width, char *eptr)
|
|||
return num + num_in_char;
|
||||
}
|
||||
|
||||
/*
|
||||
* The equivalent of mb_metacharlenconv_r() for
|
||||
* strings that aren't metafied and hence have
|
||||
* explicit lengths.
|
||||
*/
|
||||
|
||||
/**/
|
||||
mod_export int
|
||||
mb_charlenconv_r(const char *s, int slen, wint_t *wcp, mbstate_t *mbsp)
|
||||
{
|
||||
size_t ret = MB_INVALID;
|
||||
char inchar;
|
||||
const char *ptr;
|
||||
wchar_t wc;
|
||||
|
||||
for (ptr = s; slen; ) {
|
||||
inchar = *ptr;
|
||||
ptr++;
|
||||
slen--;
|
||||
ret = mbrtowc(&wc, &inchar, 1, mbsp);
|
||||
|
||||
if (ret == MB_INVALID)
|
||||
break;
|
||||
if (ret == MB_INCOMPLETE)
|
||||
continue;
|
||||
if (wcp)
|
||||
*wcp = wc;
|
||||
return ptr - s;
|
||||
}
|
||||
|
||||
if (wcp)
|
||||
*wcp = WEOF;
|
||||
/* No valid multibyte sequence */
|
||||
memset(mbsp, 0, sizeof(*mbsp));
|
||||
if (ptr > s) {
|
||||
return 1; /* Treat as single byte character */
|
||||
} else
|
||||
return 0; /* Probably shouldn't happen */
|
||||
}
|
||||
|
||||
/*
|
||||
* The equivalent of mb_metacharlenconv() for
|
||||
* strings that aren't metafied and hence have
|
||||
* explicit lengths;
|
||||
*/
|
||||
|
||||
/**/
|
||||
mod_export int
|
||||
mb_charlenconv(const char *s, int slen, wint_t *wcp)
|
||||
{
|
||||
if (!isset(MULTIBYTE)) {
|
||||
if (wcp)
|
||||
*wcp = (wint_t)*s;
|
||||
return 1;
|
||||
}
|
||||
|
||||
return mb_charlenconv_r(s, slen, wcp, &mb_shiftstate);
|
||||
}
|
||||
|
||||
/**/
|
||||
#else
|
||||
|
||||
|
@ -4996,6 +5056,23 @@ metacharlenconv(const char *x, int *c)
|
|||
return 1;
|
||||
}
|
||||
|
||||
/* Simple replacement for mb_charlenconv */
|
||||
|
||||
/**/
|
||||
mod_export int
|
||||
charlenconv(const char *x, int len, int *c)
|
||||
{
|
||||
if (!len) {
|
||||
if (c)
|
||||
*c = '\0';
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (c)
|
||||
*c = (char)*x;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**/
|
||||
#endif /* MULTIBYTE_SUPPORT */
|
||||
|
||||
|
|
12
Src/zsh.h
12
Src/zsh.h
|
@ -2921,8 +2921,9 @@ enum {
|
|||
#define AFTERTRAPHOOK (zshhooks + 2)
|
||||
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
/* Metafied input */
|
||||
#define nicezputs(str, outs) (void)mb_niceformat((str), (outs), NULL, 0)
|
||||
#define MB_METACHARINIT() mb_metacharinit()
|
||||
#define MB_METACHARINIT() mb_charinit()
|
||||
typedef wint_t convchar_t;
|
||||
#define MB_METACHARLENCONV(str, cp) mb_metacharlenconv((str), (cp))
|
||||
#define MB_METACHARLEN(str) mb_metacharlenconv(str, NULL)
|
||||
|
@ -2932,6 +2933,11 @@ typedef wint_t convchar_t;
|
|||
#define MB_METASTRLEN2END(str, widthp, eptr) \
|
||||
mb_metastrlenend(str, widthp, eptr)
|
||||
|
||||
/* Unmetafied input */
|
||||
#define MB_CHARINIT() mb_charinit()
|
||||
#define MB_CHARLENCONV(str, len, cp) mb_charlenconv((str), (len), (cp))
|
||||
#define MB_CHARLEN(str, len) mb_charlenconv((str), (len), NULL)
|
||||
|
||||
/*
|
||||
* We replace broken implementations with one that uses Unicode
|
||||
* characters directly as wide characters. In principle this is only
|
||||
|
@ -3015,6 +3021,10 @@ typedef int convchar_t;
|
|||
#define MB_METASTRLEN2(str, widthp) ztrlen(str)
|
||||
#define MB_METASTRLEN2END(str, widthp, eptr) ztrlenend(str, eptr)
|
||||
|
||||
#define MB_CHARINIT()
|
||||
#define MB_CHARLENCONV(str, len, cp) charlenconv((str), (len), (cp))
|
||||
#define MB_CHARLEN(str, len) ((len) ? 1 : 0)
|
||||
|
||||
#define WCWIDTH_WINT(c) (1)
|
||||
|
||||
/* Leave character or string as is. */
|
||||
|
|
|
@ -484,3 +484,16 @@
|
|||
# This doesn't look aligned in my editor because actually the characters
|
||||
# aren't quite double width, but the arithmetic is correct.
|
||||
# It appears just to be an effect of the font.
|
||||
|
||||
if zmodload -i zsh/regex 2>/dev/null; then
|
||||
[[ $'\ua0' =~ '^.$' ]] && print OK
|
||||
[[ $'\ua0' =~ $'^\ua0$' ]] && print OK
|
||||
[[ $'\ua0'X =~ '^X$' ]] || print OK
|
||||
else
|
||||
print -u$ZTST_fd "Regexp test skipped, regexp library not found."
|
||||
print -l OK OK OK
|
||||
fi
|
||||
0:Ensure no confusion on metafied input to regex module
|
||||
>OK
|
||||
>OK
|
||||
>OK
|
||||
|
|
Loading…
Reference in a new issue