mirror of
https://github.com/zsh-users/zsh
synced 2024-07-21 10:14:19 +00:00
36478: Add [[:INCOMPLETE:]] and [[:INVALID:]] pattern tests.
This commit is contained in:
parent
3bca11c35c
commit
e86b3cce47
|
@ -1,3 +1,9 @@
|
|||
2015-09-10 Peter Stephenson <p.w.stephenson@ntlworld.com>
|
||||
|
||||
* 36478: Src/pattern.c, Src/zsh.h, Src/Zle/comp.h,
|
||||
Doc/Zsh/expn.yo, Test/D07multibyte.ztst: add [[:INCOMPLETE:]] and
|
||||
[[:INVALID:]] pattern tests.
|
||||
|
||||
2015-09-10 Barton E. Schaefer <schaefer@zsh.org>
|
||||
|
||||
* 36470: Src/Zle/zle_main.c: Auxiliary to 36468, return an empty
|
||||
|
|
|
@ -1956,6 +1956,20 @@ ifzman(the zmanref(zshparam) manual page)\
|
|||
ifnzman(noderef(Parameters Used By The Shell))\
|
||||
.
|
||||
)
|
||||
item(tt([:INCOMPLETE:]))(
|
||||
Matches a byte that starts an incomplete multibyte character.
|
||||
Note that there may be a sequence of two or more bytes that
|
||||
taken together form the prefix of a multibyte character. To
|
||||
test for a potentially incomplete byte sequence, use the pattern
|
||||
`tt([[:INCOMPLETE:]]*)'. This will never match a sequence starting
|
||||
with a valid multibyte character.
|
||||
)
|
||||
item(tt([:INVALID:]))(
|
||||
Matches a byte that does not start a valid multibyte character.
|
||||
Note this may be a continuation byte of an incomplete multibyte
|
||||
character as any part of a multibyte string consisting of invalid and
|
||||
incomplete multibyte characters is treated as single bytes.
|
||||
)
|
||||
item(tt([:WORD:]))(
|
||||
The character is treated as part of a word; this test is sensitive
|
||||
to the value of the tt(WORDCHARS) parameter
|
||||
|
|
|
@ -202,8 +202,9 @@ struct cpattern {
|
|||
* TODO: this will change.
|
||||
*/
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
#define PATMATCHRANGE(r, c, ip, mtp) mb_patmatchrange(r, c, ip, mtp)
|
||||
#define PATMATCHINDEX(r, i, cp, mtp) mb_patmatchindex(r, i, cp, mtp)
|
||||
#define PATMATCHRANGE(r, c, ip, mtp) \
|
||||
mb_patmatchrange(r, c, ZMB_VALID, ip, mtp)
|
||||
#define PATMATCHINDEX(r, i, cp, mtp) mb_patmatchindex(r, i, cp, mtp)
|
||||
#define CONVCAST(c) ((wchar_t)(c))
|
||||
#define CHR_INVALID (WEOF)
|
||||
#else
|
||||
|
|
|
@ -145,7 +145,7 @@ typedef union upat *Upat;
|
|||
*
|
||||
* P_ANY, P_ANYOF: the operand is a null terminated
|
||||
* string. Normal characters match as expected. Characters
|
||||
* in the range Meta+PP_ALPHA..Meta+PP_UNKNWN do the appropriate
|
||||
* in the range Meta+PP_ALPHA..Meta+PP_UNKWN do the appropriate
|
||||
* Posix range tests. This relies on imeta returning true for these
|
||||
* characters. We treat unknown POSIX ranges as never matching.
|
||||
* PP_RANGE means the next two (possibly metafied) characters form
|
||||
|
@ -1119,7 +1119,7 @@ patgetglobflags(char **strp, long *assertp, int *ignore)
|
|||
static const char *colon_stuffs[] = {
|
||||
"alpha", "alnum", "ascii", "blank", "cntrl", "digit", "graph",
|
||||
"lower", "print", "punct", "space", "upper", "xdigit", "IDENT",
|
||||
"IFS", "IFSSPACE", "WORD", NULL
|
||||
"IFS", "IFSSPACE", "WORD", "INCOMPLETE", "INVALID", NULL
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -1870,9 +1870,9 @@ static int globdots; /* Glob initial dots? */
|
|||
#ifdef MULTIBYTE_SUPPORT
|
||||
|
||||
/* Get a character from the start point in a string */
|
||||
#define CHARREF(x, y) charref((x), (y))
|
||||
#define CHARREF(x, y) charref((x), (y), (int *)NULL)
|
||||
static wchar_t
|
||||
charref(char *x, char *y)
|
||||
charref(char *x, char *y, int *zmb_ind)
|
||||
{
|
||||
wchar_t wc;
|
||||
size_t ret;
|
||||
|
@ -1886,9 +1886,13 @@ charref(char *x, char *y)
|
|||
/* Error. */
|
||||
/* Reset the shift state for next time. */
|
||||
memset(&shiftstate, 0, sizeof(shiftstate));
|
||||
if (zmb_ind)
|
||||
*zmb_ind = (ret == MB_INVALID) ? ZMB_INVALID : ZMB_INCOMPLETE;
|
||||
return WCHAR_INVALID(*x);
|
||||
}
|
||||
|
||||
if (zmb_ind)
|
||||
*zmb_ind = ZMB_VALID;
|
||||
return wc;
|
||||
}
|
||||
|
||||
|
@ -2580,10 +2584,11 @@ patmatch(Upat prog)
|
|||
fail = 1;
|
||||
else {
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
wchar_t cr = CHARREF(patinput, patinend);
|
||||
int zmb_ind;
|
||||
wchar_t cr = charref(patinput, patinend, &zmb_ind);
|
||||
char *scanop = (char *)P_OPERAND(scan);
|
||||
if (patglobflags & GF_MULTIBYTE) {
|
||||
if (mb_patmatchrange(scanop, cr, NULL, NULL) ^
|
||||
if (mb_patmatchrange(scanop, cr, zmb_ind, NULL, NULL) ^
|
||||
(P_OP(scan) == P_ANYOF))
|
||||
fail = 1;
|
||||
else
|
||||
|
@ -3351,6 +3356,9 @@ patmatch(Upat prog)
|
|||
* The null-terminated specification is in range; the test
|
||||
* character is in ch.
|
||||
*
|
||||
* zmb_ind is one of the ZMB_* enum values defined in zsh.h, for indicating
|
||||
* incomplete or invalid multibyte characters.
|
||||
*
|
||||
* indptr is used by completion matching, which is why this
|
||||
* function is exported. If indptr is not NULL we set *indptr
|
||||
* to the index of the character in the range string, adjusted
|
||||
|
@ -3367,7 +3375,7 @@ patmatch(Upat prog)
|
|||
|
||||
/**/
|
||||
mod_export int
|
||||
mb_patmatchrange(char *range, wchar_t ch, wint_t *indptr, int *mtp)
|
||||
mb_patmatchrange(char *range, wchar_t ch, int zmb_ind, wint_t *indptr, int *mtp)
|
||||
{
|
||||
wchar_t r1, r2;
|
||||
|
||||
|
@ -3476,6 +3484,14 @@ mb_patmatchrange(char *range, wchar_t ch, wint_t *indptr, int *mtp)
|
|||
*indptr += r2 - r1;
|
||||
}
|
||||
break;
|
||||
case PP_INCOMPLETE:
|
||||
if (zmb_ind == ZMB_INCOMPLETE)
|
||||
return 1;
|
||||
break;
|
||||
case PP_INVALID:
|
||||
if (zmb_ind == ZMB_INVALID)
|
||||
return 1;
|
||||
break;
|
||||
case PP_UNKWN:
|
||||
DPUTS(1, "BUG: unknown posix range passed through.\n");
|
||||
break;
|
||||
|
@ -3545,6 +3561,8 @@ mb_patmatchindex(char *range, wint_t ind, wint_t *chr, int *mtp)
|
|||
case PP_IFS:
|
||||
case PP_IFSSPACE:
|
||||
case PP_WORD:
|
||||
case PP_INCOMPLETE:
|
||||
case PP_INVALID:
|
||||
if (!ind) {
|
||||
*mtp = swtype;
|
||||
return 1;
|
||||
|
@ -3698,6 +3716,10 @@ patmatchrange(char *range, int ch, int *indptr, int *mtp)
|
|||
if (indptr && r1 < r2)
|
||||
*indptr += r2 - r1;
|
||||
break;
|
||||
case PP_INCOMPLETE:
|
||||
case PP_INVALID:
|
||||
/* Never true if not in multibyte mode */
|
||||
break;
|
||||
case PP_UNKWN:
|
||||
DPUTS(1, "BUG: unknown posix range passed through.\n");
|
||||
break;
|
||||
|
@ -3768,6 +3790,8 @@ patmatchindex(char *range, int ind, int *chr, int *mtp)
|
|||
case PP_IFS:
|
||||
case PP_IFSSPACE:
|
||||
case PP_WORD:
|
||||
case PP_INCOMPLETE:
|
||||
case PP_INVALID:
|
||||
if (!ind) {
|
||||
*mtp = swtype;
|
||||
return 1;
|
||||
|
@ -3851,9 +3875,10 @@ static int patrepeat(Upat p, char *charstart)
|
|||
case P_ANYBUT:
|
||||
while (scan < patinend) {
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
wchar_t cr = CHARREF(scan, patinend);
|
||||
int zmb_ind;
|
||||
wchar_t cr = charref(scan, patinend, &zmb_ind);
|
||||
if (patglobflags & GF_MULTIBYTE) {
|
||||
if (mb_patmatchrange(opnd, cr, NULL, NULL) ^
|
||||
if (mb_patmatchrange(opnd, cr, zmb_ind, NULL, NULL) ^
|
||||
(P_OP(p) == P_ANYOF))
|
||||
break;
|
||||
} else if (patmatchrange(opnd, (int)cr, NULL, NULL) ^
|
||||
|
|
17
Src/zsh.h
17
Src/zsh.h
|
@ -1562,13 +1562,15 @@ typedef struct zpc_disables_save *Zpc_disables_save;
|
|||
#define PP_IFS 15
|
||||
#define PP_IFSSPACE 16
|
||||
#define PP_WORD 17
|
||||
#define PP_INCOMPLETE 18
|
||||
#define PP_INVALID 19
|
||||
/* Special value for last definition */
|
||||
#define PP_LAST 17
|
||||
#define PP_LAST 19
|
||||
|
||||
/* Unknown type. Not used in a valid token. */
|
||||
#define PP_UNKWN 18
|
||||
#define PP_UNKWN 20
|
||||
/* Range: token followed by the (possibly multibyte) start and end */
|
||||
#define PP_RANGE 19
|
||||
#define PP_RANGE 21
|
||||
|
||||
/* Globbing flags: lower 8 bits gives approx count */
|
||||
#define GF_LCMATCHUC 0x0100
|
||||
|
@ -1577,6 +1579,15 @@ typedef struct zpc_disables_save *Zpc_disables_save;
|
|||
#define GF_MATCHREF 0x0800
|
||||
#define GF_MULTIBYTE 0x1000 /* Use multibyte if supported by build */
|
||||
|
||||
enum {
|
||||
/* Valid multibyte character from charref */
|
||||
ZMB_VALID,
|
||||
/* Incomplete multibyte character from charref */
|
||||
ZMB_INCOMPLETE,
|
||||
/* Invalid multibyte character from charref */
|
||||
ZMB_INVALID
|
||||
};
|
||||
|
||||
/* Dummy Patprog pointers. Used mainly in executable code, but the
|
||||
* pattern code needs to know about it, too. */
|
||||
|
||||
|
|
|
@ -525,3 +525,9 @@
|
|||
fi
|
||||
done
|
||||
0:Invalid characters in pattern matching
|
||||
|
||||
[[ $'\xe3' == [[:INCOMPLETE:]] ]] || print fail 1
|
||||
[[ $'\xe3\x83' == [[:INCOMPLETE:]][[:INVALID:]] ]] || print fail 2
|
||||
[[ $'\xe3\x83\x9b' != [[:INCOMPLETE:][:INVALID:]] ]] || print fail 3
|
||||
[[ $'\xe3\x83\x9b' = ? ]] || print fail 4
|
||||
0:Testing incomplete and invalid multibyte character components
|
||||
|
|
Loading…
Reference in a new issue