mirror of
https://github.com/zsh-users/zsh
synced 2024-07-21 10:14:19 +00:00
23119: lower case in sorting properly
This commit is contained in:
parent
c53aa4adee
commit
e375d5ee88
|
@ -1,3 +1,8 @@
|
|||
2007-01-22 Peter Stephenson <pws@csr.com>
|
||||
|
||||
* 23119: Src/sort.c, Test/B03print.ztst, Test/D07multibyte.ztst:
|
||||
do lowering of multibyte character case in sorting properly.
|
||||
|
||||
2007-01-21 Peter Stephenson <p.w.stephenson@ntlworld.com>
|
||||
|
||||
* 23118: Doc/Zsh/expn.yo, Src/builtin.c, Src/glob.c, Src/jobs.c,
|
||||
|
|
48
Src/sort.c
48
Src/sort.c
|
@ -248,7 +248,8 @@ strmetasort(char **array, int sortwhat, int *unmetalenp)
|
|||
|| *metaptr == Meta) {
|
||||
char *s, *t, *src = *arrptr, *dst;
|
||||
int len;
|
||||
sortarrptr->cmp = dst = (char *)zhalloc(strlen(src) + 1);
|
||||
sortarrptr->cmp = dst =
|
||||
(char *)zhalloc(((sortwhat & SORTIT_IGNORING_CASE)?2:1)*strlen(src)+1);
|
||||
|
||||
if (unmetalenp) {
|
||||
/* Already unmetafied and we have the length. */
|
||||
|
@ -283,8 +284,49 @@ strmetasort(char **array, int sortwhat, int *unmetalenp)
|
|||
len = metaptr - src;
|
||||
}
|
||||
if (sortwhat & SORTIT_IGNORING_CASE) {
|
||||
for (s = src, t = dst; s - src != len; )
|
||||
*t++ = tulower(*s++);
|
||||
char *send = src + len;
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
if (isset(MULTIBYTE)) {
|
||||
/*
|
||||
* Lower the case the hard way. Convert to a wide
|
||||
* character, process that, and convert back. We
|
||||
* don't assume the characters have the same
|
||||
* multibyte length. We can't use casemodify()
|
||||
* because we have unmetafied data, which may have
|
||||
* been passed down to us.
|
||||
*/
|
||||
mbstate_t mbsin, mbsout;
|
||||
int clen;
|
||||
wchar_t wc;
|
||||
memset(&mbsin, 0, sizeof(mbstate_t));
|
||||
memset(&mbsout, 0, sizeof(mbstate_t));
|
||||
|
||||
for (s = src, t = dst; s < send; ) {
|
||||
clen = mbrtowc(&wc, s, send-s, &mbsin);
|
||||
if (clen < 0) {
|
||||
/* invalid or unfinished: treat as single bytes */
|
||||
while (s < send)
|
||||
*t++ = tulower(*s++);
|
||||
break;
|
||||
}
|
||||
if (clen == 0) {
|
||||
/* embedded null */
|
||||
*t++ = '\0';
|
||||
s++;
|
||||
continue;
|
||||
}
|
||||
s += clen;
|
||||
wc = towlower(wc);
|
||||
clen = wcrtomb(t, wc, &mbsout);
|
||||
t += clen;
|
||||
DPUTS(clen < 0, "Bad conversion when lowering case");
|
||||
}
|
||||
*t = '\0';
|
||||
len = t - dst;
|
||||
} else
|
||||
#endif
|
||||
for (s = src, t = dst; s < send; )
|
||||
*t++ = tulower(*s++);
|
||||
src = dst;
|
||||
}
|
||||
if (sortwhat & SORTIT_IGNORING_BACKSLASHES) {
|
||||
|
|
|
@ -34,7 +34,12 @@
|
|||
>baz
|
||||
>bar
|
||||
|
||||
print -io a B c
|
||||
# some locales force case-insensitive sorting
|
||||
(LC_ALL=C; print -o a B c)
|
||||
0:case-sensitive argument sorting
|
||||
>B a c
|
||||
|
||||
(LC_ALL=C; print -io a B c)
|
||||
0:case-insensitive argument sorting
|
||||
>a B c
|
||||
|
||||
|
|
|
@ -2,6 +2,8 @@
|
|||
|
||||
# Find a UTF-8 locale.
|
||||
setopt multibyte
|
||||
# Don't let LC_* override our choice of locale.
|
||||
unset -m LC_\*
|
||||
mb_ok=
|
||||
langs=(en_US.UTF-8 en_GB.UTF-8 en.UTF-8
|
||||
$(locale -a 2>/dev/null | sed -e 's/utf8/UTF-8/' | grep UTF-8))
|
||||
|
@ -315,3 +317,12 @@
|
|||
printf "%4.3s\n" főobar
|
||||
0:Multibyte characters in printf widths
|
||||
> főo
|
||||
|
||||
# We ask for case-insensitive sorting here (and supply upper case
|
||||
# characters) so that we exercise the logic in the shell that lowers the
|
||||
# case of the string for case-insensitive sorting.
|
||||
print -oi HAH HUH HEH HÉH HÈH
|
||||
(LC_ALL=C; print -oi HAH HUH HEH HÉH HÈH)
|
||||
0:Multibyte characters in print sorting
|
||||
>HAH HEH HÉH HÈH HUH
|
||||
>HAH HEH HUH HÈH HÉH
|
||||
|
|
Loading…
Reference in a new issue