This patch addresses two problems.

The first one is relatively minor: according to our own manpage, the upper
and lower classes must be sorted, but currently they are not.

The second one is serious:
	tr '[:lower:]' '[:upper:]'
	(and vice versa) currently works only if the upper and lower classes
	have exactly the same number of elements. When that is not true, as in
	many ISO8859-x locales which have more lowercase letters than uppercase
	ones, tr may do nasty things.

	See this page
	http://www.opengroup.org/onlinepubs/007908799/xcu/tr.html
	for a detailed description of the desired tr behaviour in such cases.
This commit is contained in:
Andrey A. Chernov 2003-08-03 02:23:39 +00:00
parent 0d5a03997a
commit 00611f0457
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=118371
3 changed files with 71 additions and 21 deletions

View file

@ -40,7 +40,8 @@
typedef struct { typedef struct {
enum { STRING1, STRING2 } which; enum { STRING1, STRING2 } which;
enum { EOS, INFINITE, NORMAL, RANGE, SEQUENCE, SET } state; enum { EOS, INFINITE, NORMAL, RANGE, SEQUENCE,
SET, SET_UPPER, SET_LOWER } state;
int cnt; /* character count */ int cnt; /* character count */
int lastch; /* last character */ int lastch; /* last character */
int equiv[NCHARS]; /* equivalence set */ int equiv[NCHARS]; /* equivalence set */
@ -49,3 +50,5 @@ typedef struct {
} STR; } STR;
int next(STR *); int next(STR *);
int charcoll(const void *, const void *);

View file

@ -106,6 +106,8 @@ next(s)
} }
return (1); return (1);
case SET: case SET:
case SET_UPPER:
case SET_LOWER:
if ((s->lastch = s->set[s->cnt++]) == OOBCH) { if ((s->lastch = s->set[s->cnt++]) == OOBCH) {
s->state = NORMAL; s->state = NORMAL;
return (next(s)); return (next(s));
@ -194,7 +196,7 @@ genclass(s)
{ {
int cnt, (*func)(int); int cnt, (*func)(int);
CLASS *cp, tmp; CLASS *cp, tmp;
int *p; int *p, n;
tmp.name = s->str; tmp.name = s->str;
if ((cp = (CLASS *)bsearch(&tmp, classes, sizeof(classes) / if ((cp = (CLASS *)bsearch(&tmp, classes, sizeof(classes) /
@ -208,10 +210,18 @@ genclass(s)
if ((func)(cnt)) if ((func)(cnt))
*p++ = cnt; *p++ = cnt;
*p = OOBCH; *p = OOBCH;
n = p - cp->set;
s->cnt = 0; s->cnt = 0;
s->state = SET;
s->set = cp->set; s->set = cp->set;
if (strcmp(s->str, "upper") == 0)
s->state = SET_UPPER;
else if (strcmp(s->str, "lower") == 0) {
s->state = SET_LOWER;
} else
s->state = SET;
if ((s->state == SET_LOWER || s->state == SET_UPPER) && n > 1)
mergesort(s->set, n, sizeof(*(s->set)), charcoll);
} }
static int static int

View file

@ -101,7 +101,6 @@ static int string1[NCHARS] = {
STR s1 = { STRING1, NORMAL, 0, OOBCH, { 0, OOBCH }, NULL, NULL }; STR s1 = { STRING1, NORMAL, 0, OOBCH, { 0, OOBCH }, NULL, NULL };
STR s2 = { STRING2, NORMAL, 0, OOBCH, { 0, OOBCH }, NULL, NULL }; STR s2 = { STRING2, NORMAL, 0, OOBCH, { 0, OOBCH }, NULL, NULL };
static int charcoll(const void *, const void *);
static void setup(int *, char *, STR *, int, int); static void setup(int *, char *, STR *, int, int);
static void usage(void); static void usage(void);
@ -224,20 +223,55 @@ main(int argc, char **argv)
if (!next(&s2)) if (!next(&s2))
errx(1, "empty string2"); errx(1, "empty string2");
ch = s2.lastch; /*
/* If string2 runs out of characters, use the last one specified. */ * For -s result will contain only those characters defined
if (sflag) * as the second characters in each of the toupper or tolower
while (next(&s1)) { * pairs.
string1[s1.lastch] = ch = s2.lastch; */
string2[ch] = 1;
(void)next(&s2);
}
else
while (next(&s1)) {
string1[s1.lastch] = ch = s2.lastch;
(void)next(&s2);
}
/* If string2 runs out of characters, use the last one specified. */
while (next(&s1)) {
again:
if (s1.state == SET_LOWER &&
s2.state == SET_UPPER &&
s1.cnt == 1 && s2.cnt == 1) {
do {
string1[s1.lastch] = ch = toupper(s1.lastch);
if (sflag && isupper(ch))
string2[ch] = 1;
if (!next(&s1))
goto endloop;
} while (s1.state == SET_LOWER && s1.cnt > 1);
/* skip upper set */
do {
if (!next(&s2))
break;
} while (s2.state == SET_UPPER && s2.cnt > 1);
goto again;
} else if (s1.state == SET_UPPER &&
s2.state == SET_LOWER &&
s1.cnt == 1 && s2.cnt == 1) {
do {
string1[s1.lastch] = ch = tolower(s1.lastch);
if (sflag && islower(ch))
string2[ch] = 1;
if (!next(&s1))
goto endloop;
} while (s1.state == SET_UPPER && s1.cnt > 1);
/* skip lower set */
do {
if (!next(&s2))
break;
} while (s2.state == SET_LOWER && s2.cnt > 1);
goto again;
} else {
string1[s1.lastch] = s2.lastch;
if (sflag)
string2[s2.lastch] = 1;
}
(void)next(&s2);
}
endloop:
if (cflag || Cflag) { if (cflag || Cflag) {
s2.str = argv[1]; s2.str = argv[1];
s2.state = NORMAL; s2.state = NORMAL;
@ -294,15 +328,18 @@ setup(int *string, char *arg, STR *str, int cflag, int Cflag)
string[cnt] = !string[cnt] && ISCHAR(cnt); string[cnt] = !string[cnt] && ISCHAR(cnt);
} }
static int int
charcoll(const void *a, const void *b) charcoll(const void *a, const void *b)
{ {
char sa[2], sb[2]; static char sa[2], sb[2];
int r;
sa[0] = *(const int *)a; sa[0] = *(const int *)a;
sb[0] = *(const int *)b; sb[0] = *(const int *)b;
sa[1] = sb[1] = '\0'; r = strcoll(sa, sb);
return (strcoll(sa, sb)); if (r == 0)
r = *(const int *)a - *(const int *)b;
return (r);
} }
static void static void