The collate functions within libc have been using version 1 and 1.2 of the

packed LC_COLLATE binary formats. These were generated with the colldef
tool, but the new LC_COLLATE files are going to be generated by the new
localedef tool using CLDR POSIX files as input.  The BSD-flavored
version of localedef identifies the format as "BSD 1.0".  Any
LC_COLLATE file with a different version will simply not be loaded, and
all LC* categories will get set to "C" (aka "POSIX") locale.

This work is based off of Nexenta's contribution to Illumos.
The integration with xlocale is John Marino's work for Dragonfly.

The following commits will enable the localedef tool, disable the colldef
tool, add the generated colldef directory, and finally remove colldef from
base.

The only differences from Dragonfly are:
- a few fixes to build with clang
- identification of the flavor as "BSD 1.0" instead of "Dragonfly 4.4"

Obtained from:	Dragonfly
This commit is contained in:
Baptiste Daroussin 2015-08-07 23:41:26 +00:00
parent 6f7a9f7c8d
commit 2a6abeebef
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/projects/collation/; revision=286430
8 changed files with 778 additions and 374 deletions

View file

@ -214,4 +214,7 @@ FBSDprivate_1.0 {
__detect_path_locale;
__collate_load_error;
__collate_range_cmp;
__collate_load_tables_l;
__collate_lookup;
};

View file

@ -1,4 +1,5 @@
/*-
* Copyright 2010 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
* at Electronni Visti IA, Kiev, Ukraine.
* All rights reserved.
@ -28,50 +29,39 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* Adapted to xlocale by John Marino <draco@marino.st>
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "namespace.h"
#include <arpa/inet.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
#include <errno.h>
#include <unistd.h>
#include <sysexits.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include "un-namespace.h"
#include "collate.h"
#include "setlocale.h"
#include "ldpart.h"
#include "libc_private.h"
/*
* To avoid modifying the original (single-threaded) code too much, we'll just
* define the old globals as fields inside the table.
*
* We also modify the collation table test functions to search the thread-local
* table first and the global table second.
*/
#define __collate_substitute_nontrivial (table->__collate_substitute_nontrivial)
#define __collate_substitute_table_ptr (table->__collate_substitute_table_ptr)
#define __collate_char_pri_table_ptr (table->__collate_char_pri_table_ptr)
#define __collate_chain_pri_table (table->__collate_chain_pri_table)
int __collate_load_error;
struct xlocale_collate __xlocale_global_collate = {
{{0}, "C"}, 1, 0
{{0}, "C"}, 1, 0, 0, 0
};
struct xlocale_collate __xlocale_C_collate = {
{{0}, "C"}, 1, 0
struct xlocale_collate __xlocale_C_collate = {
{{0}, "C"}, 1, 0, 0, 0
};
void __collate_err(int ex, const char *f) __dead2;
#include "libc_private.h"
int
__collate_load_tables_l(const char *encoding, struct xlocale_collate *table);
@ -80,14 +70,14 @@ static void
destruct_collate(void *t)
{
struct xlocale_collate *table = t;
if (__collate_chain_pri_table) {
free(__collate_chain_pri_table);
if (table->map && (table->maplen > 0)) {
(void) munmap(table->map, table->maplen);
}
free(t);
}
void *
__collate_load(const char *encoding, locale_t unused)
__collate_load(const char *encoding, __unused locale_t unused)
{
if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) {
return &__xlocale_C_collate;
@ -110,18 +100,19 @@ int
__collate_load_tables(const char *encoding)
{
int ret = __collate_load_tables_l(encoding, &__xlocale_global_collate);
__collate_load_error = __xlocale_global_collate.__collate_load_error;
return ret;
}
int
__collate_load_tables_l(const char *encoding, struct xlocale_collate *table)
{
FILE *fp;
int i, saverr, chains;
uint32_t u32;
char strbuf[STR_LEN], buf[PATH_MAX];
void *TMP_substitute_table, *TMP_char_pri_table, *TMP_chain_pri_table;
int i, chains, z;
char buf[PATH_MAX];
char *TMP;
char *map;
collate_info_t *info;
struct stat sbuf;
int fd;
/* 'encoding' must be already checked. */
if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) {
@ -129,217 +120,582 @@ __collate_load_tables_l(const char *encoding, struct xlocale_collate *table)
return (_LDP_CACHE);
}
/* 'PathLocale' must be already set & checked. */
/* Range checking not needed, encoding has fixed size */
(void)strcpy(buf, _PathLocale);
(void)strcat(buf, "/");
(void)strcat(buf, encoding);
(void)strcat(buf, "/LC_COLLATE");
if ((fp = fopen(buf, "re")) == NULL)
return (_LDP_ERROR);
(void) snprintf(buf, sizeof (buf), "%s/%s/LC_COLLATE",
_PathLocale, encoding);
if (fread(strbuf, sizeof(strbuf), 1, fp) != 1) {
saverr = errno;
(void)fclose(fp);
errno = saverr;
if ((fd = _open(buf, O_RDONLY)) < 0)
return (_LDP_ERROR);
if (_fstat(fd, &sbuf) < 0) {
(void) _close(fd);
return (_LDP_ERROR);
}
chains = -1;
if (strcmp(strbuf, COLLATE_VERSION) == 0)
chains = 0;
else if (strcmp(strbuf, COLLATE_VERSION1_2) == 0)
chains = 1;
if (chains < 0) {
(void)fclose(fp);
errno = EFTYPE;
if (sbuf.st_size < (COLLATE_STR_LEN + sizeof (info))) {
(void) _close(fd);
errno = EINVAL;
return (_LDP_ERROR);
}
if (chains) {
if (fread(&u32, sizeof(u32), 1, fp) != 1) {
saverr = errno;
(void)fclose(fp);
errno = saverr;
return (_LDP_ERROR);
}
if ((chains = (int)ntohl(u32)) < 1) {
(void)fclose(fp);
errno = EFTYPE;
return (_LDP_ERROR);
map = mmap(NULL, sbuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
(void) _close(fd);
if ((TMP = map) == NULL) {
return (_LDP_ERROR);
}
if (strncmp(TMP, COLLATE_VERSION, COLLATE_STR_LEN) != 0) {
(void) munmap(map, sbuf.st_size);
errno = EINVAL;
return (_LDP_ERROR);
}
TMP += COLLATE_STR_LEN;
info = (void *)TMP;
TMP += sizeof (*info);
if ((info->directive_count < 1) ||
(info->directive_count >= COLL_WEIGHTS_MAX) ||
((chains = info->chain_count) < 0)) {
(void) munmap(map, sbuf.st_size);
errno = EINVAL;
return (_LDP_ERROR);
}
i = (sizeof (collate_char_t) * (UCHAR_MAX + 1)) +
(sizeof (collate_chain_t) * chains) +
(sizeof (collate_large_t) * info->large_count);
for (z = 0; z < (info->directive_count); z++) {
i += sizeof (collate_subst_t) * info->subst_count[z];
}
if (i != (sbuf.st_size - (TMP - map))) {
(void) munmap(map, sbuf.st_size);
errno = EINVAL;
return (_LDP_ERROR);
}
table->char_pri_table = (void *)TMP;
TMP += sizeof (collate_char_t) * (UCHAR_MAX + 1);
for (z = 0; z < info->directive_count; z++) {
if (info->subst_count[z] > 0) {
table->subst_table[z] = (void *)TMP;
TMP += info->subst_count[z] * sizeof (collate_subst_t);
} else {
table->subst_table[z] = NULL;
}
}
if (chains > 0) {
table->chain_pri_table = (void *)TMP;
TMP += chains * sizeof (collate_chain_t);
} else
chains = TABLE_SIZE;
table->chain_pri_table = NULL;
if (info->large_count > 0)
table->large_pri_table = (void *)TMP;
else
table->large_pri_table = NULL;
if ((TMP_substitute_table =
malloc(sizeof(__collate_substitute_table))) == NULL) {
saverr = errno;
(void)fclose(fp);
errno = saverr;
return (_LDP_ERROR);
}
if ((TMP_char_pri_table =
malloc(sizeof(__collate_char_pri_table))) == NULL) {
saverr = errno;
free(TMP_substitute_table);
(void)fclose(fp);
errno = saverr;
return (_LDP_ERROR);
}
if ((TMP_chain_pri_table =
malloc(sizeof(*__collate_chain_pri_table) * chains)) == NULL) {
saverr = errno;
free(TMP_substitute_table);
free(TMP_char_pri_table);
(void)fclose(fp);
errno = saverr;
return (_LDP_ERROR);
}
#define FREAD(a, b, c, d) \
{ \
if (fread(a, b, c, d) != c) { \
saverr = errno; \
free(TMP_substitute_table); \
free(TMP_char_pri_table); \
free(TMP_chain_pri_table); \
(void)fclose(d); \
errno = saverr; \
return (_LDP_ERROR); \
} \
}
FREAD(TMP_substitute_table, sizeof(__collate_substitute_table), 1, fp);
FREAD(TMP_char_pri_table, sizeof(__collate_char_pri_table), 1, fp);
FREAD(TMP_chain_pri_table,
sizeof(*__collate_chain_pri_table), chains, fp);
(void)fclose(fp);
if (__collate_substitute_table_ptr != NULL)
free(__collate_substitute_table_ptr);
__collate_substitute_table_ptr = TMP_substitute_table;
if (__collate_char_pri_table_ptr != NULL)
free(__collate_char_pri_table_ptr);
__collate_char_pri_table_ptr = TMP_char_pri_table;
for (i = 0; i < UCHAR_MAX + 1; i++) {
__collate_char_pri_table[i].prim =
ntohl(__collate_char_pri_table[i].prim);
__collate_char_pri_table[i].sec =
ntohl(__collate_char_pri_table[i].sec);
}
if (__collate_chain_pri_table != NULL)
free(__collate_chain_pri_table);
__collate_chain_pri_table = TMP_chain_pri_table;
for (i = 0; i < chains; i++) {
__collate_chain_pri_table[i].prim =
ntohl(__collate_chain_pri_table[i].prim);
__collate_chain_pri_table[i].sec =
ntohl(__collate_chain_pri_table[i].sec);
}
__collate_substitute_nontrivial = 0;
for (i = 0; i < UCHAR_MAX + 1; i++) {
if (__collate_substitute_table[i][0] != i ||
__collate_substitute_table[i][1] != 0) {
__collate_substitute_nontrivial = 1;
break;
}
}
table->info = info;
table->__collate_load_error = 0;
return (_LDP_LOADED);
}
u_char *
__collate_substitute(struct xlocale_collate *table, const u_char *s)
{
int dest_len, len, nlen;
int delta = strlen(s);
u_char *dest_str = NULL;
/*
* Note: for performance reasons, we have expanded bsearch here. This avoids
* function call overhead with each comparison.
*/
if (s == NULL || *s == '\0')
return (__collate_strdup(""));
delta += delta / 8;
dest_str = malloc(dest_len = delta);
if (dest_str == NULL)
__collate_err(EX_OSERR, __func__);
len = 0;
while (*s) {
nlen = len + strlen(__collate_substitute_table[*s]);
if (dest_len <= nlen) {
dest_str = reallocf(dest_str, dest_len = nlen + delta);
if (dest_str == NULL)
__collate_err(EX_OSERR, __func__);
/*
 * Find the substitution (expansion) weights for a priority value.
 *
 * table - collation tables to search
 * key   - priority obtained from an earlier lookup at level 'pass'; only
 *         values carrying the COLLATE_SUBST_PRIORITY bit refer to the
 *         substitution table, and the remaining bits are the entry index
 * pass  - collation level whose substitution table is consulted
 *
 * Returns the matching entry's priority array, or NULL when 'pass' is not
 * a valid level, the level has no substitutions, 'key' is not a
 * substitution priority, or the index encoded in 'key' lies outside the
 * level's substitution table.
 */
static int32_t *
substsearch(struct xlocale_collate *table, const wchar_t key, int pass)
{
	collate_subst_t *p;
	int32_t idx;
	int n;

	/* Validate 'pass' before it is used to index subst_count[]. */
	if (pass < 0 || pass >= table->info->directive_count)
		return (NULL);

	n = table->info->subst_count[pass];
	if (n <= 0)
		return (NULL);

	if (!(key & COLLATE_SUBST_PRIORITY))
		return (NULL);

	/*
	 * A negative priority also has the substitution bit set; without
	 * this range check it would yield an index far outside the table.
	 */
	idx = key & ~COLLATE_SUBST_PRIORITY;
	if (idx < 0 || idx >= n)
		return (NULL);

	p = table->subst_table[pass] + idx;
	return (p->pri);
}
static collate_chain_t *
chainsearch(struct xlocale_collate *table, const wchar_t *key, int *len)
{
int low;
int high;
int next, compar, l;
collate_chain_t *p;
collate_chain_t *tab;
if (table->info->chain_count == 0)
return (NULL);
low = 0;
high = table->info->chain_count - 1;
tab = table->chain_pri_table;
while (low <= high) {
next = (low + high) / 2;
p = tab + next;
compar = *key - *p->str;
if (compar == 0) {
l = wcsnlen(p->str, COLLATE_STR_LEN);
compar = wcsncmp(key, p->str, l);
if (compar == 0) {
*len = l;
return (p);
}
}
(void)strcpy(dest_str + len, __collate_substitute_table[*s++]);
len = nlen;
if (compar > 0)
low = next + 1;
else
high = next - 1;
}
return (dest_str);
return (NULL);
}
/*
 * Binary search of the large (beyond 8-bit) character priority table.
 *
 * Returns the entry whose 'val' equals 'key', or NULL when the table is
 * empty or holds no such entry.
 */
static collate_large_t *
largesearch(struct xlocale_collate *table, const wchar_t key)
{
	collate_large_t *base = table->large_pri_table;
	collate_large_t *entry;
	int lo, hi, mid, diff;

	lo = 0;
	hi = table->info->large_count - 1;

	if (table->info->large_count == 0)
		return (NULL);

	for (; lo <= hi; ) {
		mid = (lo + hi) / 2;
		entry = base + mid;
		diff = key - entry->val;

		if (diff == 0)
			return (entry);

		/* Narrow the closed interval [lo, hi] around the key. */
		if (diff > 0)
			lo = mid + 1;
		else
			hi = mid - 1;
	}

	return (NULL);
}
void
__collate_lookup(struct xlocale_collate *table, const u_char *t, int *len, int *prim, int *sec)
_collate_lookup(struct xlocale_collate *table, const wchar_t *t, int *len,
int *pri, int which, const int **state)
{
struct __collate_st_chain_pri *p2;
collate_chain_t *p2;
collate_large_t *match;
int p, l;
const int *sptr;
/*
* If this is the "last" pass for the UNDEFINED, then
* we just return the priority itself.
*/
if (which >= table->info->directive_count) {
*pri = *t;
*len = 1;
*state = NULL;
return;
}
/*
* If we have remaining substitution data from a previous
* call, consume it first.
*/
if ((sptr = *state) != NULL) {
*pri = *sptr;
sptr++;
*state = *sptr ? sptr : NULL;
*len = 0;
return;
}
/* No active substitutions */
*len = 1;
*prim = *sec = 0;
for (p2 = __collate_chain_pri_table; p2->str[0] != '\0'; p2++) {
if (*t == p2->str[0] &&
strncmp(t, p2->str, strlen(p2->str)) == 0) {
*len = strlen(p2->str);
*prim = p2->prim;
*sec = p2->sec;
return;
/*
* Check for composites such as diphthongs that collate as a
* single element (aka chains or collating-elements).
*/
if (((p2 = chainsearch(table, t, &l)) != NULL) &&
((p = p2->pri[which]) >= 0)) {
*len = l;
*pri = p;
} else if (*t <= UCHAR_MAX) {
/*
* Character is a small (8-bit) character.
* We just look these up directly for speed.
*/
*pri = table->char_pri_table[*t].pri[which];
} else if ((table->info->large_count > 0) &&
((match = largesearch(table, *t)) != NULL)) {
/*
* Character was found in the extended table.
*/
*pri = match->pri.pri[which];
} else {
/*
* Character lacks a specific definition.
*/
if (table->info->directive[which] & DIRECTIVE_UNDEFINED) {
/* Mask off sign bit to prevent ordering confusion. */
*pri = (*t & COLLATE_MAX_PRIORITY);
} else {
*pri = table->info->undef_pri[which];
}
/* No substitutions for undefined characters! */
return;
}
/*
* Try substituting (expanding) the character. We are
* currently doing this *after* the chain compression. I
* think it should not matter, but this way might be slightly
* faster.
*
* We do this after the priority search, as this will help us
* to identify a single key value. In order for this to work,
* it's important that the priority assigned to a given element
* to be substituted be unique for that level. The localedef
* code ensures this for us.
*/
if ((sptr = substsearch(table, *pri, which)) != NULL) {
if ((*pri = *sptr) != 0) {
sptr++;
*state = *sptr ? sptr : NULL;
}
}
*prim = __collate_char_pri_table[*t].prim;
*sec = __collate_char_pri_table[*t].sec;
}
u_char *
__collate_strdup(u_char *s)
/*
* This is the meaty part of wcsxfrm & strxfrm. Note that it does
* NOT NULL terminate. That is left to the caller.
*/
size_t
_collate_wxfrm(struct xlocale_collate *table, const wchar_t *src, wchar_t *xf,
size_t room)
{
u_char *t = strdup(s);
int pri;
int len;
const wchar_t *t;
wchar_t *tr = NULL;
int direc;
int pass;
const int32_t *state;
size_t want = 0;
size_t need = 0;
if (t == NULL)
__collate_err(EX_OSERR, __func__);
return (t);
for (pass = 0; pass <= table->info->directive_count; pass++) {
state = NULL;
if (pass != 0) {
/* insert level separator from the previous pass */
if (room) {
*xf++ = 1;
room--;
}
want++;
}
/* special pass for undefined */
if (pass == table->info->directive_count) {
direc = DIRECTIVE_FORWARD | DIRECTIVE_UNDEFINED;
} else {
direc = table->info->directive[pass];
}
t = src;
if (direc & DIRECTIVE_BACKWARD) {
wchar_t *bp, *fp, c;
if (tr)
free(tr);
if ((tr = wcsdup(t)) == NULL) {
errno = ENOMEM;
goto fail;
}
bp = tr;
fp = tr + wcslen(tr) - 1;
while (bp < fp) {
c = *bp;
*bp++ = *fp;
*fp-- = c;
}
t = (const wchar_t *)tr;
}
if (direc & DIRECTIVE_POSITION) {
while (*t || state) {
_collate_lookup(table, t, &len, &pri, pass, &state);
t += len;
if (pri <= 0) {
if (pri < 0) {
errno = EINVAL;
goto fail;
}
pri = COLLATE_MAX_PRIORITY;
}
if (room) {
*xf++ = pri;
room--;
}
want++;
need = want;
}
} else {
while (*t || state) {
_collate_lookup(table, t, &len, &pri, pass, &state);
t += len;
if (pri <= 0) {
if (pri < 0) {
errno = EINVAL;
goto fail;
}
continue;
}
if (room) {
*xf++ = pri;
room--;
}
want++;
need = want;
}
}
}
if (tr)
free(tr);
return (need);
fail:
if (tr)
free(tr);
return ((size_t)(-1));
}
void
__collate_err(int ex, const char *f)
/*
* In the non-POSIX case, we transform each character into a string of
* characters representing the character's priority. Since char is usually
* signed, we are limited by 7 bits per byte. To avoid zero, we need to add
* XFRM_OFFSET, so we can't use a full 7 bits. For simplicity, we choose 6
* bits per byte.
*
* It turns out that we sometimes have real priorities that are
* 31-bits wide. (But: be careful using priorities where the high
* order bit is set -- i.e. the priority is negative. The sort order
* may be surprising!)
*
* TODO: This would be a good area to optimize somewhat. It turns out
* that real priorities *except for the last UNDEFINED pass* are generally
* very small. We need the localedef code to precalculate the max
* priority for us, and ideally also give us a mask, and then we could
* severely limit what we expand to.
*/
#define XFRM_BYTES 6
#define XFRM_OFFSET ('0') /* make all printable characters */
#define XFRM_SHIFT 6
#define XFRM_MASK ((1 << XFRM_SHIFT) - 1)
#define XFRM_SEP ('.') /* chosen to be less than XFRM_OFFSET */
static int
xfrm(struct xlocale_collate *table, unsigned char *p, int pri, int pass)
{
const char *s;
int serrno = errno;
/* we use unsigned to ensure zero fill on right shift */
uint32_t val = (uint32_t)table->info->pri_count[pass];
int nc = 0;
s = _getprogname();
_write(STDERR_FILENO, s, strlen(s));
_write(STDERR_FILENO, ": ", 2);
s = f;
_write(STDERR_FILENO, s, strlen(s));
_write(STDERR_FILENO, ": ", 2);
s = strerror(serrno);
_write(STDERR_FILENO, s, strlen(s));
_write(STDERR_FILENO, "\n", 1);
exit(ex);
while (val) {
*p = (pri & XFRM_MASK) + XFRM_OFFSET;
pri >>= XFRM_SHIFT;
val >>= XFRM_SHIFT;
p++;
nc++;
}
return (nc);
}
#ifdef COLLATE_DEBUG
void
__collate_print_tables()
size_t
_collate_sxfrm(struct xlocale_collate *table, const wchar_t *src, char *xf,
size_t room)
{
int i;
struct __collate_st_chain_pri *p2;
int pri;
int len;
const wchar_t *t;
wchar_t *tr = NULL;
int direc;
int pass;
const int32_t *state;
size_t want = 0;
size_t need = 0;
int b;
uint8_t buf[XFRM_BYTES];
printf("Substitute table:\n");
for (i = 0; i < UCHAR_MAX + 1; i++)
if (i != *__collate_substitute_table[i])
printf("\t'%c' --> \"%s\"\n", i,
__collate_substitute_table[i]);
printf("Chain priority table:\n");
for (p2 = __collate_chain_pri_table; p2->str[0] != '\0'; p2++)
printf("\t\"%s\" : %d %d\n", p2->str, p2->prim, p2->sec);
printf("Char priority table:\n");
for (i = 0; i < UCHAR_MAX + 1; i++)
printf("\t'%c' : %d %d\n", i, __collate_char_pri_table[i].prim,
__collate_char_pri_table[i].sec);
for (pass = 0; pass <= table->info->directive_count; pass++) {
state = NULL;
if (pass != 0) {
/* insert level separator from the previous pass */
if (room) {
*xf++ = XFRM_SEP;
room--;
}
want++;
}
/* special pass for undefined */
if (pass == table->info->directive_count) {
direc = DIRECTIVE_FORWARD | DIRECTIVE_UNDEFINED;
} else {
direc = table->info->directive[pass];
}
t = src;
if (direc & DIRECTIVE_BACKWARD) {
wchar_t *bp, *fp, c;
if (tr)
free(tr);
if ((tr = wcsdup(t)) == NULL) {
errno = ENOMEM;
goto fail;
}
bp = tr;
fp = tr + wcslen(tr) - 1;
while (bp < fp) {
c = *bp;
*bp++ = *fp;
*fp-- = c;
}
t = (const wchar_t *)tr;
}
if (direc & DIRECTIVE_POSITION) {
while (*t || state) {
_collate_lookup(table, t, &len, &pri, pass, &state);
t += len;
if (pri <= 0) {
if (pri < 0) {
errno = EINVAL;
goto fail;
}
pri = COLLATE_MAX_PRIORITY;
}
b = xfrm(table, buf, pri, pass);
want += b;
if (room) {
while (b) {
b--;
if (room) {
*xf++ = buf[b];
room--;
}
}
}
need = want;
}
} else {
while (*t || state) {
_collate_lookup(table, t, &len, &pri, pass, &state);
t += len;
if (pri <= 0) {
if (pri < 0) {
errno = EINVAL;
goto fail;
}
continue;
}
b = xfrm(table, buf, pri, pass);
want += b;
if (room) {
while (b) {
b--;
if (room) {
*xf++ = buf[b];
room--;
}
}
}
need = want;
}
}
}
if (tr)
free(tr);
return (need);
fail:
if (tr)
free(tr);
return ((size_t)(-1));
}
/*
 * __collate_equiv_value returns the primary collation value for the given
 * collating symbol specified by str and len. Zero or negative is returned
 * if the collating symbol was not found. This function is used by bracket
 * code in the TRE regex library.
 *
 * locale - locale whose XLC_COLLATE component is consulted
 * str    - wide characters naming the symbol (need not be terminated)
 * len    - number of characters in str; must be in [1, COLLATE_STR_LEN)
 *
 * Return value:
 *  - -1 when len is out of range;
 *  - when the collation table failed to load, the character itself for a
 *    single character not above UCHAR_MAX, otherwise -1;
 *  - for a single character, its level-0 priority from the 8-bit table or
 *    the large table; a priority of 0 is reported as 1, and a negative or
 *    missing priority as 0;
 *  - for a longer symbol, the level-0 priority of the matching chain; a
 *    priority of 0 is reported as 1 and a negative one as its magnitude;
 *  - 0 when nothing matches.
 */
int
__collate_equiv_value(locale_t locale, const wchar_t *str, size_t len)
{
	int32_t e;

	if (len < 1 || len >= COLLATE_STR_LEN)
		return (-1);

	FIX_LOCALE(locale);
	struct xlocale_collate *table =
		(struct xlocale_collate*)locale->components[XLC_COLLATE];

	if (table->__collate_load_error)
		return ((len == 1 && *str <= UCHAR_MAX) ? *str : -1);

	if (len == 1) {
		e = -1;
		/*
		 * Compare as unsigned so that a negative wchar_t value is
		 * rejected instead of indexing before the start of the
		 * 8-bit table.
		 */
		if ((unsigned int)*str <= UCHAR_MAX)
			e = table->char_pri_table[*str].pri[0];
		else if (table->info->large_count > 0) {
			collate_large_t *match_large;

			match_large = largesearch(table, *str);
			if (match_large)
				e = match_large->pri.pri[0];
		}
		if (e == 0)
			return (1);
		return (e > 0 ? e : 0);
	}

	if (table->info->chain_count > 0) {
		wchar_t name[COLLATE_STR_LEN];
		collate_chain_t *match_chain;
		int clen;

		/* len < COLLATE_STR_LEN, so the terminator always fits. */
		wcsncpy (name, str, len);
		name[len] = 0;
		match_chain = chainsearch(table, name, &clen);
		if (match_chain) {
			e = match_chain->pri[0];
			if (e == 0)
				return (1);
			return (e < 0 ? -e : e);
		}
	}
	return (0);
}
#endif

View file

@ -40,42 +40,98 @@
#include <limits.h>
#include "xlocale_private.h"
#define STR_LEN 10
#define TABLE_SIZE 100
#define COLLATE_VERSION "1.0\n"
#define COLLATE_VERSION1_2 "1.2\n"
/*
* Work around buildworld bootstrapping from older systems whose limits.h
* sets COLL_WEIGHTS_MAX to 0.
*/
#if COLL_WEIGHTS_MAX == 0
#undef COLL_WEIGHTS_MAX
#define COLL_WEIGHTS_MAX 10
#endif
struct __collate_st_char_pri {
int prim, sec;
};
struct __collate_st_chain_pri {
u_char str[STR_LEN];
int prim, sec;
};
#define COLLATE_STR_LEN 24 /* should be 64-bit multiple */
#define COLLATE_VERSION "BSD 1.0\n"
#define __collate_substitute_table (*__collate_substitute_table_ptr)
#define __collate_char_pri_table (*__collate_char_pri_table_ptr)
#define COLLATE_MAX_PRIORITY (0x7fffffff) /* max signed value */
#define COLLATE_SUBST_PRIORITY (0x40000000) /* bit indicates subst table */
#define DIRECTIVE_UNDEF 0x00
#define DIRECTIVE_FORWARD 0x01
#define DIRECTIVE_BACKWARD 0x02
#define DIRECTIVE_POSITION 0x04
#define DIRECTIVE_UNDEFINED 0x08 /* special last weight for UNDEFINED */
#define DIRECTIVE_DIRECTION_MASK (DIRECTIVE_FORWARD | DIRECTIVE_BACKWARD)
/*
* The collate file format is as follows:
*
* char version[COLLATE_STR_LEN]; // must be COLLATE_VERSION
* collate_info_t info; // see below, includes padding
* collate_char_t char_data[256]; // 8 bit char values
* collate_subst_t subst[*]; // 0 or more substitutions
* collate_chain_t chains[*]; // 0 or more chains
* collate_large_t large[*]; // extended char priorities
*
* Note that all structures must be 32-bit aligned, as each structure
* contains 32-bit member fields. The entire file is mmap'd, so it's
* critical that alignment be observed. It is not generally safe to
* use any 64-bit values in the structures.
*/
/*
 * Header record of a collation file; it gives the sizes of the tables that
 * follow it.  The loader points directly at this record inside the mapped
 * file.
 * NOTE(review): field order and widths presumably have to match what
 * localedef writes -- confirm before changing the layout.
 */
typedef struct collate_info {
/* Number of collation levels; the loader requires 1 <= n < COLL_WEIGHTS_MAX. */
uint8_t directive_count;
/* Per-level DIRECTIVE_* flag bits. */
uint8_t directive[COLL_WEIGHTS_MAX];
/* Per-level value whose magnitude sets how many bytes xfrm() emits per priority. */
int32_t pri_count[COLL_WEIGHTS_MAX];
/* Not referenced by the lookup code shown alongside this header. */
int32_t flags;
/* Number of collate_chain_t entries in the chain table. */
int32_t chain_count;
/* Number of collate_large_t entries in the large-character table. */
int32_t large_count;
/* Per-level number of collate_subst_t entries. */
int32_t subst_count[COLL_WEIGHTS_MAX];
/* Per-level priority for undefined characters when the level lacks DIRECTIVE_UNDEFINED. */
int32_t undef_pri[COLL_WEIGHTS_MAX];
} collate_info_t;
/*
 * Priorities of one character, one slot per collation level.  The 8-bit
 * table holds UCHAR_MAX + 1 of these, indexed by character value.
 */
typedef struct collate_char {
int32_t pri[COLL_WEIGHTS_MAX];
} collate_char_t;
/*
 * A multi-character collating element ("chain") and its per-level
 * priorities.
 * NOTE(review): chainsearch() binary-searches the chain table, so entries
 * are presumably stored sorted by str -- confirm against localedef.
 */
typedef struct collate_chain {
/* Wide-character sequence; compared over at most COLLATE_STR_LEN characters. */
wchar_t str[COLLATE_STR_LEN];
/* Priority of the whole sequence at each collation level. */
int32_t pri[COLL_WEIGHTS_MAX];
} collate_chain_t;
/*
 * Priorities of a character above UCHAR_MAX; entries are found by
 * largesearch(), which binary-searches on val.
 */
typedef struct collate_large {
/* Character value used as the search key. */
int32_t val;
/* Per-level priorities of that character. */
collate_char_t pri;
} collate_large_t;
/*
 * One substitution (expansion) entry.  substsearch() locates entries by
 * position in the per-level table rather than by comparing key.
 */
typedef struct collate_subst {
/* NOTE(review): presumably the substitution priority naming this entry -- confirm. */
int32_t key;
/* Replacement priorities; consumers stop at the first zero entry. */
int32_t pri[COLLATE_STR_LEN];
} collate_subst_t;
struct xlocale_collate {
struct xlocale_component header;
int __collate_load_error;
int __collate_substitute_nontrivial;
char * map;
size_t maplen;
u_char (*__collate_substitute_table_ptr)[UCHAR_MAX + 1][STR_LEN];
struct __collate_st_char_pri (*__collate_char_pri_table_ptr)[UCHAR_MAX + 1];
struct __collate_st_chain_pri *__collate_chain_pri_table;
collate_info_t *info;
collate_char_t *char_pri_table;
collate_large_t *large_pri_table;
collate_chain_t *chain_pri_table;
collate_subst_t *subst_table[COLL_WEIGHTS_MAX];
};
__BEGIN_DECLS
u_char *__collate_strdup(u_char *);
u_char *__collate_substitute(struct xlocale_collate *, const u_char *);
int __collate_load_tables(const char *);
void __collate_lookup(struct xlocale_collate *, const u_char *, int *, int *, int *);
int __collate_range_cmp(struct xlocale_collate *, int, int);
#ifdef COLLATE_DEBUG
void __collate_print_tables(void);
#endif
int __collate_equiv_value(locale_t, const wchar_t *, size_t);
void _collate_lookup(struct xlocale_collate *,const wchar_t *, int *, int *,
int, const int **);
int __collate_range_cmp(struct xlocale_collate *, wchar_t, wchar_t);
size_t _collate_wxfrm(struct xlocale_collate *, const wchar_t *, wchar_t *,
size_t);
size_t _collate_sxfrm(struct xlocale_collate *, const wchar_t *, char *,
size_t);
__END_DECLS
#endif /* !_COLLATE_H_ */

View file

@ -33,6 +33,7 @@
__FBSDID("$FreeBSD$");
#include <string.h>
#include <wchar.h>
#include <xlocale.h>
#include "collate.h"
@ -40,13 +41,15 @@ __FBSDID("$FreeBSD$");
* Compare two characters using collate
*/
int __collate_range_cmp(struct xlocale_collate *table, int c1, int c2)
int __collate_range_cmp(struct xlocale_collate *table, wchar_t c1, wchar_t c2)
{
static char s1[2], s2[2];
wchar_t s1[2], s2[2];
s1[0] = c1;
s1[1] = 0;
s2[0] = c2;
s2[1] = 0;
struct _xlocale l = {{0}};
l.components[XLC_COLLATE] = (struct xlocale_component *)table;
return (strcoll_l(s1, s2, &l));
return (wcscoll_l(s1, s2, &l));
}

View file

@ -67,12 +67,6 @@ extern _RuneLocale *_Read_RuneMagi(FILE *);
static int __setrunelocale(struct xlocale_ctype *l, const char *);
#define __collate_substitute_nontrivial (table->__collate_substitute_nontrivial)
#define __collate_substitute_table_ptr (table->__collate_substitute_table_ptr)
#define __collate_char_pri_table_ptr (table->__collate_char_pri_table_ptr)
#define __collate_chain_pri_table (table->__collate_chain_pri_table)
static void
destruct_ctype(void *v)
{

View file

@ -1,4 +1,5 @@
/*-
* Copyright 2010 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
* at Electronni Visti IA, Kiev, Ukraine.
* All rights reserved.
@ -35,63 +36,82 @@ __FBSDID("$FreeBSD$");
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <wchar.h>
#include "collate.h"
#include <stdio.h>
/*
* In order to properly handle multibyte locales, it's easiest to just
* convert to wide characters and then use wcscoll. However if an
* error occurs, we gracefully fall back to simple strcmp. Caller
* should check errno.
*/
int
strcoll_l(const char *s, const char *s2, locale_t locale)
{
int len, len2, prim, prim2, sec, sec2, ret, ret2;
const char *t, *t2;
char *tt, *tt2;
int ret;
wchar_t *t1 = NULL, *t2 = NULL;
wchar_t *w1 = NULL, *w2 = NULL;
const char *cs1, *cs2;
mbstate_t mbs1;
mbstate_t mbs2;
size_t sz1, sz2;
memset(&mbs1, 0, sizeof (mbstate_t));
memset(&mbs2, 0, sizeof (mbstate_t));
/*
* The mbsrtowcs_l function can set the src pointer to null upon
* failure, so it should act on a copy to avoid:
* - sending null pointer to strcmp
* - having strcoll/strcoll_l change *s or *s2 to null
*/
cs1 = s;
cs2 = s2;
FIX_LOCALE(locale);
struct xlocale_collate *table =
(struct xlocale_collate*)locale->components[XLC_COLLATE];
if (table->__collate_load_error)
return strcmp(s, s2);
goto error;
len = len2 = 1;
ret = ret2 = 0;
if (table->__collate_substitute_nontrivial) {
t = tt = __collate_substitute(table, s);
t2 = tt2 = __collate_substitute(table, s2);
} else {
tt = tt2 = NULL;
t = s;
t2 = s2;
}
while(*t && *t2) {
prim = prim2 = 0;
while(*t && !prim) {
__collate_lookup(table, t, &len, &prim, &sec);
t += len;
}
while(*t2 && !prim2) {
__collate_lookup(table, t2, &len2, &prim2, &sec2);
t2 += len2;
}
if(!prim || !prim2)
break;
if(prim != prim2) {
ret = prim - prim2;
goto end;
}
if(!ret2)
ret2 = sec - sec2;
}
if(!*t && *t2)
ret = -(int)((u_char)*t2);
else if(*t && !*t2)
ret = (u_char)*t;
else if(!*t && !*t2)
ret = ret2;
end:
free(tt);
free(tt2);
sz1 = strlen(s) + 1;
sz2 = strlen(s2) + 1;
return ret;
/*
* Simple assumption: conversion to wide format is strictly
* reducing, i.e. a single byte (or multibyte character)
* cannot result in multiple wide characters.
*/
if ((t1 = malloc(sz1 * sizeof (wchar_t))) == NULL)
goto error;
w1 = t1;
if ((t2 = malloc(sz2 * sizeof (wchar_t))) == NULL)
goto error;
w2 = t2;
if ((mbsrtowcs_l(w1, &cs1, sz1, &mbs1, locale)) == (size_t)-1)
goto error;
if ((mbsrtowcs_l(w2, &cs2, sz2, &mbs2, locale)) == (size_t)-1)
goto error;
ret = wcscoll_l(w1, w2, locale);
if (t1)
free(t1);
if (t2)
free(t2);
return (ret);
error:
if (t1)
free(t1);
if (t2)
free(t2);
return (strcmp(s, s2));
}
int

View file

@ -1,4 +1,5 @@
/*-
* Copyright 2010 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
* at Electronni Visti IA, Kiev, Ukraine.
* All rights reserved.
@ -35,6 +36,8 @@ __FBSDID("$FreeBSD$");
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <wchar.h>
#include "collate.h"
size_t
@ -48,9 +51,10 @@ strxfrm(char * __restrict dest, const char * __restrict src, size_t len)
size_t
strxfrm_l(char * __restrict dest, const char * __restrict src, size_t len, locale_t locale)
{
int prim, sec, l;
size_t slen;
char *s, *ss;
size_t xlen;
wchar_t *wcs = NULL;
FIX_LOCALE(locale);
struct xlocale_collate *table =
(struct xlocale_collate*)locale->components[XLC_COLLATE];
@ -58,32 +62,44 @@ strxfrm_l(char * __restrict dest, const char * __restrict src, size_t len, local
if (!*src) {
if (len > 0)
*dest = '\0';
return 0;
return (0);
}
/*
* The conversion from multibyte to wide character strings is
* strictly reducing (one byte of an mbs cannot expand to more
* than one wide character.)
*/
slen = strlen(src);
if (table->__collate_load_error)
return strlcpy(dest, src, len);
goto error;
slen = 0;
prim = sec = 0;
ss = s = __collate_substitute(table, src);
while (*s) {
while (*s && !prim) {
__collate_lookup(table, s, &l, &prim, &sec);
s += l;
}
if (prim) {
if (len > 1) {
*dest++ = (char)prim;
len--;
}
slen++;
prim = 0;
}
if ((wcs = malloc((slen + 1) * sizeof (wchar_t))) == NULL)
goto error;
if (mbstowcs_l(wcs, src, slen + 1, locale) == (size_t)-1)
goto error;
if ((xlen = _collate_sxfrm(table, wcs, dest, len)) == (size_t)-1)
goto error;
if (wcs)
free(wcs);
if (len > xlen) {
dest[xlen] = 0;
} else if (len) {
dest[len-1] = 0;
}
free(ss);
if (len > 0)
*dest = '\0';
return slen;
return (xlen);
error:
/* errno should be set to ENOMEM if malloc failed */
if (wcs)
free(wcs);
(void) strlcpy(dest, src, len);
return (slen);
}

View file

@ -1,4 +1,5 @@
/*-
* Copyright 2010 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
* at Electronni Visti IA, Kiev, Ukraine.
* All rights reserved.
@ -31,9 +32,6 @@
*/
#include <sys/cdefs.h>
#if 0
__FBSDID("FreeBSD: src/lib/libc/string/strxfrm.c,v 1.15 2002/09/06 11:24:06 tjr Exp ");
#endif
__FBSDID("$FreeBSD$");
#include <stdlib.h>
@ -41,18 +39,10 @@ __FBSDID("$FreeBSD$");
#include <wchar.h>
#include "collate.h"
static char *__mbsdup(const wchar_t *);
/*
* Placeholder wcsxfrm() implementation. See wcscoll.c for a description of
* the logic used.
*/
size_t
wcsxfrm_l(wchar_t * __restrict dest, const wchar_t * __restrict src, size_t len, locale_t locale)
{
int prim, sec, l;
size_t slen;
char *mbsrc, *s, *ss;
FIX_LOCALE(locale);
struct xlocale_collate *table =
(struct xlocale_collate*)locale->components[XLC_COLLATE];
@ -63,67 +53,33 @@ wcsxfrm_l(wchar_t * __restrict dest, const wchar_t * __restrict src, size_t len,
return (0);
}
if (table->__collate_load_error || MB_CUR_MAX > 1) {
slen = wcslen(src);
if (len > 0) {
if (slen < len)
wcscpy(dest, src);
else {
wcsncpy(dest, src, len - 1);
dest[len - 1] = L'\0';
}
}
return (slen);
if ((table->__collate_load_error) ||
((slen = _collate_wxfrm(table, src, dest, len)) == (size_t)-1)) {
goto error;
}
mbsrc = __mbsdup(src);
slen = 0;
prim = sec = 0;
ss = s = __collate_substitute(table, mbsrc);
while (*s != '\0') {
while (*s != '\0' && prim == 0) {
__collate_lookup(table, s, &l, &prim, &sec);
s += l;
}
if (prim != 0) {
if (len > 1) {
*dest++ = (wchar_t)prim;
len--;
}
slen++;
prim = 0;
}
/* Add null termination at the correct location. */
if (len > slen) {
dest[slen] = 0;
} else if (len) {
dest[len-1] = 0;
}
free(ss);
free(mbsrc);
if (len != 0)
*dest = L'\0';
return (slen);
error:
slen = wcslen(src);
if (slen < len)
(void) wcscpy(dest, src);
else {
(void) wcsncpy(dest, src, len - 1);
dest[len - 1] = L'\0';
}
return (slen);
}
/*
 * Locale-implicit form of wcsxfrm_l(): forwards its arguments together
 * with the locale returned by __get_locale() and returns that call's
 * result unchanged.
 */
size_t
wcsxfrm(wchar_t * __restrict dest, const wchar_t * __restrict src, size_t len)
{

	return (wcsxfrm_l(dest, src, len, __get_locale()));
}
static char *
__mbsdup(const wchar_t *ws)
{
static const mbstate_t initial;
mbstate_t st;
const wchar_t *wcp;
size_t len;
char *mbs;
wcp = ws;
st = initial;
if ((len = wcsrtombs(NULL, &wcp, 0, &st)) == (size_t)-1)
return (NULL);
if ((mbs = malloc(len + 1)) == NULL)
return (NULL);
st = initial;
wcsrtombs(mbs, &ws, len + 1, &st);
return (mbs);
}