freebsd-src/include/ctype.h
Andrey A. Chernov 367ed4e13d The problem is: currently our single byte ctype(3) functions are broken
for wide character locales in the argument range >= 0x80 - they may
return false positives.

Example 1: for UTF-8 locale we currently have:
iswspace(0xA0)==1 and isspace(0xA0)==1
(because iswspace() and isspace() are the same code)
but must have
iswspace(0xA0)==1 and isspace(0xA0)==0
(because the UTF-8 locale has no such single byte character, nor any other
in the range 0x80..0xff: it keeps only ASCII in the single byte range,
since our internal wchar_t representation for UTF-8 is UCS-4).

Example 2: for all wide character locales isalpha(arg) when arg > 0xFF may
return false positives (must be 0).
(because iswalpha() and isalpha() are the same code)

This change addresses this issue by separating single byte and wide ctype,
and also fixes iswascii() (currently iswascii() is broken for
arguments > 0xFF).
This change is 100% binary compatible with old binaries.

Reviewed by: i18n@
2007-10-13 16:28:22 +00:00

136 lines
4.7 KiB
C

/*
* Copyright (c) 1989, 1993
* The Regents of the University of California. All rights reserved.
* (c) UNIX System Laboratories, Inc.
* All or some portions of this file are derived from material licensed
* to the University of California by American Telephone and Telegraph
* Co. or Unix System Laboratories, Inc. and are reproduced herein with
* the permission of UNIX System Laboratories, Inc.
*
* This code is derived from software contributed to Berkeley by
* Paul Borman at Krystal Technologies.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)ctype.h 8.4 (Berkeley) 1/21/94
* $FreeBSD$
*/
#ifndef _CTYPE_H_
#define _CTYPE_H_
#include <sys/cdefs.h>
#include <sys/_types.h>
#include <_ctype.h>
/*
 * Function prototypes for the single-byte character classification and
 * case-conversion interfaces.  Every function takes an int and returns an
 * int.  The same names are also defined as function-like macros further
 * down in this header.
 *
 * The declarations are grouped by the visibility macro that guards them
 * (the visibility macros are not defined in this file; presumably they come
 * from <sys/cdefs.h> -- confirm).
 */
__BEGIN_DECLS
/* Always declared. */
int isalnum(int);
int isalpha(int);
int iscntrl(int);
int isdigit(int);
int isgraph(int);
int islower(int);
int isprint(int);
int ispunct(int);
int isspace(int);
int isupper(int);
int isxdigit(int);
int tolower(int);
int toupper(int);
/* Declared only when __XSI_VISIBLE is nonzero. */
#if __XSI_VISIBLE
int _tolower(int);
int _toupper(int);
int isascii(int);
int toascii(int);
#endif
/* Declared only when __ISO_C_VISIBLE is at least 1999. */
#if __ISO_C_VISIBLE >= 1999
int isblank(int);
#endif
/* Declared only when __BSD_VISIBLE is nonzero. */
#if __BSD_VISIBLE
int digittoint(int);
int ishexnumber(int);
int isideogram(int);
int isnumber(int);
int isphonogram(int);
int isrune(int);
int isspecial(int);
#endif
__END_DECLS
/*
 * Macro forms of the always-visible classification and case-conversion
 * interfaces.
 *
 * Every classifier expands to __sbistype() with a _CTYPE_* flag mask, except
 * isdigit() and isxdigit(), which expand to __isctype() instead (see the
 * "locale independent" notes on those lines).  tolower() and toupper()
 * expand to __sbtolower() and __sbtoupper().
 *
 * The helpers and the _CTYPE_* masks are not defined in this file
 * (presumably they come from <_ctype.h> -- confirm).
 */
/* isalnum() tests the union of the isalpha() and isdigit() masks. */
#define isalnum(c) __sbistype((c), _CTYPE_A|_CTYPE_D)
#define isalpha(c) __sbistype((c), _CTYPE_A)
#define iscntrl(c) __sbistype((c), _CTYPE_C)
#define isdigit(c) __isctype((c), _CTYPE_D) /* ANSI -- locale independent */
#define isgraph(c) __sbistype((c), _CTYPE_G)
#define islower(c) __sbistype((c), _CTYPE_L)
#define isprint(c) __sbistype((c), _CTYPE_R)
#define ispunct(c) __sbistype((c), _CTYPE_P)
#define isspace(c) __sbistype((c), _CTYPE_S)
#define isupper(c) __sbistype((c), _CTYPE_U)
#define isxdigit(c) __isctype((c), _CTYPE_X) /* ANSI -- locale independent */
/* Case conversion is forwarded to the __sb* helpers. */
#define tolower(c) __sbtolower(c)
#define toupper(c) __sbtoupper(c)
#if __XSI_VISIBLE
/*
* POSIX.1-2001 specifies _tolower() and _toupper() to be macros equivalent to
* tolower() and toupper() respectively, minus extra checking to ensure that
* the argument is a lower or uppercase letter respectively. We've chosen to
* implement these macros with the same error checking as tolower() and
* toupper() since this doesn't violate the specification itself, only its
* intent. We purposely leave _tolower() and _toupper() undocumented to
* discourage their use.
*
* XXX isascii() and toascii() should similarly be undocumented.
*/
/* XSI aliases: forwarded to the same helpers that tolower()/toupper() use. */
#define _toupper(c) __sbtoupper(c)
#define _tolower(c) __sbtolower(c)
/* Yields 1 when c has no bit set outside the low seven, 0 otherwise. */
#define isascii(c) (!((c) & ~0x7F))
/* Reduces c to its low seven bits. */
#define toascii(c) (0x7F & (c))
#endif
/*
 * isblank() is defined as a macro only when __ISO_C_VISIBLE is at least
 * 1999.  It expands to __sbistype() with the _CTYPE_B mask, the same helper
 * used by the other classification macros in this header.
 */
#if __ISO_C_VISIBLE >= 1999
#define isblank(c) __sbistype((c), _CTYPE_B)
#endif
/*
 * Macro forms of the BSD extensions, defined only when __BSD_VISIBLE is
 * nonzero.  All of them are thin wrappers around helpers that are not
 * defined in this file (presumably they come from <_ctype.h> -- confirm).
 */
#if __BSD_VISIBLE
/*
 * Expands to __sbmaskrune() with mask 0xFF (the low eight bits).
 * NOTE(review): presumably the low byte of the per-character entry holds the
 * digit value -- confirm against the helper's definition.
 */
#define digittoint(c) __sbmaskrune((c), 0xFF)
/* Same _CTYPE_X mask as isxdigit(), but tested through __sbistype(). */
#define ishexnumber(c) __sbistype((c), _CTYPE_X)
#define isideogram(c) __sbistype((c), _CTYPE_I)
/* Same _CTYPE_D mask as isdigit(), but tested through __sbistype(). */
#define isnumber(c) __sbistype((c), _CTYPE_D)
#define isphonogram(c) __sbistype((c), _CTYPE_Q)
/*
 * Tests against 0xFFFFFF00L, i.e. every bit above the low eight.
 * NOTE(review): presumably "has any class flag set" -- confirm.
 */
#define isrune(c) __sbistype((c), 0xFFFFFF00L)
#define isspecial(c) __sbistype((c), _CTYPE_T)
#endif
#endif /* !_CTYPE_H_ */