mirror of
https://github.com/godotengine/godot
synced 2024-09-18 21:11:28 +00:00
2814 lines
85 KiB
C++
2814 lines
85 KiB
C++
// © 2016 and later: Unicode, Inc. and others.
|
|
// License & terms of use: http://www.unicode.org/copyright.html
|
|
/*
|
|
**********************************************************************
|
|
* Copyright (C) 2009-2015, International Business Machines
|
|
* Corporation and others. All Rights Reserved.
|
|
**********************************************************************
|
|
*/
|
|
|
|
#include "unicode/bytestream.h"
|
|
#include "unicode/utypes.h"
|
|
#include "unicode/ures.h"
|
|
#include "unicode/localpointer.h"
|
|
#include "unicode/putil.h"
|
|
#include "unicode/uenum.h"
|
|
#include "unicode/uloc.h"
|
|
#include "ustr_imp.h"
|
|
#include "bytesinkutil.h"
|
|
#include "charstr.h"
|
|
#include "cmemory.h"
|
|
#include "cstring.h"
|
|
#include "putilimp.h"
|
|
#include "uinvchar.h"
|
|
#include "ulocimp.h"
|
|
#include "uassert.h"
|
|
|
|
|
|
/* Node of a singly linked list of variant subtags. */
typedef struct VariantListEntry {
    const char              *variant;   /* text of this variant subtag */
    struct VariantListEntry *next;      /* next node, or nullptr for the last one */
} VariantListEntry;
|
|
|
|
/* struct holding a single attribute value */
|
|
struct AttributeListEntry : public icu::UMemory {
|
|
const char *attribute;
|
|
struct AttributeListEntry *next;
|
|
};
|
|
|
|
/* struct holding a single extension */
|
|
struct ExtensionListEntry : public icu::UMemory {
|
|
const char *key;
|
|
const char *value;
|
|
struct ExtensionListEntry *next;
|
|
};
|
|
|
|
#define MAXEXTLANG 3
|
|
typedef struct ULanguageTag {
|
|
char *buf; /* holding parsed subtags */
|
|
const char *language;
|
|
const char *extlang[MAXEXTLANG];
|
|
const char *script;
|
|
const char *region;
|
|
VariantListEntry *variants;
|
|
ExtensionListEntry *extensions;
|
|
const char *privateuse;
|
|
const char *legacy;
|
|
} ULanguageTag;
|
|
|
|
#define MINLEN 2
#define SEP '-'         /* separator between subtags of a language tag */
#define PRIVATEUSE 'x'  /* singleton introducing private use subtags */
#define LDMLEXT 'u'     /* singleton of the Unicode locale ("u") extension */

/* Separator characters of ICU locale IDs. */
#define LOCALE_SEP '_'
#define LOCALE_EXT_SEP '@'
#define LOCALE_KEYWORD_SEP ';'
#define LOCALE_KEY_TYPE_SEP '='

/* ASCII-only character classification. ISNUMERIC evaluates its argument
   twice, so do not pass an expression with side effects. */
#define ISALPHA(c) uprv_isASCIILetter(c)
#define ISNUMERIC(c) ((c)>='0' && (c)<='9')

static const char EMPTY[] = "";
static const char LANG_UND[] = "und";
static const char PRIVATEUSE_KEY[] = "x";
static const char _POSIX[] = "_POSIX";
static const char POSIX_KEY[] = "va";
static const char POSIX_VALUE[] = "posix";
static const char LOCALE_ATTRIBUTE_KEY[] = "attribute";
static const char PRIVUSE_VARIANT_PREFIX[] = "lvariant";
static const char LOCALE_TYPE_YES[] = "yes";

/* Length of LANG_UND without its terminating NUL. */
#define LANG_UND_LEN 3
|
|
|
|
/*
|
|
Updated on 2018-09-12 from
|
|
https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .
|
|
|
|
This table has 2 parts. The part for
|
|
legacy language tags (marked as “Type: grandfathered” in BCP 47)
|
|
is generated by the following scripts from the IANA language tag registry.
|
|
|
|
curl https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry |\
|
|
egrep -A 7 'Type: grandfathered' | \
|
|
egrep 'Tag|Prefe' | grep -B1 'Preferred' | grep -v '^--' | \
|
|
awk -n '/Tag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' |\
|
|
tr 'A-Z' 'a-z'
|
|
|
|
|
|
The 2nd part is made of four ICU-specific entries. They're kept for
|
|
the backward compatibility for now, even though there are no preferred
|
|
values. They may have to be removed for the strict BCP 47 compliance.
|
|
|
|
*/
|
|
/* Flat list of pairs: even index = legacy tag, odd index = its replacement. */
static const char* const LEGACY[] = {
    /*  legacy          preferred */
    "art-lojban",       "jbo",
    "en-gb-oed",        "en-gb-oxendict",
    "i-ami",            "ami",
    "i-bnn",            "bnn",
    "i-hak",            "hak",
    "i-klingon",        "tlh",
    "i-lux",            "lb",
    "i-navajo",         "nv",
    "i-pwn",            "pwn",
    "i-tao",            "tao",
    "i-tay",            "tay",
    "i-tsu",            "tsu",
    "no-bok",           "nb",
    "no-nyn",           "nn",
    "sgn-be-fr",        "sfb",
    "sgn-be-nl",        "vgt",
    "sgn-ch-de",        "sgg",
    "zh-guoyu",         "cmn",
    "zh-hakka",         "hak",
    "zh-min-nan",       "nan",
    "zh-xiang",         "hsn",

    // Legacy tags with no preferred value in the IANA
    // registry. Kept for now for the backward compatibility
    // because ICU has mapped them this way.
    "i-default",        "en-x-i-default",
    "i-enochian",       "und-x-i-enochian",
    "i-mingo",          "see-x-i-mingo",
    "zh-min",           "nan-x-zh-min",
};
|
|
|
|
/*
|
|
Updated on 2018-09-12 from
|
|
https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .
|
|
|
|
The table lists redundant tags with preferred value in the IANA language tag registry.
|
|
It's generated with the following command:
|
|
|
|
curl https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry |\
|
|
grep 'Type: redundant' -A 5 | egrep '^(Tag:|Prefer)' | grep -B1 'Preferred' | \
|
|
awk -n '/Tag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' | \
|
|
tr 'A-Z' 'a-z'
|
|
|
|
In addition, ja-latn-hepburn-heploc is mapped to ja-latn-alalc97 because
|
|
a variant tag 'hepburn-heploc' has the preferred subtag, 'alalc97'.
|
|
*/
|
|
|
|
/* Flat list of pairs: even index = redundant tag, odd index = its replacement. */
static const char* const REDUNDANT[] = {
    //  redundant       preferred
    "sgn-br",           "bzs",
    "sgn-co",           "csn",
    "sgn-de",           "gsg",
    "sgn-dk",           "dsl",
    "sgn-es",           "ssp",
    "sgn-fr",           "fsl",
    "sgn-gb",           "bfi",
    "sgn-gr",           "gss",
    "sgn-ie",           "isg",
    "sgn-it",           "ise",
    "sgn-jp",           "jsl",
    "sgn-mx",           "mfs",
    "sgn-ni",           "ncs",
    "sgn-nl",           "dse",
    "sgn-no",           "nsl",
    "sgn-pt",           "psr",
    "sgn-se",           "swl",
    "sgn-us",           "ase",
    "sgn-za",           "sfs",
    "zh-cmn",           "cmn",
    "zh-cmn-hans",      "cmn-hans",
    "zh-cmn-hant",      "cmn-hant",
    "zh-gan",           "gan",
    "zh-wuu",           "wuu",
    "zh-yue",           "yue",

    // variant tag with preferred value
    "ja-latn-hepburn-heploc", "ja-latn-alalc97",
};
|
|
|
|
/*
|
|
Updated on 2018-09-12 from
|
|
https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .
|
|
|
|
grep 'Type: language' -A 7 language-subtag-registry | egrep 'Subtag|Prefe' | \
|
|
grep -B1 'Preferred' | grep -v '^--' | \
|
|
awk -n '/Subtag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}'
|
|
|
|
Make sure that 2-letter language subtags come before 3-letter subtags.
|
|
*/
|
|
/* Flat list of pairs: even index = deprecated language subtag, odd index = its
   replacement. The 2-letter deprecated subtags must stay ahead of the 3-letter
   ones. */
static const char DEPRECATEDLANGS[][4] = {
    /* deprecated   new */
    "in",           "id",
    "iw",           "he",
    "ji",           "yi",
    "jw",           "jv",
    "mo",           "ro",
    "aam",          "aas",
    "adp",          "dz",
    "aue",          "ktz",
    "ayx",          "nun",
    "bgm",          "bcg",
    "bjd",          "drl",
    "ccq",          "rki",
    "cjr",          "mom",
    "cka",          "cmr",
    "cmk",          "xch",
    "coy",          "pij",
    "cqu",          "quh",
    "drh",          "khk",
    "drw",          "prs",
    "gav",          "dev",
    "gfx",          "vaj",
    "ggn",          "gvr",
    "gti",          "nyc",
    "guv",          "duz",
    "hrr",          "jal",
    "ibi",          "opa",
    "ilw",          "gal",
    "jeg",          "oyb",
    "kgc",          "tdf",
    "kgh",          "kml",
    "koj",          "kwv",
    "krm",          "bmf",
    "ktr",          "dtp",
    "kvs",          "gdj",
    "kwq",          "yam",
    "kxe",          "tvd",
    "kzj",          "dtp",
    "kzt",          "dtp",
    "lii",          "raq",
    "lmm",          "rmx",
    "meg",          "cir",
    "mst",          "mry",
    "mwj",          "vaj",
    "myt",          "mry",
    "nad",          "xny",
    "ncp",          "kdz",
    "nnx",          "ngv",
    "nts",          "pij",
    "oun",          "vaj",
    "pcr",          "adx",
    "pmc",          "huw",
    "pmu",          "phr",
    "ppa",          "bfy",
    "ppr",          "lcq",
    "pry",          "prt",
    "puz",          "pub",
    "sca",          "hle",
    "skk",          "oyb",
    "tdu",          "dtp",
    "thc",          "tpo",
    "thx",          "oyb",
    "tie",          "ras",
    "tkk",          "twm",
    "tlw",          "weo",
    "tmp",          "tyj",
    "tne",          "kak",
    "tnf",          "prs",
    "tsf",          "taj",
    "uok",          "ema",
    "xba",          "cax",
    "xia",          "acn",
    "xkh",          "waw",
    "xsj",          "suj",
    "ybd",          "rki",
    "yma",          "lrr",
    "ymt",          "mtm",
    "yos",          "zom",
    "yuu",          "yug",
};
|
|
|
|
/*
|
|
Updated on 2018-04-24 from
|
|
|
|
curl https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry | \
|
|
grep 'Type: region' -A 7 | egrep 'Subtag|Prefe' | \
|
|
grep -B1 'Preferred' | \
|
|
awk -n '/Subtag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}'
|
|
*/
|
|
/* Flat list of pairs: even index = deprecated region subtag, odd index = its
   replacement. */
static const char DEPRECATEDREGIONS[][3] = {
    /* deprecated   new */
    "BU",           "MM",
    "DD",           "DE",
    "FX",           "FR",
    "TP",           "TL",
    "YD",           "YE",
    "ZR",           "CD",
};
|
|
|
|
/*
|
|
* -------------------------------------------------
|
|
*
|
|
* These ultag_ functions may be exposed as APIs later
|
|
*
|
|
* -------------------------------------------------
|
|
*/
|
|
|
|
static ULanguageTag*
|
|
ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status);
|
|
|
|
static void
|
|
ultag_close(ULanguageTag* langtag);
|
|
|
|
static const char*
|
|
ultag_getLanguage(const ULanguageTag* langtag);
|
|
|
|
#if 0
|
|
static const char*
|
|
ultag_getJDKLanguage(const ULanguageTag* langtag);
|
|
#endif
|
|
|
|
static const char*
|
|
ultag_getExtlang(const ULanguageTag* langtag, int32_t idx);
|
|
|
|
static int32_t
|
|
ultag_getExtlangSize(const ULanguageTag* langtag);
|
|
|
|
static const char*
|
|
ultag_getScript(const ULanguageTag* langtag);
|
|
|
|
static const char*
|
|
ultag_getRegion(const ULanguageTag* langtag);
|
|
|
|
static const char*
|
|
ultag_getVariant(const ULanguageTag* langtag, int32_t idx);
|
|
|
|
static int32_t
|
|
ultag_getVariantsSize(const ULanguageTag* langtag);
|
|
|
|
static const char*
|
|
ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx);
|
|
|
|
static const char*
|
|
ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx);
|
|
|
|
static int32_t
|
|
ultag_getExtensionsSize(const ULanguageTag* langtag);
|
|
|
|
static const char*
|
|
ultag_getPrivateUse(const ULanguageTag* langtag);
|
|
|
|
#if 0
|
|
static const char*
|
|
ultag_getLegacy(const ULanguageTag* langtag);
|
|
#endif
|
|
|
|
U_NAMESPACE_BEGIN
|
|
|
|
/**
|
|
* \class LocalULanguageTagPointer
|
|
* "Smart pointer" class, closes a ULanguageTag via ultag_close().
|
|
* For most methods see the LocalPointerBase base class.
|
|
*
|
|
* @see LocalPointerBase
|
|
* @see LocalPointer
|
|
* @internal
|
|
*/
|
|
U_DEFINE_LOCAL_OPEN_POINTER(LocalULanguageTagPointer, ULanguageTag, ultag_close);
|
|
|
|
U_NAMESPACE_END
|
|
|
|
/*
|
|
* -------------------------------------------------
|
|
*
|
|
* Language subtag syntax validation functions
|
|
*
|
|
* -------------------------------------------------
|
|
*/
|
|
|
|
/* Returns true when each of the first len characters of s is an ASCII
   letter. No character is examined (and true is returned) when len <= 0. */
static UBool
_isAlphaString(const char* s, int32_t len) {
    for (int32_t remaining = len; remaining > 0; --remaining, ++s) {
        if (!ISALPHA(*s)) {
            return false;
        }
    }
    return true;
}
|
|
|
|
/* Returns true when each of the first len characters of s is an ASCII
   digit. No character is examined (and true is returned) when len <= 0. */
static UBool
_isNumericString(const char* s, int32_t len) {
    for (int32_t remaining = len; remaining > 0; --remaining, ++s) {
        const char c = *s;
        if (!ISNUMERIC(c)) {
            return false;
        }
    }
    return true;
}
|
|
|
|
/* Returns true when each of the first len characters of s is an ASCII
   letter or digit. No character is examined (and true is returned) when
   len <= 0. */
static UBool
_isAlphaNumericString(const char* s, int32_t len) {
    for (int32_t remaining = len; remaining > 0; --remaining, ++s) {
        const char c = *s;
        if (!(ISALPHA(c) || ISNUMERIC(c))) {
            return false;
        }
    }
    return true;
}
|
|
|
|
/* Returns true when s consists only of ASCII letters/digits and its length
   lies in [min, max]. A negative len means "s is NUL-terminated; measure it". */
static UBool
_isAlphaNumericStringLimitedLength(const char* s, int32_t len, int32_t min, int32_t max) {
    const int32_t n = (len < 0) ? (int32_t)uprv_strlen(s) : len;
    if (n < min || n > max) {
        return false;
    }
    return _isAlphaNumericString(s, n);
}
|
|
|
|
/*
 * Tests whether s is a language subtag.
 *
 *   unicode_language_subtag = alpha{2,3} | alpha{5,8};
 *
 * NOTE: Per ICUTC 2019/01/23- accepting alpha 4, so any all-letter string
 * of 2 to 8 characters passes. See ICU-20372.
 * A negative len means s is NUL-terminated.
 */
U_CFUNC UBool
ultag_isLanguageSubtag(const char* s, int32_t len) {
    const int32_t n = (len < 0) ? (int32_t)uprv_strlen(s) : len;
    if (n < 2 || n > 8) {
        return false;
    }
    return _isAlphaString(s, n);
}
|
|
|
|
/*
 * Tests whether s is a single extlang subtag, i.e. exactly three letters.
 *
 *   extlang = 3ALPHA              ; selected ISO 639 codes
 *             *2("-" 3ALPHA)      ; permanently reserved
 *
 * A negative len means s is NUL-terminated.
 */
static UBool
_isExtlangSubtag(const char* s, int32_t len) {
    const int32_t n = (len < 0) ? (int32_t)uprv_strlen(s) : len;
    if (n != 3) {
        return false;
    }
    return _isAlphaString(s, n);
}
|
|
|
|
/*
 * Tests whether s is a script subtag, i.e. exactly four letters.
 *
 *   script = 4ALPHA              ; ISO 15924 code
 *
 * A negative len means s is NUL-terminated.
 */
U_CFUNC UBool
ultag_isScriptSubtag(const char* s, int32_t len) {
    const int32_t n = (len < 0) ? (int32_t)uprv_strlen(s) : len;
    if (n != 4) {
        return false;
    }
    return _isAlphaString(s, n);
}
|
|
|
|
/*
 * Tests whether s is a region subtag: two letters or three digits.
 *
 *   region = 2ALPHA              ; ISO 3166-1 code
 *          / 3DIGIT              ; UN M.49 code
 *
 * A negative len means s is NUL-terminated.
 */
U_CFUNC UBool
ultag_isRegionSubtag(const char* s, int32_t len) {
    const int32_t n = (len < 0) ? (int32_t)uprv_strlen(s) : len;
    switch (n) {
    case 2:
        return _isAlphaString(s, n);
    case 3:
        return _isNumericString(s, n);
    default:
        return false;
    }
}
|
|
|
|
/*
 * Tests whether s is a single variant subtag.
 *
 *   variant = 5*8alphanum         ; registered variants
 *           / (DIGIT 3alphanum)
 *
 * A negative len means s is NUL-terminated.
 */
static UBool
_isVariantSubtag(const char* s, int32_t len) {
    const int32_t n = (len < 0) ? (int32_t)uprv_strlen(s) : len;
    if (n == 4) {
        /* the 4-character form must start with a digit */
        return ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3);
    }
    return _isAlphaNumericStringLimitedLength(s, n, 5, 8);
}
|
|
|
|
/*
 * Tests whether s is a non-empty list of subtags separated by SEP in which
 * every subtag satisfies test(). Empty subtags (a leading, trailing or
 * doubled separator) and the empty string are rejected.
 * A negative len means s is NUL-terminated.
 */
static UBool
_isSepListOf(UBool (*test)(const char*, int32_t), const char* s, int32_t len) {
    if (len < 0) {
        len = (int32_t)uprv_strlen(s);
    }

    int32_t subtagStart = -1;   /* index where the current subtag began, -1 if none yet */
    for (int32_t i = 0; i < len; i++) {
        if (s[i] != SEP) {
            if (subtagStart < 0) {
                subtagStart = i;
            }
            continue;
        }
        /* hit a separator: the subtag before it must exist and be valid */
        if (subtagStart < 0) {
            return false;
        }
        if (!test(s + subtagStart, i - subtagStart)) {
            return false;
        }
        subtagStart = -1;
    }

    if (subtagStart < 0) {
        return false;
    }
    return test(s + subtagStart, len - subtagStart);
}
|
|
|
|
/* Tests whether s is a SEP-separated list made only of variant subtags. */
U_CFUNC UBool
ultag_isVariantSubtags(const char* s, int32_t len) {
    UBool (* const subtagTest)(const char*, int32_t) = _isVariantSubtag;
    return _isSepListOf(subtagTest, s, len);
}
|
|
|
|
// This is for the ICU-specific "lvariant" handling.
|
|
static UBool
|
|
_isPrivateuseVariantSubtag(const char* s, int32_t len) {
|
|
/*
|
|
* variant = 1*8alphanum ; registered variants
|
|
* / (DIGIT 3alphanum)
|
|
*/
|
|
return _isAlphaNumericStringLimitedLength(s, len , 1, 8);
|
|
}
|
|
|
|
/*
 * Tests whether s is an extension singleton: one letter or digit other than
 * the private use singleton 'x' / 'X'.
 *
 *   extension = singleton 1*("-" (2*8alphanum))
 *
 *   singleton = DIGIT               ; 0 - 9
 *             / %x41-57             ; A - W
 *             / %x59-5A             ; Y - Z
 *             / %x61-77             ; a - w
 *             / %x79-7A             ; y - z
 *
 * A negative len means s is NUL-terminated.
 */
static UBool
_isExtensionSingleton(const char* s, int32_t len) {
    const int32_t n = (len < 0) ? (int32_t)uprv_strlen(s) : len;
    if (n != 1) {
        return false;
    }
    const char c = *s;
    if (!(ISALPHA(c) || ISNUMERIC(c))) {
        return false;
    }
    return uprv_tolower(c) != PRIVATEUSE;
}
|
|
|
|
/*
 * Tests whether s is one subtag following an extension singleton:
 * 2 to 8 letters/digits.
 *
 *   extension = singleton 1*("-" (2*8alphanum))
 */
static UBool
_isExtensionSubtag(const char* s, int32_t len) {
    const int32_t kMinLen = 2;
    const int32_t kMaxLen = 8;
    return _isAlphaNumericStringLimitedLength(s, len, kMinLen, kMaxLen);
}
|
|
|
|
/* Tests whether s is a SEP-separated list made only of extension subtags. */
U_CFUNC UBool
ultag_isExtensionSubtags(const char* s, int32_t len) {
    UBool (* const subtagTest)(const char*, int32_t) = _isExtensionSubtag;
    return _isSepListOf(subtagTest, s, len);
}
|
|
|
|
/*
 * Tests whether s is one private use value subtag: 1 to 8 letters/digits.
 *
 *   privateuse = "x" 1*("-" (1*8alphanum))
 */
static UBool
_isPrivateuseValueSubtag(const char* s, int32_t len) {
    const int32_t kMinLen = 1;
    const int32_t kMaxLen = 8;
    return _isAlphaNumericStringLimitedLength(s, len, kMinLen, kMaxLen);
}
|
|
|
|
/* Tests whether s is a SEP-separated list made only of private use value subtags. */
U_CFUNC UBool
ultag_isPrivateuseValueSubtags(const char* s, int32_t len) {
    UBool (* const subtagTest)(const char*, int32_t) = _isPrivateuseValueSubtag;
    return _isSepListOf(subtagTest, s, len);
}
|
|
|
|
/*
 * Tests whether s is one Unicode locale attribute: 3 to 8 letters/digits.
 *
 *   attribute = alphanum{3,8} ;
 */
U_CFUNC UBool
ultag_isUnicodeLocaleAttribute(const char* s, int32_t len) {
    const int32_t kMinLen = 3;
    const int32_t kMaxLen = 8;
    return _isAlphaNumericStringLimitedLength(s, len, kMinLen, kMaxLen);
}
|
|
|
|
/* Tests whether s is a SEP-separated list made only of Unicode locale attributes. */
U_CFUNC UBool
ultag_isUnicodeLocaleAttributes(const char* s, int32_t len) {
    UBool (* const subtagTest)(const char*, int32_t) = ultag_isUnicodeLocaleAttribute;
    return _isSepListOf(subtagTest, s, len);
}
|
|
|
|
/*
 * Tests whether s is a Unicode locale key: a letter or digit followed by
 * a letter.
 *
 *   key = alphanum alpha ;
 *
 * A negative len means s is NUL-terminated.
 */
U_CFUNC UBool
ultag_isUnicodeLocaleKey(const char* s, int32_t len) {
    const int32_t n = (len < 0) ? (int32_t)uprv_strlen(s) : len;
    if (n != 2) {
        return false;
    }
    const char first = s[0];
    if (!(ISALPHA(first) || ISNUMERIC(first))) {
        return false;
    }
    return ISALPHA(s[1]);
}
|
|
|
|
/*
 * Tests whether s is one subtag of a Unicode locale type:
 *
 *   alphanum{3,8}
 */
U_CFUNC UBool
_isUnicodeLocaleTypeSubtag(const char*s, int32_t len) {
    const int32_t kMinLen = 3;
    const int32_t kMaxLen = 8;
    return _isAlphaNumericStringLimitedLength(s, len, kMinLen, kMaxLen);
}
|
|
|
|
/*
 * Tests whether s is a complete Unicode locale type:
 *
 *   type = alphanum{3,8} (sep alphanum{3,8})* ;
 */
U_CFUNC UBool
ultag_isUnicodeLocaleType(const char*s, int32_t len) {
    UBool (* const subtagTest)(const char*, int32_t) = _isUnicodeLocaleTypeSubtag;
    return _isSepListOf(subtagTest, s, len);
}
|
|
|
|
/*
 * Tests whether s is a tkey: a letter followed by a digit.
 *
 *   tkey = alpha digit ;
 *
 * A negative len means s is NUL-terminated.
 */
static UBool
_isTKey(const char* s, int32_t len)
{
    const int32_t n = (len < 0) ? (int32_t)uprv_strlen(s) : len;
    if (n != 2) {
        return false;
    }
    if (!ISALPHA(s[0])) {
        return false;
    }
    return ISNUMERIC(s[1]);
}
|
|
|
|
/*
 * Scans the SEP-separated subtags of localeID from left to right and returns
 * a pointer to the first one that is a tkey, or nullptr when there is none.
 * The returned pointer points into localeID.
 */
U_CAPI const char * U_EXPORT2
ultag_getTKeyStart(const char *localeID) {
    const char *subtag = localeID;
    for (const char *dash = uprv_strchr(subtag, SEP);
         dash != nullptr;
         dash = uprv_strchr(subtag, SEP)) {
        if (_isTKey(subtag, static_cast<int32_t>(dash - subtag))) {
            return subtag;
        }
        subtag = dash + 1;
    }
    /* last subtag: it runs up to the terminating NUL */
    return _isTKey(subtag, -1) ? subtag : nullptr;
}
|
|
|
|
/*
 * Tests whether s is one subtag of a tvalue: 3 to 8 letters/digits.
 *
 *   tvalue = (sep alphanum{3,8})+ ;
 */
static UBool
_isTValue(const char* s, int32_t len)
{
    const int32_t kMinLen = 3;
    const int32_t kMaxLen = 8;
    return _isAlphaNumericStringLimitedLength(s, len, kMinLen, kMaxLen);
}
|
|
|
|
/*
 * One step of the state machine that validates the subtags of a transformed
 * ("t") extension. `state` holds the machine state between calls (0 to
 * start); it is updated when the subtag s is accepted. Returns true when s
 * is acceptable in the current state, false otherwise (state is then left
 * untouched). A negative state after the last subtag means the sequence
 * stopped right after a tkey, which is an error for the caller to detect.
 * A negative len means s is NUL-terminated.
 */
static UBool
_isTransformedExtensionSubtag(int32_t& state, const char* s, int32_t len)
{
    const int32_t kStart = 0;        // Nothing seen yet: expect a language subtag, a tkey, or the end
    const int32_t kGotLanguage = 1;  // After the language: expect script, region, variant, tkey, or the end
    const int32_t kGotScript = 2;    // After the script: expect region, variant, tkey, or the end
    const int32_t kGotRegion = 3;    // After the region: expect variant, tkey, or the end
    const int32_t kGotVariant = 4;   // After a variant: expect another variant, tkey, or the end
    const int32_t kGotTKey = -1;     // After a tkey: a tvalue must follow. ERROR if stop here.
    const int32_t kGotTValue = 6;    // After a tvalue: expect tkey, another tvalue, or the end

    if (len < 0) {
        len = (int32_t)uprv_strlen(s);
    }

    if (state == kStart) {
        /* a 4-letter subtag is not accepted as the language here */
        if (ultag_isLanguageSubtag(s, len) && len != 4) {
            state = kGotLanguage;
            return true;
        }
        if (_isTKey(s, len)) {
            state = kGotTKey;
            return true;
        }
        return false;
    }

    if (state == kGotTKey) {
        if (_isTValue(s, len)) {
            state = kGotTValue;
            return true;
        }
        return false;
    }

    if (state == kGotTValue) {
        if (_isTKey(s, len)) {
            state = kGotTKey;
            return true;
        }
        if (_isTValue(s, len)) {
            return true;
        }
        return false;
    }

    const UBool afterLanguage = (state == kGotLanguage);
    const UBool afterScript = (state == kGotScript);
    if (!afterLanguage && !afterScript && state != kGotRegion && state != kGotVariant) {
        /* unknown state */
        return false;
    }

    /* Language-part states: each later state accepts a subset of what the
       earlier ones accept. */
    if (afterLanguage && ultag_isScriptSubtag(s, len)) {
        state = kGotScript;
        return true;
    }
    if ((afterLanguage || afterScript) && ultag_isRegionSubtag(s, len)) {
        state = kGotRegion;
        return true;
    }
    if (_isVariantSubtag(s, len)) {
        state = kGotVariant;
        return true;
    }
    if (_isTKey(s, len)) {
        state = kGotTKey;
        return true;
    }
    return false;
}
|
|
|
|
/*
 * One step of the state machine that validates the subtags of a Unicode
 * ("u") extension. `state` holds the machine state between calls (0 to
 * start) and is updated when the subtag s is accepted. Returns true when s
 * is acceptable in the current state.
 */
static UBool
_isUnicodeExtensionSubtag(int32_t& state, const char* s, int32_t len)
{
    const int32_t kStart = 0;    // Nothing but attributes so far: expect attribute, key, or the end
    const int32_t kGotKey = 1;   // After a key: expect a type subtag, another key, or the end
    const int32_t kGotType = 2;  // After a type subtag: expect a key, another type subtag, or the end

    if (state != kStart && state != kGotKey && state != kGotType) {
        return false;
    }

    /* A key is acceptable in every state and always leads to kGotKey. */
    if (ultag_isUnicodeLocaleKey(s, len)) {
        state = kGotKey;
        return true;
    }

    if (state == kStart) {
        /* attributes may only appear before the first key */
        if (ultag_isUnicodeLocaleAttribute(s, len)) {
            return true;
        }
        return false;
    }

    /* kGotKey or kGotType: a type subtag leads to (or stays in) kGotType. */
    if (_isUnicodeLocaleTypeSubtag(s, len)) {
        state = kGotType;
        return true;
    }
    return false;
}
|
|
|
|
/*
 * Splits s at every SEP and feeds the subtags, in order, to the stateful
 * validator test(), starting from state 0. Returns true only when every
 * subtag is accepted and the final state is not negative. An empty subtag
 * is passed to test() with length 0 rather than being rejected here.
 * A negative len means s is NUL-terminated.
 */
static UBool
_isStatefulSepListOf(UBool (*test)(int32_t&, const char*, int32_t), const char* s, int32_t len)
{
    int32_t state = 0;

    if (len < 0) {
        len = (int32_t)uprv_strlen(s);
    }

    const char* subtag = s;     /* start of the subtag being scanned */
    for (int32_t i = 0; i < len; i++) {
        if (s[i] != SEP) {
            continue;
        }
        if (!test(state, subtag, (int32_t)((s + i) - subtag))) {
            return false;
        }
        subtag = s + i + 1;
    }

    /* last subtag, then make sure the machine did not stop in an error state */
    if (!test(state, subtag, (int32_t)((s + len) - subtag))) {
        return false;
    }
    return state >= 0;
}
|
|
|
|
/* Tests whether s is a valid sequence of transformed ("t") extension subtags. */
U_CFUNC UBool
ultag_isTransformedExtensionSubtags(const char* s, int32_t len)
{
    UBool (* const step)(int32_t&, const char*, int32_t) = _isTransformedExtensionSubtag;
    return _isStatefulSepListOf(step, s, len);
}
|
|
|
|
/* Tests whether s is a valid sequence of Unicode ("u") extension subtags. */
U_CFUNC UBool
ultag_isUnicodeExtensionSubtags(const char* s, int32_t len) {
    UBool (* const step)(int32_t&, const char*, int32_t) = _isUnicodeExtensionSubtag;
    return _isStatefulSepListOf(step, s, len);
}
|
|
|
|
|
|
/*
|
|
* -------------------------------------------------
|
|
*
|
|
* Helper functions
|
|
*
|
|
* -------------------------------------------------
|
|
*/
|
|
|
|
/*
 * Appends var to the end of the list headed by *first, keeping the existing
 * order. Returns false, leaving the list and var untouched, when the list
 * already holds an entry with the same variant text; returns true otherwise.
 */
static UBool
_addVariantToList(VariantListEntry **first, VariantListEntry *var) {
    /* link always addresses the pointer that would have to change to
       hook a node in at the current position */
    VariantListEntry **link = first;

    while (*link != nullptr) {
        if (uprv_compareInvCharsAsAscii(var->variant, (*link)->variant) == 0) {
            /* duplicated variant */
            return false;
        }
        link = &(*link)->next;
    }

    var->next = nullptr;
    *link = var;
    return true;
}
|
|
|
|
/*
 * Inserts attr into the list headed by *first, keeping the list sorted in
 * ascending order of uprv_compareInvCharsAsAscii() on the attribute text.
 * Returns false, leaving the list and attr untouched, when an entry with the
 * same attribute text is already present; returns true otherwise.
 */
static UBool
_addAttributeToList(AttributeListEntry **first, AttributeListEntry *attr) {
    /* link always addresses the pointer that would have to change to
       hook a node in at the current position */
    AttributeListEntry **link = first;

    while (*link != nullptr) {
        const int32_t cmp = uprv_compareInvCharsAsAscii(attr->attribute, (*link)->attribute);
        if (cmp == 0) {
            /* duplicated attribute */
            return false;
        }
        if (cmp < 0) {
            /* attr sorts before the current node: insert here */
            break;
        }
        link = &(*link)->next;
    }

    attr->next = *link;
    *link = attr;
    return true;
}
|
|
|
|
|
|
/*
 * Inserts ext into the list headed by *first, keeping the list ordered by key.
 * Returns false, leaving the list and ext untouched, when a key comparing
 * equal is already present; returns true otherwise.
 *
 * With localeToBCP false, keys are ordered by uprv_compareInvCharsAsAscii().
 * With localeToBCP true, the order is adjusted for locale-to-BCP conversion:
 *   - between two 1-character keys, the private use key ('x') sorts last;
 *   - a 1-character key is compared with a longer key as if the longer key
 *     were LDMLEXT ('u');
 *   - between two longer keys, the "attribute" key sorts first.
 */
static UBool
_addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) {
    /* link always addresses the pointer that would have to change to
       hook a node in at the current position */
    ExtensionListEntry **link = first;

    for (; *link != nullptr; link = &(*link)->next) {
        const ExtensionListEntry *cur = *link;
        int32_t cmp;

        if (!localeToBCP) {
            cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
        } else {
            /* special handling for locale to bcp conversion */
            const int32_t len = (int32_t)uprv_strlen(ext->key);
            const int32_t curlen = (int32_t)uprv_strlen(cur->key);

            if (len == 1 && curlen == 1) {
                /* two singletons */
                if (*(ext->key) == *(cur->key)) {
                    cmp = 0;
                } else if (*(ext->key) == PRIVATEUSE) {
                    cmp = 1;
                } else if (*(cur->key) == PRIVATEUSE) {
                    cmp = -1;
                } else {
                    cmp = *(ext->key) - *(cur->key);
                }
            } else if (len == 1) {
                cmp = *(ext->key) - LDMLEXT;
            } else if (curlen == 1) {
                cmp = LDMLEXT - *(cur->key);
            } else {
                cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
                /* Both are u extension keys - we need special handling for 'attribute' */
                if (cmp != 0) {
                    if (uprv_strcmp(cur->key, LOCALE_ATTRIBUTE_KEY) == 0) {
                        cmp = 1;
                    } else if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) {
                        cmp = -1;
                    }
                }
            }
        }

        if (cmp == 0) {
            /* duplicated extension key */
            return false;
        }
        if (cmp < 0) {
            /* ext sorts before the current node: insert here */
            break;
        }
    }

    ext->next = *link;
    *link = ext;
    return true;
}
|
|
|
|
static void
|
|
_initializeULanguageTag(ULanguageTag* langtag) {
|
|
int32_t i;
|
|
|
|
langtag->buf = nullptr;
|
|
|
|
langtag->language = EMPTY;
|
|
for (i = 0; i < MAXEXTLANG; i++) {
|
|
langtag->extlang[i] = nullptr;
|
|
}
|
|
|
|
langtag->script = EMPTY;
|
|
langtag->region = EMPTY;
|
|
|
|
langtag->variants = nullptr;
|
|
langtag->extensions = nullptr;
|
|
|
|
langtag->legacy = EMPTY;
|
|
langtag->privateuse = EMPTY;
|
|
}
|
|
|
|
static void
|
|
_appendLanguageToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UErrorCode* status) {
|
|
char buf[ULOC_LANG_CAPACITY];
|
|
UErrorCode tmpStatus = U_ZERO_ERROR;
|
|
int32_t len, i;
|
|
|
|
if (U_FAILURE(*status)) {
|
|
return;
|
|
}
|
|
|
|
len = uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus);
|
|
if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
|
|
if (strict) {
|
|
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
return;
|
|
}
|
|
len = 0;
|
|
}
|
|
|
|
/* Note: returned language code is in lower case letters */
|
|
|
|
if (len == 0) {
|
|
sink.Append(LANG_UND, LANG_UND_LEN);
|
|
} else if (!ultag_isLanguageSubtag(buf, len)) {
|
|
/* invalid language code */
|
|
if (strict) {
|
|
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
return;
|
|
}
|
|
sink.Append(LANG_UND, LANG_UND_LEN);
|
|
} else {
|
|
/* resolve deprecated */
|
|
for (i = 0; i < UPRV_LENGTHOF(DEPRECATEDLANGS); i += 2) {
|
|
// 2-letter deprecated subtags are listede before 3-letter
|
|
// ones in DEPRECATEDLANGS[]. Get out of loop on coming
|
|
// across the 1st 3-letter subtag, if the input is a 2-letter code.
|
|
// to avoid continuing to try when there's no match.
|
|
if (uprv_strlen(buf) < uprv_strlen(DEPRECATEDLANGS[i])) break;
|
|
if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) {
|
|
uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]);
|
|
len = (int32_t)uprv_strlen(buf);
|
|
break;
|
|
}
|
|
}
|
|
sink.Append(buf, len);
|
|
}
|
|
}
|
|
|
|
static void
|
|
_appendScriptToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UErrorCode* status) {
|
|
char buf[ULOC_SCRIPT_CAPACITY];
|
|
UErrorCode tmpStatus = U_ZERO_ERROR;
|
|
int32_t len;
|
|
|
|
if (U_FAILURE(*status)) {
|
|
return;
|
|
}
|
|
|
|
len = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus);
|
|
if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
|
|
if (strict) {
|
|
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
}
|
|
return;
|
|
}
|
|
|
|
if (len > 0) {
|
|
if (!ultag_isScriptSubtag(buf, len)) {
|
|
/* invalid script code */
|
|
if (strict) {
|
|
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
}
|
|
return;
|
|
} else {
|
|
sink.Append("-", 1);
|
|
sink.Append(buf, len);
|
|
}
|
|
}
|
|
}
|
|
|
|
static void
|
|
_appendRegionToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UErrorCode* status) {
|
|
char buf[ULOC_COUNTRY_CAPACITY];
|
|
UErrorCode tmpStatus = U_ZERO_ERROR;
|
|
int32_t len;
|
|
|
|
if (U_FAILURE(*status)) {
|
|
return;
|
|
}
|
|
|
|
len = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus);
|
|
if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
|
|
if (strict) {
|
|
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
}
|
|
return;
|
|
}
|
|
|
|
if (len > 0) {
|
|
if (!ultag_isRegionSubtag(buf, len)) {
|
|
/* invalid region code */
|
|
if (strict) {
|
|
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
}
|
|
return;
|
|
} else {
|
|
sink.Append("-", 1);
|
|
/* resolve deprecated */
|
|
for (int i = 0; i < UPRV_LENGTHOF(DEPRECATEDREGIONS); i += 2) {
|
|
if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDREGIONS[i]) == 0) {
|
|
uprv_strcpy(buf, DEPRECATEDREGIONS[i + 1]);
|
|
len = (int32_t)uprv_strlen(buf);
|
|
break;
|
|
}
|
|
}
|
|
sink.Append(buf, len);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
 * Sorts the list headed by `first` into ascending order of
 * uprv_compareInvCharsAsAscii() on the variant text. The nodes stay where
 * they are; only their `variant` pointers are exchanged.
 */
static void _sortVariants(VariantListEntry* first) {
    VariantListEntry* outer = first;
    while (outer != nullptr) {
        VariantListEntry* inner = outer->next;
        while (inner != nullptr) {
            if (uprv_compareInvCharsAsAscii(outer->variant, inner->variant) > 0) {
                /* out of order: exchange the two variant pointers */
                const char* swapped = outer->variant;
                outer->variant = inner->variant;
                inner->variant = swapped;
            }
            inner = inner->next;
        }
        outer = outer->next;
    }
}
|
|
|
|
static void
|
|
_appendVariantsToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UBool *hadPosix, UErrorCode* status) {
|
|
char buf[ULOC_FULLNAME_CAPACITY];
|
|
UErrorCode tmpStatus = U_ZERO_ERROR;
|
|
int32_t len, i;
|
|
|
|
if (U_FAILURE(*status)) {
|
|
return;
|
|
}
|
|
|
|
len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
|
|
if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
|
|
if (strict) {
|
|
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
}
|
|
return;
|
|
}
|
|
|
|
if (len > 0) {
|
|
char *p, *pVar;
|
|
UBool bNext = true;
|
|
VariantListEntry *var;
|
|
VariantListEntry *varFirst = nullptr;
|
|
|
|
pVar = nullptr;
|
|
p = buf;
|
|
while (bNext) {
|
|
if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
|
|
if (*p == 0) {
|
|
bNext = false;
|
|
} else {
|
|
*p = 0; /* terminate */
|
|
}
|
|
if (pVar == nullptr) {
|
|
if (strict) {
|
|
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
break;
|
|
}
|
|
/* ignore empty variant */
|
|
} else {
|
|
/* ICU uses upper case letters for variants, but
|
|
the canonical format is lowercase in BCP47 */
|
|
for (i = 0; *(pVar + i) != 0; i++) {
|
|
*(pVar + i) = uprv_tolower(*(pVar + i));
|
|
}
|
|
|
|
/* validate */
|
|
if (_isVariantSubtag(pVar, -1)) {
|
|
if (uprv_strcmp(pVar,POSIX_VALUE) || len != (int32_t)uprv_strlen(POSIX_VALUE)) {
|
|
/* emit the variant to the list */
|
|
var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
|
|
if (var == nullptr) {
|
|
*status = U_MEMORY_ALLOCATION_ERROR;
|
|
break;
|
|
}
|
|
var->variant = pVar;
|
|
if (!_addVariantToList(&varFirst, var)) {
|
|
/* duplicated variant */
|
|
uprv_free(var);
|
|
if (strict) {
|
|
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
break;
|
|
}
|
|
}
|
|
} else {
|
|
/* Special handling for POSIX variant, need to remember that we had it and then */
|
|
/* treat it like an extension later. */
|
|
*hadPosix = true;
|
|
}
|
|
} else if (strict) {
|
|
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
break;
|
|
} else if (_isPrivateuseValueSubtag(pVar, -1)) {
|
|
/* Handle private use subtags separately */
|
|
break;
|
|
}
|
|
}
|
|
/* reset variant starting position */
|
|
pVar = nullptr;
|
|
} else if (pVar == nullptr) {
|
|
pVar = p;
|
|
}
|
|
p++;
|
|
}
|
|
|
|
if (U_SUCCESS(*status)) {
|
|
if (varFirst != nullptr) {
|
|
int32_t varLen;
|
|
|
|
/* per UTS35, we should sort the variants */
|
|
_sortVariants(varFirst);
|
|
|
|
/* write out validated/normalized variants to the target */
|
|
var = varFirst;
|
|
while (var != nullptr) {
|
|
sink.Append("-", 1);
|
|
varLen = (int32_t)uprv_strlen(var->variant);
|
|
sink.Append(var->variant, varLen);
|
|
var = var->next;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* clean up */
|
|
var = varFirst;
|
|
while (var != nullptr) {
|
|
VariantListEntry *tmpVar = var->next;
|
|
uprv_free(var);
|
|
var = tmpVar;
|
|
}
|
|
|
|
if (U_FAILURE(*status)) {
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
 * Appends the BCP47 extension / private use part of a language tag, built
 * from the keywords of an ICU locale ID, to sink.
 *
 * Every keyword enumerated by uloc_openKeywords(localeID) is looked up with
 * ulocimp_getKeywordValue() and sorted into one of three cases:
 *  - key equals LOCALE_ATTRIBUTE_KEY: the value is split on '-' and each
 *    piece goes into the attribute list; a placeholder extension entry with
 *    that key and a null value marks where the attributes are written.
 *  - key longer than one character: key and value are mapped with
 *    uloc_toUnicodeLocaleKey() / uloc_toUnicodeLocaleType(); a value that
 *    comes back unchanged is copied and lower-cased.
 *  - one-character key: validated as private use (PRIVATEUSE) or as an
 *    extension singleton and copied as is.
 * Entries are collected through _addExtensionToList() and written only if
 * *status is still a success code once collection has finished.
 *
 * @param localeID  Locale ID whose keywords are converted.
 * @param sink      Receives the generated "-..." subtags.
 * @param strict    If true, a keyword that cannot be converted sets *status
 *                  to U_ILLEGAL_ARGUMENT_ERROR; if false it is skipped.
 * @param hadPosix  If true, an extra POSIX_KEY / POSIX_VALUE entry is added.
 * @param status    In/out ICU error code.
 */
static void
|
|
_appendKeywordsToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UBool hadPosix, UErrorCode* status) {
|
|
char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 };
|
|
int32_t attrBufLength = 0;
|
|
|
|
icu::MemoryPool<AttributeListEntry> attrPool;
|
|
icu::MemoryPool<ExtensionListEntry> extPool;
|
|
icu::MemoryPool<icu::CharString> strPool;
|
|
|
|
icu::LocalUEnumerationPointer keywordEnum(uloc_openKeywords(localeID, status));
|
|
/* With hadPosix set the function keeps going even after a failure here;
   the final write below is still gated on U_SUCCESS(*status). */
if (U_FAILURE(*status) && !hadPosix) {
|
|
return;
|
|
}
|
|
if (keywordEnum.isValid() || hadPosix) {
|
|
/* reorder extensions */
|
|
int32_t len;
|
|
const char *key;
|
|
ExtensionListEntry *firstExt = nullptr;
|
|
ExtensionListEntry *ext;
|
|
AttributeListEntry *firstAttr = nullptr;
|
|
AttributeListEntry *attr;
|
|
icu::MemoryPool<icu::CharString> extBufPool;
|
|
const char *bcpKey=nullptr, *bcpValue=nullptr;
|
|
UErrorCode tmpStatus = U_ZERO_ERROR;
|
|
int32_t keylen;
|
|
UBool isBcpUExt;
|
|
|
|
/* one iteration per locale keyword; ends when the enumeration returns nullptr */
while (true) {
|
|
key = uenum_next(keywordEnum.getAlias(), nullptr, status);
|
|
if (key == nullptr) {
|
|
break;
|
|
}
|
|
|
|
icu::CharString buf;
|
|
{
|
|
icu::CharStringByteSink sink(&buf);
|
|
ulocimp_getKeywordValue(localeID, key, sink, &tmpStatus);
|
|
}
|
|
len = buf.length();
|
|
|
|
if (U_FAILURE(tmpStatus)) {
|
|
if (tmpStatus == U_MEMORY_ALLOCATION_ERROR) {
|
|
*status = U_MEMORY_ALLOCATION_ERROR;
|
|
break;
|
|
}
|
|
if (strict) {
|
|
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
break;
|
|
}
|
|
/* ignore this keyword */
|
|
tmpStatus = U_ZERO_ERROR;
|
|
continue;
|
|
}
|
|
|
|
keylen = (int32_t)uprv_strlen(key);
|
|
/* keys longer than one character take the Unicode locale (u) extension
   path below; one-character keys are singletons or private use */
isBcpUExt = (keylen > 1);
|
|
|
|
/* special keyword used for representing Unicode locale attributes */
|
|
if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) {
|
|
/* NOTE(review): when len == 0 this branch leaves bcpKey/bcpValue at
   whatever the previous iteration stored (nullptr on the first one), yet
   an extension entry is still created below - confirm that an empty
   attribute value cannot reach this point. */
if (len > 0) {
|
|
int32_t i = 0;
|
|
/* each pass copies one '-'-delimited attribute of buf into attrBuf */
while (true) {
|
|
attrBufLength = 0;
|
|
for (; i < len; i++) {
|
|
if (buf[i] != '-') {
|
|
if (static_cast<size_t>(attrBufLength) < sizeof(attrBuf)) {
|
|
attrBuf[attrBufLength++] = buf[i];
|
|
} else {
|
|
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
return;
|
|
}
|
|
} else {
|
|
i++;
|
|
break;
|
|
}
|
|
}
|
|
if (attrBufLength > 0) {
|
|
if (static_cast<size_t>(attrBufLength) < sizeof(attrBuf)) {
|
|
attrBuf[attrBufLength] = 0;
|
|
} else {
|
|
*status = U_STRING_NOT_TERMINATED_WARNING;
|
|
}
|
|
|
|
} else if (i >= len){
|
|
break;
|
|
}
|
|
|
|
/* create AttributeListEntry */
|
|
attr = attrPool.create();
|
|
if (attr == nullptr) {
|
|
*status = U_MEMORY_ALLOCATION_ERROR;
|
|
break;
|
|
}
|
|
/* the attribute text is copied with an explicit length */
icu::CharString* attrValue =
|
|
strPool.create(attrBuf, attrBufLength, *status);
|
|
if (attrValue == nullptr) {
|
|
*status = U_MEMORY_ALLOCATION_ERROR;
|
|
break;
|
|
}
|
|
if (U_FAILURE(*status)) {
|
|
break;
|
|
}
|
|
attr->attribute = attrValue->data();
|
|
|
|
if (!_addAttributeToList(&firstAttr, attr)) {
|
|
if (strict) {
|
|
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
/* for a place holder ExtensionListEntry */
|
|
bcpKey = LOCALE_ATTRIBUTE_KEY;
|
|
bcpValue = nullptr;
|
|
}
|
|
} else if (isBcpUExt) {
|
|
bcpKey = uloc_toUnicodeLocaleKey(key);
|
|
if (bcpKey == nullptr) {
|
|
if (strict) {
|
|
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
break;
|
|
}
|
|
continue;
|
|
}
|
|
|
|
/* we've checked buf is null-terminated above */
|
|
bcpValue = uloc_toUnicodeLocaleType(key, buf.data());
|
|
if (bcpValue == nullptr) {
|
|
if (strict) {
|
|
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
break;
|
|
}
|
|
continue;
|
|
}
|
|
if (bcpValue == buf.data()) {
|
|
/*
 * When uloc_toUnicodeLocaleType(key, buf) returns the input value as
 * is, the value is well-formed but has no known mapping. This
 * implementation normalizes the value to lower case. The copy is made
 * in extBufPool because buf is local to this loop iteration.
 */
|
|
icu::CharString* extBuf = extBufPool.create(buf, tmpStatus);
|
|
|
|
if (extBuf == nullptr) {
|
|
*status = U_MEMORY_ALLOCATION_ERROR;
|
|
break;
|
|
}
|
|
if (U_FAILURE(tmpStatus)) {
|
|
*status = tmpStatus;
|
|
break;
|
|
}
|
|
|
|
T_CString_toLowerCase(extBuf->data());
|
|
bcpValue = extBuf->data();
|
|
}
|
|
} else {
|
|
/* one-character key: private use or another extension singleton */
if (*key == PRIVATEUSE) {
|
|
if (!ultag_isPrivateuseValueSubtags(buf.data(), len)) {
|
|
if (strict) {
|
|
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
break;
|
|
}
|
|
continue;
|
|
}
|
|
} else {
|
|
if (!_isExtensionSingleton(key, keylen) || !ultag_isExtensionSubtags(buf.data(), len)) {
|
|
if (strict) {
|
|
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
break;
|
|
}
|
|
continue;
|
|
}
|
|
}
|
|
bcpKey = key;
|
|
icu::CharString* extBuf =
|
|
extBufPool.create(buf.data(), len, tmpStatus);
|
|
if (extBuf == nullptr) {
|
|
*status = U_MEMORY_ALLOCATION_ERROR;
|
|
break;
|
|
}
|
|
if (U_FAILURE(tmpStatus)) {
|
|
*status = tmpStatus;
|
|
break;
|
|
}
|
|
bcpValue = extBuf->data();
|
|
}
|
|
|
|
/* create ExtensionListEntry */
|
|
ext = extPool.create();
|
|
if (ext == nullptr) {
|
|
*status = U_MEMORY_ALLOCATION_ERROR;
|
|
break;
|
|
}
|
|
ext->key = bcpKey;
|
|
ext->value = bcpValue;
|
|
|
|
if (!_addExtensionToList(&firstExt, ext, true)) {
|
|
if (strict) {
|
|
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Special handling for POSIX variant - add the keywords for POSIX */
|
|
if (hadPosix) {
|
|
/* create ExtensionListEntry for POSIX */
|
|
ext = extPool.create();
|
|
if (ext == nullptr) {
|
|
*status = U_MEMORY_ALLOCATION_ERROR;
|
|
return;
|
|
}
|
|
ext->key = POSIX_KEY;
|
|
ext->value = POSIX_VALUE;
|
|
|
|
if (!_addExtensionToList(&firstExt, ext, true)) {
|
|
// Silently ignore errors.
|
|
}
|
|
}
|
|
|
|
/* nothing is written unless every step above left a success code */
if (U_SUCCESS(*status) && (firstExt != nullptr || firstAttr != nullptr)) {
|
|
UBool startLDMLExtension = false;
|
|
for (ext = firstExt; ext; ext = ext->next) {
|
|
if (!startLDMLExtension && uprv_strlen(ext->key) > 1) {
|
|
/* first LDML u singleton extension */
|
|
sink.Append("-u", 2);
|
|
startLDMLExtension = true;
|
|
}
|
|
|
|
/* write out the sorted BCP47 attributes, extensions and private use */
|
|
if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) {
|
|
/* write the value for the attributes */
|
|
for (attr = firstAttr; attr; attr = attr->next) {
|
|
sink.Append("-", 1);
|
|
sink.Append(
|
|
attr->attribute, static_cast<int32_t>(uprv_strlen(attr->attribute)));
|
|
}
|
|
} else {
|
|
sink.Append("-", 1);
|
|
sink.Append(ext->key, static_cast<int32_t>(uprv_strlen(ext->key)));
|
|
/* a value of "true" or "yes" is left out, so only the key is written */
if (uprv_strcmp(ext->value, "true") != 0 &&
|
|
uprv_strcmp(ext->value, "yes") != 0) {
|
|
sink.Append("-", 1);
|
|
sink.Append(ext->value, static_cast<int32_t>(uprv_strlen(ext->value)));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
 * Converts the value of a BCP47 "u" extension into LDML keywords and merges
 * them into *appendTo,
 * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional}
 *
 * The value is scanned in two phases. Leading subtags, up to the first one
 * accepted by ultag_isUnicodeLocaleKey(), are attributes; they are combined
 * into one keyword with key LOCALE_ATTRIBUTE_KEY and a '-'-joined value.
 * The rest is a sequence of key / type-subtag groups, each converted with
 * uloc_toLegacyKey() / uloc_toLegacyType(). A key without type subtags gets
 * the type LOCALE_TYPE_YES.
 *
 * @param ldmlext       NUL-terminated extension value, subtags separated by SEP.
 * @param appendTo      List that receives the resulting keyword entries.
 * @param extPool       Pool from which the ExtensionListEntry objects are created.
 * @param kwdBuf        Pool for strings the entries point to (normalized
 *                      keys / types and the joined attribute value).
 * @param posixVariant  On input, true suppresses the special POSIX handling.
 *                      It is always reset to false, then set to true if the
 *                      POSIX_KEY / POSIX_VALUE pair was found while handling
 *                      was not suppressed; that pair is then not emitted as
 *                      a keyword.
 * @param status        In/out ICU error code. Set to U_MEMORY_ALLOCATION_ERROR
 *                      or U_ILLEGAL_ARGUMENT_ERROR on failure, in which case
 *                      the function returns before anything is merged into
 *                      *appendTo.
 */
|
|
static void
|
|
_appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, icu::MemoryPool<ExtensionListEntry>& extPool, icu::MemoryPool<icu::CharString>& kwdBuf, UBool *posixVariant, UErrorCode *status) {
|
|
const char *pTag; /* beginning of current subtag */
|
|
const char *pKwds; /* beginning of key-type pairs */
|
|
UBool variantExists = *posixVariant;
|
|
|
|
ExtensionListEntry *kwdFirst = nullptr; /* first LDML keyword */
|
|
ExtensionListEntry *kwd, *nextKwd;
|
|
|
|
int32_t len;
|
|
|
|
/* Reset the posixVariant value */
|
|
*posixVariant = false;
|
|
|
|
pTag = ldmlext;
|
|
pKwds = nullptr;
|
|
|
|
/* Phase 1: attributes. attrBuf and attrPool are scoped to this block;
   the joined attribute value is copied into kwdBuf before the block ends. */
{
|
|
AttributeListEntry *attrFirst = nullptr; /* first attribute */
|
|
AttributeListEntry *attr, *nextAttr;
|
|
|
|
char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
|
|
int32_t attrBufIdx = 0;
|
|
|
|
icu::MemoryPool<AttributeListEntry> attrPool;
|
|
|
|
/* Iterate through u extension attributes */
|
|
while (*pTag) {
|
|
/* locate next separator char */
|
|
for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
|
|
|
|
/* the first subtag that is a key ends the attribute section */
if (ultag_isUnicodeLocaleKey(pTag, len)) {
|
|
pKwds = pTag;
|
|
break;
|
|
}
|
|
|
|
/* add this attribute to the list */
|
|
attr = attrPool.create();
|
|
if (attr == nullptr) {
|
|
*status = U_MEMORY_ALLOCATION_ERROR;
|
|
return;
|
|
}
|
|
|
|
/* attributes are packed back to back in attrBuf, each NUL-terminated;
   the strict '<' leaves room for the terminator */
if (len < (int32_t)sizeof(attrBuf) - attrBufIdx) {
|
|
uprv_memcpy(&attrBuf[attrBufIdx], pTag, len);
|
|
attrBuf[attrBufIdx + len] = 0;
|
|
attr->attribute = &attrBuf[attrBufIdx];
|
|
attrBufIdx += (len + 1);
|
|
} else {
|
|
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
return;
|
|
}
|
|
|
|
// duplicate attribute is ignored, causes no error.
|
|
_addAttributeToList(&attrFirst, attr);
|
|
|
|
/* next tag */
|
|
pTag += len;
|
|
if (*pTag) {
|
|
/* next to the separator */
|
|
pTag++;
|
|
}
|
|
}
|
|
|
|
if (attrFirst) {
|
|
/* emit attributes as an LDML keyword, e.g. attribute=attr1-attr2 */
|
|
|
|
kwd = extPool.create();
|
|
if (kwd == nullptr) {
|
|
*status = U_MEMORY_ALLOCATION_ERROR;
|
|
return;
|
|
}
|
|
|
|
icu::CharString* value = kwdBuf.create();
|
|
if (value == nullptr) {
|
|
*status = U_MEMORY_ALLOCATION_ERROR;
|
|
return;
|
|
}
|
|
|
|
/* attribute subtags sorted in alphabetical order as type */
|
|
attr = attrFirst;
|
|
while (attr != nullptr) {
|
|
nextAttr = attr->next;
|
|
if (attr != attrFirst) {
|
|
value->append('-', *status);
|
|
}
|
|
value->append(attr->attribute, *status);
|
|
attr = nextAttr;
|
|
}
|
|
if (U_FAILURE(*status)) {
|
|
return;
|
|
}
|
|
|
|
kwd->key = LOCALE_ATTRIBUTE_KEY;
|
|
kwd->value = value->data();
|
|
|
|
if (!_addExtensionToList(&kwdFirst, kwd, false)) {
|
|
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Phase 2: key/type pairs, present only if phase 1 stopped at a key */
if (pKwds) {
|
|
const char *pBcpKey = nullptr; /* u extension key subtag */
|
|
const char *pBcpType = nullptr; /* beginning of u extension type subtag(s) */
|
|
int32_t bcpKeyLen = 0;
|
|
int32_t bcpTypeLen = 0;
|
|
UBool isDone = false;
|
|
|
|
pTag = pKwds;
|
|
/* BCP47 representation of LDML key/type pairs */
|
|
/* A pair is emitted when the next key is seen or the input ends, so the
   loop makes one extra pass after the last subtag. */
while (!isDone) {
|
|
const char *pNextBcpKey = nullptr;
|
|
int32_t nextBcpKeyLen = 0;
|
|
UBool emitKeyword = false;
|
|
|
|
if (*pTag) {
|
|
/* locate next separator char */
|
|
for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
|
|
|
|
if (ultag_isUnicodeLocaleKey(pTag, len)) {
|
|
if (pBcpKey) {
|
|
emitKeyword = true;
|
|
pNextBcpKey = pTag;
|
|
nextBcpKeyLen = len;
|
|
} else {
|
|
pBcpKey = pTag;
|
|
bcpKeyLen = len;
|
|
}
|
|
} else {
|
|
U_ASSERT(pBcpKey != nullptr);
|
|
/* within LDML type subtags */
|
|
if (pBcpType) {
|
|
/* +1 accounts for the separator between type subtags */
bcpTypeLen += (len + 1);
|
|
} else {
|
|
pBcpType = pTag;
|
|
bcpTypeLen = len;
|
|
}
|
|
}
|
|
|
|
/* next tag */
|
|
pTag += len;
|
|
if (*pTag) {
|
|
/* next to the separator */
|
|
pTag++;
|
|
}
|
|
} else {
|
|
/* processing last one */
|
|
emitKeyword = true;
|
|
isDone = true;
|
|
}
|
|
|
|
if (emitKeyword) {
|
|
const char *pKey = nullptr; /* LDML key */
|
|
const char *pType = nullptr; /* LDML type */
|
|
|
|
char bcpKeyBuf[3]; /* BCP key length is always 2 for now */
|
|
|
|
U_ASSERT(pBcpKey != nullptr);
|
|
|
|
if (bcpKeyLen >= (int32_t)sizeof(bcpKeyBuf)) {
|
|
/* the BCP key is invalid */
|
|
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
return;
|
|
}
|
|
U_ASSERT(bcpKeyLen <= 2);
|
|
|
|
uprv_strncpy(bcpKeyBuf, pBcpKey, bcpKeyLen);
|
|
bcpKeyBuf[bcpKeyLen] = 0;
|
|
|
|
/* u extension key to LDML key */
|
|
pKey = uloc_toLegacyKey(bcpKeyBuf);
|
|
if (pKey == nullptr) {
|
|
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
return;
|
|
}
|
|
if (pKey == bcpKeyBuf) {
|
|
/*
 * The key returned by toLegacyKey points to the input buffer.
 * We normalize the result key to lower case and copy it into kwdBuf,
 * because bcpKeyBuf is a local array.
 */
|
|
T_CString_toLowerCase(bcpKeyBuf);
|
|
icu::CharString* key = kwdBuf.create(bcpKeyBuf, bcpKeyLen, *status);
|
|
if (key == nullptr) {
|
|
*status = U_MEMORY_ALLOCATION_ERROR;
|
|
return;
|
|
}
|
|
if (U_FAILURE(*status)) {
|
|
return;
|
|
}
|
|
pKey = key->data();
|
|
}
|
|
|
|
if (pBcpType) {
|
|
char bcpTypeBuf[128]; /* practically long enough even considering multiple subtag type */
|
|
if (bcpTypeLen >= (int32_t)sizeof(bcpTypeBuf)) {
|
|
/* the BCP type is too long */
|
|
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
return;
|
|
}
|
|
|
|
uprv_strncpy(bcpTypeBuf, pBcpType, bcpTypeLen);
|
|
bcpTypeBuf[bcpTypeLen] = 0;
|
|
|
|
/* BCP type to locale type */
|
|
pType = uloc_toLegacyType(pKey, bcpTypeBuf);
|
|
if (pType == nullptr) {
|
|
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
return;
|
|
}
|
|
if (pType == bcpTypeBuf) {
|
|
/*
 * The type returned by toLegacyType points to the input buffer.
 * We normalize the result type to lower case and copy it into kwdBuf,
 * because bcpTypeBuf is a local array.
 */
|
|
/* normalize to lower case */
|
|
T_CString_toLowerCase(bcpTypeBuf);
|
|
icu::CharString* type = kwdBuf.create(bcpTypeBuf, bcpTypeLen, *status);
|
|
if (type == nullptr) {
|
|
*status = U_MEMORY_ALLOCATION_ERROR;
|
|
return;
|
|
}
|
|
if (U_FAILURE(*status)) {
|
|
return;
|
|
}
|
|
pType = type->data();
|
|
}
|
|
} else {
|
|
/* typeless - default type value is "yes" */
|
|
pType = LOCALE_TYPE_YES;
|
|
}
|
|
|
|
/* Special handling for u-va-posix, since we want to treat this as a
   variant, not as a keyword */
|
|
if (!variantExists && !uprv_strcmp(pKey, POSIX_KEY) && !uprv_strcmp(pType, POSIX_VALUE) ) {
|
|
*posixVariant = true;
|
|
} else {
|
|
/* create an ExtensionListEntry for this keyword */
|
|
kwd = extPool.create();
|
|
if (kwd == nullptr) {
|
|
*status = U_MEMORY_ALLOCATION_ERROR;
|
|
return;
|
|
}
|
|
|
|
kwd->key = pKey;
|
|
kwd->value = pType;
|
|
|
|
if (!_addExtensionToList(&kwdFirst, kwd, false)) {
|
|
// duplicate keyword is allowed, Only the first
|
|
// is honored.
|
|
}
|
|
}
|
|
|
|
/* the key that triggered this emit (if any) starts the next pair */
pBcpKey = pNextBcpKey;
|
|
bcpKeyLen = pNextBcpKey != nullptr ? nextBcpKeyLen : 0;
|
|
pBcpType = nullptr;
|
|
bcpTypeLen = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* merge the locally collected keywords into the caller's list; next is
   read first because each entry is re-inserted into another list */
kwd = kwdFirst;
|
|
while (kwd != nullptr) {
|
|
nextKwd = kwd->next;
|
|
_addExtensionToList(appendTo, kwd, false);
|
|
kwd = nextKwd;
|
|
}
|
|
}
|
|
|
|
|
|
static void
|
|
_appendKeywords(ULanguageTag* langtag, icu::ByteSink& sink, UErrorCode* status) {
|
|
int32_t i, n;
|
|
int32_t len;
|
|
ExtensionListEntry *kwdFirst = nullptr;
|
|
ExtensionListEntry *kwd;
|
|
const char *key, *type;
|
|
icu::MemoryPool<ExtensionListEntry> extPool;
|
|
icu::MemoryPool<icu::CharString> kwdBuf;
|
|
UBool posixVariant = false;
|
|
|
|
if (U_FAILURE(*status)) {
|
|
return;
|
|
}
|
|
|
|
n = ultag_getExtensionsSize(langtag);
|
|
|
|
/* resolve locale keywords and reordering keys */
|
|
for (i = 0; i < n; i++) {
|
|
key = ultag_getExtensionKey(langtag, i);
|
|
type = ultag_getExtensionValue(langtag, i);
|
|
if (*key == LDMLEXT) {
|
|
/* Determine if variants already exists */
|
|
if (ultag_getVariantsSize(langtag)) {
|
|
posixVariant = true;
|
|
}
|
|
|
|
_appendLDMLExtensionAsKeywords(type, &kwdFirst, extPool, kwdBuf, &posixVariant, status);
|
|
if (U_FAILURE(*status)) {
|
|
break;
|
|
}
|
|
} else {
|
|
kwd = extPool.create();
|
|
if (kwd == nullptr) {
|
|
*status = U_MEMORY_ALLOCATION_ERROR;
|
|
break;
|
|
}
|
|
kwd->key = key;
|
|
kwd->value = type;
|
|
if (!_addExtensionToList(&kwdFirst, kwd, false)) {
|
|
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (U_SUCCESS(*status)) {
|
|
type = ultag_getPrivateUse(langtag);
|
|
if ((int32_t)uprv_strlen(type) > 0) {
|
|
/* add private use as a keyword */
|
|
kwd = extPool.create();
|
|
if (kwd == nullptr) {
|
|
*status = U_MEMORY_ALLOCATION_ERROR;
|
|
} else {
|
|
kwd->key = PRIVATEUSE_KEY;
|
|
kwd->value = type;
|
|
if (!_addExtensionToList(&kwdFirst, kwd, false)) {
|
|
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/* If a POSIX variant was in the extensions, write it out before writing the keywords. */
|
|
|
|
if (U_SUCCESS(*status) && posixVariant) {
|
|
len = (int32_t) uprv_strlen(_POSIX);
|
|
sink.Append(_POSIX, len);
|
|
}
|
|
|
|
if (U_SUCCESS(*status) && kwdFirst != nullptr) {
|
|
/* write out the sorted keywords */
|
|
UBool firstValue = true;
|
|
kwd = kwdFirst;
|
|
do {
|
|
if (firstValue) {
|
|
sink.Append("@", 1);
|
|
firstValue = false;
|
|
} else {
|
|
sink.Append(";", 1);
|
|
}
|
|
|
|
/* key */
|
|
len = (int32_t)uprv_strlen(kwd->key);
|
|
sink.Append(kwd->key, len);
|
|
sink.Append("=", 1);
|
|
|
|
/* type */
|
|
len = (int32_t)uprv_strlen(kwd->value);
|
|
sink.Append(kwd->value, len);
|
|
|
|
kwd = kwd->next;
|
|
} while (kwd);
|
|
}
|
|
}
|
|
|
|
static void
|
|
_appendPrivateuseToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UBool hadPosix, UErrorCode* status) {
|
|
(void)hadPosix;
|
|
char buf[ULOC_FULLNAME_CAPACITY];
|
|
UErrorCode tmpStatus = U_ZERO_ERROR;
|
|
int32_t len, i;
|
|
|
|
if (U_FAILURE(*status)) {
|
|
return;
|
|
}
|
|
|
|
len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
|
|
if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
|
|
if (strict) {
|
|
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
}
|
|
return;
|
|
}
|
|
|
|
if (len > 0) {
|
|
char *p, *pPriv;
|
|
UBool bNext = true;
|
|
UBool firstValue = true;
|
|
UBool writeValue;
|
|
|
|
pPriv = nullptr;
|
|
p = buf;
|
|
while (bNext) {
|
|
writeValue = false;
|
|
if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
|
|
if (*p == 0) {
|
|
bNext = false;
|
|
} else {
|
|
*p = 0; /* terminate */
|
|
}
|
|
if (pPriv != nullptr) {
|
|
/* Private use in the canonical format is lowercase in BCP47 */
|
|
for (i = 0; *(pPriv + i) != 0; i++) {
|
|
*(pPriv + i) = uprv_tolower(*(pPriv + i));
|
|
}
|
|
|
|
/* validate */
|
|
if (_isPrivateuseValueSubtag(pPriv, -1)) {
|
|
if (firstValue) {
|
|
if (!_isVariantSubtag(pPriv, -1)) {
|
|
writeValue = true;
|
|
}
|
|
} else {
|
|
writeValue = true;
|
|
}
|
|
} else if (strict) {
|
|
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
break;
|
|
} else {
|
|
break;
|
|
}
|
|
|
|
if (writeValue) {
|
|
sink.Append("-", 1);
|
|
|
|
if (firstValue) {
|
|
sink.Append(PRIVATEUSE_KEY, UPRV_LENGTHOF(PRIVATEUSE_KEY) - 1);
|
|
sink.Append("-", 1);
|
|
sink.Append(PRIVUSE_VARIANT_PREFIX, UPRV_LENGTHOF(PRIVUSE_VARIANT_PREFIX) - 1);
|
|
sink.Append("-", 1);
|
|
firstValue = false;
|
|
}
|
|
|
|
len = (int32_t)uprv_strlen(pPriv);
|
|
sink.Append(pPriv, len);
|
|
}
|
|
}
|
|
/* reset private use starting position */
|
|
pPriv = nullptr;
|
|
} else if (pPriv == nullptr) {
|
|
pPriv = p;
|
|
}
|
|
p++;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* -------------------------------------------------
|
|
*
|
|
* ultag_ functions
|
|
*
|
|
* -------------------------------------------------
|
|
*/
|
|
|
|
/*
 * Bit flags used by the parser. ultag_parse() keeps a mask ("next") of the
 * subtag kinds it will accept at the current position; each flag enables
 * the matching check in its main loop.
 */
|
|
#define LANG 0x0001 /* language subtag */
|
|
#define EXTL 0x0002 /* extlang subtag */
|
|
#define SCRT 0x0004 /* script subtag */
|
|
#define REGN 0x0008 /* region subtag */
|
|
#define VART 0x0010 /* variant subtag */
|
|
#define EXTS 0x0020 /* extension singleton */
|
|
#define EXTV 0x0040 /* extension value subtag */
|
|
#define PRIV 0x0080 /* private use ("x") section */
|
|
|
|
/**
|
|
* Ticket #12705 - The optimizer in Visual Studio 2015 Update 3 has problems optimizing this function.
|
|
* As a work-around, optimization is disabled for this function on VS2015 and VS2017.
|
|
* This work-around should be removed once the following versions of Visual Studio are no
|
|
* longer supported: All versions of VS2015/VS2017, and versions of VS2019 below 16.4.
|
|
*/
|
|
#if defined(_MSC_VER) && (_MSC_VER >= 1900) && (_MSC_VER < 1924)
|
|
#pragma optimize( "", off )
|
|
#endif
|
|
|
|
/**
 * Parses a BCP47 language tag into a newly allocated ULanguageTag.
 *
 * The input is copied into a private buffer (t->buf) which is then split in
 * place: separators are overwritten with NUL, subtags are case-normalized,
 * and the fields of the result point into that buffer. Before subtags are
 * scanned, a tag that begins with an entry of LEGACY or REDUNDANT is
 * rewritten to the paired replacement string.
 *
 * Parsing stops quietly at the first subtag that is not acceptable at its
 * position; this does not set an error, it only limits *parsedLen.
 *
 * @param tag        Tag text; read up to tagLen characters.
 * @param tagLen     Length of tag, or a negative value to use uprv_strlen(tag).
 * @param parsedLen  If not nullptr, set to 0 on entry and, on normal
 *                   completion, to the number of input characters accepted
 *                   (corrected for the length difference of a LEGACY /
 *                   REDUNDANT replacement).
 * @param status     In/out ICU error code. Nothing is parsed if it already
 *                   holds a failure; set to U_MEMORY_ALLOCATION_ERROR when
 *                   an allocation fails.
 * @return The parsed tag, released from its local owner with orphan(), or
 *         nullptr on failure. A tag shorter than MINLEN yields an
 *         initialized ULanguageTag with no subtags parsed.
 */
static ULanguageTag*
|
|
ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) {
|
|
char *tagBuf;
|
|
int16_t next;
|
|
char *pSubtag, *pNext, *pLastGoodPosition;
|
|
int32_t subtagLen;
|
|
int32_t extlangIdx;
|
|
ExtensionListEntry *pExtension;
|
|
char *pExtValueSubtag, *pExtValueSubtagEnd;
|
|
int32_t i;
|
|
UBool privateuseVar = false;
|
|
int32_t legacyLen = 0;
|
|
|
|
if (parsedLen != nullptr) {
|
|
*parsedLen = 0;
|
|
}
|
|
|
|
if (U_FAILURE(*status)) {
|
|
return nullptr;
|
|
}
|
|
|
|
if (tagLen < 0) {
|
|
tagLen = (int32_t)uprv_strlen(tag);
|
|
}
|
|
|
|
/* copy the entire string */
|
|
tagBuf = (char*)uprv_malloc(tagLen + 1);
|
|
if (tagBuf == nullptr) {
|
|
*status = U_MEMORY_ALLOCATION_ERROR;
|
|
return nullptr;
|
|
}
|
|
|
|
if (tagLen > 0) {
|
|
uprv_memcpy(tagBuf, tag, tagLen);
|
|
}
|
|
*(tagBuf + tagLen) = 0;
|
|
|
|
/* create a ULanguageTag */
|
|
icu::LocalULanguageTagPointer t(
|
|
(ULanguageTag*)uprv_malloc(sizeof(ULanguageTag)));
|
|
if (t.isNull()) {
|
|
uprv_free(tagBuf);
|
|
*status = U_MEMORY_ALLOCATION_ERROR;
|
|
return nullptr;
|
|
}
|
|
_initializeULanguageTag(t.getAlias());
|
|
t->buf = tagBuf;
|
|
|
|
if (tagLen < MINLEN) {
|
|
/* the input tag is too short - return empty ULanguageTag */
|
|
return t.orphan();
|
|
}
|
|
|
|
/* difference between the original prefix length and its replacement;
   added back when *parsedLen is computed at the end */
size_t parsedLenDelta = 0;
|
|
// Legacy tags are matched as a whole prefix. A legacy tag with an
|
|
// intervening script or region, such as art-DE-lojban or art-Latn-lojban,
|
|
// won't be matched.
|
|
/* check if the tag is legacy */
|
|
/* LEGACY is walked two entries at a time: tag, then its replacement */
for (i = 0; i < UPRV_LENGTHOF(LEGACY); i += 2) {
|
|
int32_t checkLegacyLen = static_cast<int32_t>(uprv_strlen(LEGACY[i]));
|
|
if (tagLen < checkLegacyLen) {
|
|
continue;
|
|
}
|
|
if (tagLen > checkLegacyLen && tagBuf[checkLegacyLen] != '-') {
|
|
// make sure next char is '-'.
|
|
continue;
|
|
}
|
|
if (uprv_strnicmp(LEGACY[i], tagBuf, checkLegacyLen) == 0) {
|
|
int32_t newTagLength;
|
|
|
|
legacyLen = checkLegacyLen; /* back up for output parsedLen */
|
|
int32_t replacementLen = static_cast<int32_t>(uprv_strlen(LEGACY[i+1]));
|
|
newTagLength = replacementLen + tagLen - checkLegacyLen;
|
|
int32_t oldTagLength = tagLen;
|
|
/* the buffer is reallocated only when the rewritten tag is longer */
if (tagLen < newTagLength) {
|
|
uprv_free(tagBuf);
|
|
// Change t->buf after the free and before return to avoid the second double free in
|
|
// the destructor of t when t is out of scope.
|
|
t->buf = tagBuf = (char*)uprv_malloc(newTagLength + 1);
|
|
if (tagBuf == nullptr) {
|
|
*status = U_MEMORY_ALLOCATION_ERROR;
|
|
return nullptr;
|
|
}
|
|
tagLen = newTagLength;
|
|
}
|
|
parsedLenDelta = checkLegacyLen - replacementLen;
|
|
uprv_strcpy(t->buf, LEGACY[i + 1]);
|
|
if (checkLegacyLen != tagLen) {
|
|
/* the remainder is copied from the caller's tag, not from the
   buffer, which may have been replaced above */
uprv_memcpy(t->buf + replacementLen, tag + checkLegacyLen,
|
|
oldTagLength - checkLegacyLen);
|
|
// NUL-terminate after memcpy().
|
|
t->buf[replacementLen + oldTagLength - checkLegacyLen] = 0;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
/* redundant tags are considered only when no legacy tag matched */
if (legacyLen == 0) {
|
|
for (i = 0; i < UPRV_LENGTHOF(REDUNDANT); i += 2) {
|
|
const char* redundantTag = REDUNDANT[i];
|
|
size_t redundantTagLen = uprv_strlen(redundantTag);
|
|
// The preferred tag for a redundant tag is always shorter than redundant
|
|
// tag. A redundant tag may or may not be followed by other subtags.
|
|
// (i.e. "zh-yue" or "zh-yue-u-co-pinyin").
|
|
if (uprv_strnicmp(redundantTag, tagBuf, static_cast<uint32_t>(redundantTagLen)) == 0) {
|
|
const char* redundantTagEnd = tagBuf + redundantTagLen;
|
|
if (*redundantTagEnd == '\0' || *redundantTagEnd == SEP) {
|
|
const char* preferredTag = REDUNDANT[i + 1];
|
|
size_t preferredTagLen = uprv_strlen(preferredTag);
|
|
uprv_memcpy(t->buf, preferredTag, preferredTagLen);
|
|
if (*redundantTagEnd == SEP) {
|
|
/* shift the rest of the tag down, including its terminating NUL */
uprv_memmove(tagBuf + preferredTagLen,
|
|
redundantTagEnd,
|
|
tagLen - redundantTagLen + 1);
|
|
} else {
|
|
tagBuf[preferredTagLen] = '\0';
|
|
}
|
|
// parsedLen should be the length of the input
|
|
// before redundantTag is replaced by preferredTag.
|
|
// Save the delta to add it back later.
|
|
parsedLenDelta = redundantTagLen - preferredTagLen;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
 * langtag = language
 *           ["-" script]
 *           ["-" region]
 *           *("-" variant)
 *           *("-" extension)
 *           ["-" privateuse]
 */
|
|
|
|
/* "next" is the set of subtag kinds accepted at the current position */
next = LANG | PRIV;
|
|
pNext = pLastGoodPosition = tagBuf;
|
|
extlangIdx = 0;
|
|
pExtension = nullptr;
|
|
pExtValueSubtag = nullptr;
|
|
pExtValueSubtagEnd = nullptr;
|
|
|
|
/* one subtag per iteration; every accepted subtag ends in "continue",
   anything else reaches the "break" at the bottom of the loop */
while (pNext) {
|
|
char *pSep;
|
|
|
|
pSubtag = pNext;
|
|
|
|
/* locate next separator char */
|
|
pSep = pSubtag;
|
|
while (*pSep) {
|
|
if (*pSep == SEP) {
|
|
break;
|
|
}
|
|
pSep++;
|
|
}
|
|
if (*pSep == 0) {
|
|
/* last subtag */
|
|
pNext = nullptr;
|
|
} else {
|
|
pNext = pSep + 1;
|
|
}
|
|
subtagLen = (int32_t)(pSep - pSubtag);
|
|
|
|
if (next & LANG) {
|
|
if (ultag_isLanguageSubtag(pSubtag, subtagLen)) {
|
|
*pSep = 0; /* terminate */
|
|
// TODO: move deprecated language code handling here.
|
|
t->language = T_CString_toLowerCase(pSubtag);
|
|
|
|
pLastGoodPosition = pSep;
|
|
next = SCRT | REGN | VART | EXTS | PRIV;
|
|
/* extlang is accepted only after a language subtag of at most 3 characters */
if (subtagLen <= 3)
|
|
next |= EXTL;
|
|
continue;
|
|
}
|
|
}
|
|
if (next & EXTL) {
|
|
if (_isExtlangSubtag(pSubtag, subtagLen)) {
|
|
*pSep = 0;
|
|
t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag);
|
|
|
|
pLastGoodPosition = pSep;
|
|
if (extlangIdx < 3) {
|
|
next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
|
|
} else {
|
|
next = SCRT | REGN | VART | EXTS | PRIV;
|
|
}
|
|
continue;
|
|
}
|
|
}
|
|
if (next & SCRT) {
|
|
if (ultag_isScriptSubtag(pSubtag, subtagLen)) {
|
|
char *p = pSubtag;
|
|
|
|
*pSep = 0;
|
|
|
|
/* to title case */
|
|
*p = uprv_toupper(*p);
|
|
p++;
|
|
for (; *p; p++) {
|
|
*p = uprv_tolower(*p);
|
|
}
|
|
|
|
t->script = pSubtag;
|
|
|
|
pLastGoodPosition = pSep;
|
|
next = REGN | VART | EXTS | PRIV;
|
|
continue;
|
|
}
|
|
}
|
|
if (next & REGN) {
|
|
if (ultag_isRegionSubtag(pSubtag, subtagLen)) {
|
|
*pSep = 0;
|
|
// TODO: move deprecated region code handling here.
|
|
t->region = T_CString_toUpperCase(pSubtag);
|
|
|
|
pLastGoodPosition = pSep;
|
|
next = VART | EXTS | PRIV;
|
|
continue;
|
|
}
|
|
}
|
|
if (next & VART) {
|
|
/* privateuseVar is set once PRIVUSE_VARIANT_PREFIX has been seen in
   the private use section; private use variant subtags are then
   accepted here as well */
if (_isVariantSubtag(pSubtag, subtagLen) ||
|
|
(privateuseVar && _isPrivateuseVariantSubtag(pSubtag, subtagLen))) {
|
|
VariantListEntry *var;
|
|
UBool isAdded;
|
|
|
|
var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
|
|
if (var == nullptr) {
|
|
*status = U_MEMORY_ALLOCATION_ERROR;
|
|
return nullptr;
|
|
}
|
|
*pSep = 0;
|
|
var->variant = T_CString_toUpperCase(pSubtag);
|
|
isAdded = _addVariantToList(&(t->variants), var);
|
|
if (!isAdded) {
|
|
/* duplicated variant entry */
|
|
uprv_free(var);
|
|
break;
|
|
}
|
|
pLastGoodPosition = pSep;
|
|
next = VART | EXTS | PRIV;
|
|
continue;
|
|
}
|
|
}
|
|
if (next & EXTS) {
|
|
if (_isExtensionSingleton(pSubtag, subtagLen)) {
|
|
/* a new singleton closes the extension that was being collected */
if (pExtension != nullptr) {
|
|
if (pExtValueSubtag == nullptr || pExtValueSubtagEnd == nullptr) {
|
|
/* the previous extension is incomplete */
|
|
uprv_free(pExtension);
|
|
pExtension = nullptr;
|
|
break;
|
|
}
|
|
|
|
/* terminate the previous extension value */
|
|
*pExtValueSubtagEnd = 0;
|
|
pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
|
|
|
|
/* insert the extension to the list */
|
|
if (_addExtensionToList(&(t->extensions), pExtension, false)) {
|
|
pLastGoodPosition = pExtValueSubtagEnd;
|
|
} else {
|
|
/* stop parsing here */
|
|
uprv_free(pExtension);
|
|
pExtension = nullptr;
|
|
break;
|
|
}
|
|
}
|
|
|
|
/* create a new extension */
|
|
pExtension = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
|
|
if (pExtension == nullptr) {
|
|
*status = U_MEMORY_ALLOCATION_ERROR;
|
|
return nullptr;
|
|
}
|
|
*pSep = 0;
|
|
pExtension->key = T_CString_toLowerCase(pSubtag);
|
|
pExtension->value = nullptr; /* will be set later */
|
|
|
|
/*
 * reset the start and the end location of extension value
 * subtags for this extension
 */
|
|
pExtValueSubtag = nullptr;
|
|
pExtValueSubtagEnd = nullptr;
|
|
|
|
/* a singleton must be followed by at least one value subtag */
next = EXTV;
|
|
continue;
|
|
}
|
|
}
|
|
if (next & EXTV) {
|
|
if (_isExtensionSubtag(pSubtag, subtagLen)) {
|
|
if (pExtValueSubtag == nullptr) {
|
|
/* if the start position of this extension's value has not been
   set yet, this one is the first value subtag */
|
|
pExtValueSubtag = pSubtag;
|
|
}
|
|
|
|
/* Mark the end of this subtag */
|
|
pExtValueSubtagEnd = pSep;
|
|
next = EXTS | EXTV | PRIV;
|
|
|
|
continue;
|
|
}
|
|
}
|
|
if (next & PRIV) {
|
|
if (uprv_tolower(*pSubtag) == PRIVATEUSE && subtagLen == 1) {
|
|
char *pPrivuseVal;
|
|
|
|
if (pExtension != nullptr) {
|
|
/* Process the last extension */
|
|
if (pExtValueSubtag == nullptr || pExtValueSubtagEnd == nullptr) {
|
|
/* the previous extension is incomplete */
|
|
uprv_free(pExtension);
|
|
pExtension = nullptr;
|
|
break;
|
|
} else {
|
|
/* terminate the previous extension value */
|
|
*pExtValueSubtagEnd = 0;
|
|
pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
|
|
|
|
/* insert the extension to the list */
|
|
if (_addExtensionToList(&(t->extensions), pExtension, false)) {
|
|
pLastGoodPosition = pExtValueSubtagEnd;
|
|
pExtension = nullptr;
|
|
} else {
|
|
/* stop parsing here */
|
|
uprv_free(pExtension);
|
|
pExtension = nullptr;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* The rest of part will be private use value subtags */
|
|
if (pNext == nullptr) {
|
|
/* empty private use subtag */
|
|
break;
|
|
}
|
|
/* back up the private use value start position */
|
|
pPrivuseVal = pNext;
|
|
|
|
/* validate private use value subtags */
|
|
while (pNext) {
|
|
pSubtag = pNext;
|
|
pSep = pSubtag;
|
|
while (*pSep) {
|
|
if (*pSep == SEP) {
|
|
break;
|
|
}
|
|
pSep++;
|
|
}
|
|
if (*pSep == 0) {
|
|
/* last subtag */
|
|
pNext = nullptr;
|
|
} else {
|
|
pNext = pSep + 1;
|
|
}
|
|
subtagLen = (int32_t)(pSep - pSubtag);
|
|
|
|
/* a subtag starting with PRIVUSE_VARIANT_PREFIX sends the outer
   loop back to variant parsing for the subtags that follow */
if (uprv_strncmp(pSubtag, PRIVUSE_VARIANT_PREFIX, uprv_strlen(PRIVUSE_VARIANT_PREFIX)) == 0) {
|
|
*pSep = 0;
|
|
next = VART;
|
|
privateuseVar = true;
|
|
break;
|
|
} else if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) {
|
|
pLastGoodPosition = pSep;
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (next == VART) {
|
|
continue;
|
|
}
|
|
|
|
/* only record a private use value if at least one subtag was accepted */
if (pLastGoodPosition - pPrivuseVal > 0) {
|
|
*pLastGoodPosition = 0;
|
|
t->privateuse = T_CString_toLowerCase(pPrivuseVal);
|
|
}
|
|
/* No more subtags, exiting the parse loop */
|
|
break;
|
|
}
|
|
break;
|
|
}
|
|
|
|
/* If we fell through here, it means this subtag is illegal - quit parsing */
|
|
break;
|
|
}
|
|
|
|
/* an extension still being collected when the loop ended is either
   completed and added to the list, or discarded */
if (pExtension != nullptr) {
|
|
/* Process the last extension */
|
|
if (pExtValueSubtag == nullptr || pExtValueSubtagEnd == nullptr) {
|
|
/* the previous extension is incomplete */
|
|
uprv_free(pExtension);
|
|
} else {
|
|
/* terminate the previous extension value */
|
|
*pExtValueSubtagEnd = 0;
|
|
pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
|
|
/* insert the extension to the list */
|
|
if (_addExtensionToList(&(t->extensions), pExtension, false)) {
|
|
pLastGoodPosition = pExtValueSubtagEnd;
|
|
} else {
|
|
uprv_free(pExtension);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (parsedLen != nullptr) {
|
|
/* offset of the last accepted position in the (possibly rewritten)
   buffer, plus the length difference of the replaced prefix */
*parsedLen = (int32_t)(pLastGoodPosition - t->buf + parsedLenDelta);
|
|
}
|
|
|
|
return t.orphan();
|
|
}
|
|
|
|
// Ticket #12705 - Turn optimization back on.
|
|
#if defined(_MSC_VER) && (_MSC_VER >= 1900) && (_MSC_VER < 1924)
|
|
#pragma optimize( "", on )
|
|
#endif
|
|
|
|
static void
|
|
ultag_close(ULanguageTag* langtag) {
|
|
|
|
if (langtag == nullptr) {
|
|
return;
|
|
}
|
|
|
|
uprv_free(langtag->buf);
|
|
|
|
if (langtag->variants) {
|
|
VariantListEntry *curVar = langtag->variants;
|
|
while (curVar) {
|
|
VariantListEntry *nextVar = curVar->next;
|
|
uprv_free(curVar);
|
|
curVar = nextVar;
|
|
}
|
|
}
|
|
|
|
if (langtag->extensions) {
|
|
ExtensionListEntry *curExt = langtag->extensions;
|
|
while (curExt) {
|
|
ExtensionListEntry *nextExt = curExt->next;
|
|
uprv_free(curExt);
|
|
curExt = nextExt;
|
|
}
|
|
}
|
|
|
|
uprv_free(langtag);
|
|
}
|
|
|
|
static const char*
|
|
ultag_getLanguage(const ULanguageTag* langtag) {
|
|
return langtag->language;
|
|
}
|
|
|
|
#if 0
|
|
static const char*
|
|
ultag_getJDKLanguage(const ULanguageTag* langtag) {
|
|
int32_t i;
|
|
for (i = 0; DEPRECATEDLANGS[i] != nullptr; i += 2) {
|
|
if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[i], langtag->language) == 0) {
|
|
return DEPRECATEDLANGS[i + 1];
|
|
}
|
|
}
|
|
return langtag->language;
|
|
}
|
|
#endif
|
|
|
|
static const char*
|
|
ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) {
|
|
if (idx >= 0 && idx < MAXEXTLANG) {
|
|
return langtag->extlang[idx];
|
|
}
|
|
return nullptr;
|
|
}
|
|
|
|
/** Counts the non-null entries among the MAXEXTLANG extlang slots. */
static int32_t
ultag_getExtlangSize(const ULanguageTag* langtag) {
    int32_t count = 0;
    for (int32_t slot = 0; slot < MAXEXTLANG; ++slot) {
        count += langtag->extlang[slot] ? 1 : 0;
    }
    return count;
}
|
|
|
|
static const char*
|
|
ultag_getScript(const ULanguageTag* langtag) {
|
|
return langtag->script;
|
|
}
|
|
|
|
static const char*
|
|
ultag_getRegion(const ULanguageTag* langtag) {
|
|
return langtag->region;
|
|
}
|
|
|
|
static const char*
|
|
ultag_getVariant(const ULanguageTag* langtag, int32_t idx) {
|
|
const char *var = nullptr;
|
|
VariantListEntry *cur = langtag->variants;
|
|
int32_t i = 0;
|
|
while (cur) {
|
|
if (i == idx) {
|
|
var = cur->variant;
|
|
break;
|
|
}
|
|
cur = cur->next;
|
|
i++;
|
|
}
|
|
return var;
|
|
}
|
|
|
|
static int32_t
|
|
ultag_getVariantsSize(const ULanguageTag* langtag) {
|
|
int32_t size = 0;
|
|
VariantListEntry *cur = langtag->variants;
|
|
while (true) {
|
|
if (cur == nullptr) {
|
|
break;
|
|
}
|
|
size++;
|
|
cur = cur->next;
|
|
}
|
|
return size;
|
|
}
|
|
|
|
static const char*
|
|
ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) {
|
|
const char *key = nullptr;
|
|
ExtensionListEntry *cur = langtag->extensions;
|
|
int32_t i = 0;
|
|
while (cur) {
|
|
if (i == idx) {
|
|
key = cur->key;
|
|
break;
|
|
}
|
|
cur = cur->next;
|
|
i++;
|
|
}
|
|
return key;
|
|
}
|
|
|
|
static const char*
|
|
ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) {
|
|
const char *val = nullptr;
|
|
ExtensionListEntry *cur = langtag->extensions;
|
|
int32_t i = 0;
|
|
while (cur) {
|
|
if (i == idx) {
|
|
val = cur->value;
|
|
break;
|
|
}
|
|
cur = cur->next;
|
|
i++;
|
|
}
|
|
return val;
|
|
}
|
|
|
|
static int32_t
|
|
ultag_getExtensionsSize(const ULanguageTag* langtag) {
|
|
int32_t size = 0;
|
|
ExtensionListEntry *cur = langtag->extensions;
|
|
while (true) {
|
|
if (cur == nullptr) {
|
|
break;
|
|
}
|
|
size++;
|
|
cur = cur->next;
|
|
}
|
|
return size;
|
|
}
|
|
|
|
static const char*
|
|
ultag_getPrivateUse(const ULanguageTag* langtag) {
|
|
return langtag->privateuse;
|
|
}
|
|
|
|
#if 0
/* Compiled out. Returns the legacy field of the parsed tag. */
static const char*
ultag_getLegacy(const ULanguageTag* langtag) {
    const char *legacyTag = langtag->legacy;
    return legacyTag;
}
#endif
|
|
|
|
|
|
/*
|
|
* -------------------------------------------------
|
|
*
|
|
* Locale/BCP47 conversion APIs, exposed as uloc_*
|
|
*
|
|
* -------------------------------------------------
|
|
*/
|
|
/*
 * Buffer-based wrapper around ulocimp_toLanguageTag().
 *
 * The tag is written into langtag through a CheckedArrayByteSink bounded by
 * langtagCapacity. Returns 0 without doing anything when *status already
 * holds a failure; otherwise returns the byte count reported by the sink.
 * When the conversion succeeded, *status becomes U_BUFFER_OVERFLOW_ERROR if
 * the sink overflowed, else the output is handed to u_terminateChars().
 */
U_CAPI int32_t U_EXPORT2
uloc_toLanguageTag(const char* localeID,
                   char* langtag,
                   int32_t langtagCapacity,
                   UBool strict,
                   UErrorCode* status) {
    if (U_FAILURE(*status)) {
        return 0;
    }

    icu::CheckedArrayByteSink sink(langtag, langtagCapacity);
    ulocimp_toLanguageTag(localeID, sink, strict, status);

    const int32_t written = sink.NumberOfBytesAppended();

    /* Overflow reporting and termination only apply to a successful conversion. */
    if (U_SUCCESS(*status)) {
        if (sink.Overflowed()) {
            *status = U_BUFFER_OVERFLOW_ERROR;
        } else {
            u_terminateChars(langtag, langtagCapacity, written, status);
        }
    }

    return written;
}
|
|
|
|
|
|
/*
 * Writes the language tag for an ICU locale ID to a byte sink.
 *
 * The locale ID is canonicalized first. If the canonical ID starts directly
 * with its keywords and carries exactly one keyword, "x", whose value passes
 * ultag_isPrivateuseValueSubtags(), the private-use-only tag "und-x-<value>"
 * is emitted. Every other ID is emitted piecewise: language, script, region,
 * variants, keywords, private use.
 *
 * localeID  locale ID to convert
 * sink      receives the tag bytes; nothing is appended if the function is
 *           entered with a failure in *status
 * strict    forwarded to the append helpers; in the private-use-only path a
 *           rejected value sets U_ILLEGAL_ARGUMENT_ERROR when strict is set
 * status    in/out error code; receives the canonicalization error, or
 *           U_ILLEGAL_ARGUMENT_ERROR from the private-use-only path
 */
U_CAPI void U_EXPORT2
ulocimp_toLanguageTag(const char* localeID,
                      icu::ByteSink& sink,
                      UBool strict,
                      UErrorCode* status) {
    /*
     * Do nothing on an incoming failure, matching uloc_toLanguageTag().
     * Without this check the private-use-only path below could still append
     * "und-x-..." to the sink even though *status already reports an error.
     */
    if (U_FAILURE(*status)) {
        return;
    }

    icu::CharString canonical;
    UErrorCode tmpStatus = U_ZERO_ERROR;
    UBool hadPosix = false;
    const char* pKeywordStart;

    /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "".  See #6835 */
    {
        icu::CharStringByteSink canonicalSink(&canonical);
        ulocimp_canonicalize(localeID, canonicalSink, &tmpStatus);
    }
    if (U_FAILURE(tmpStatus)) {
        *status = tmpStatus;
        return;
    }

    /* For handling special case - private use only tag */
    pKeywordStart = locale_getKeywordsStart(canonical.data());
    if (pKeywordStart == canonical.data()) {
        /* The keywords begin at the very start of the canonical ID. */
        int kwdCnt = 0;
        UBool done = false;

        icu::LocalUEnumerationPointer kwdEnum(uloc_openKeywords(canonical.data(), &tmpStatus));
        if (U_SUCCESS(tmpStatus)) {
            kwdCnt = uenum_count(kwdEnum.getAlias(), &tmpStatus);
            if (kwdCnt == 1) {
                const char *key;
                int32_t len = 0;

                key = uenum_next(kwdEnum.getAlias(), &len, &tmpStatus);
                /* len stays 0 if no key was produced, so key is only read when len == 1 */
                if (len == 1 && *key == PRIVATEUSE) {
                    icu::CharString buf;
                    {
                        /* named so it does not shadow the sink parameter */
                        icu::CharStringByteSink valueSink(&buf);
                        ulocimp_getKeywordValue(localeID, key, valueSink, &tmpStatus);
                    }
                    if (U_SUCCESS(tmpStatus)) {
                        if (ultag_isPrivateuseValueSubtags(buf.data(), buf.length())) {
                            /* return private use only tag */
                            sink.Append("und-x-", 6);
                            sink.Append(buf.data(), buf.length());
                            done = true;
                        } else if (strict) {
                            *status = U_ILLEGAL_ARGUMENT_ERROR;
                            done = true;
                        }
                        /* if not strict mode, then "und" will be returned */
                    } else {
                        *status = U_ILLEGAL_ARGUMENT_ERROR;
                        done = true;
                    }
                }
            }
            if (done) {
                return;
            }
        }
    }

    _appendLanguageToLanguageTag(canonical.data(), sink, strict, status);
    _appendScriptToLanguageTag(canonical.data(), sink, strict, status);
    _appendRegionToLanguageTag(canonical.data(), sink, strict, status);
    _appendVariantsToLanguageTag(canonical.data(), sink, strict, &hadPosix, status);
    _appendKeywordsToLanguageTag(canonical.data(), sink, strict, hadPosix, status);
    _appendPrivateuseToLanguageTag(canonical.data(), sink, strict, hadPosix, status);
}
|
|
|
|
|
|
/*
 * Buffer-based wrapper around ulocimp_forLanguageTag().
 *
 * langtag is passed on with a length of -1, and the locale ID is written into
 * localeID through a CheckedArrayByteSink bounded by localeIDCapacity.
 * Returns 0 without doing anything when *status already holds a failure;
 * otherwise returns the byte count reported by the sink. When the conversion
 * succeeded, *status becomes U_BUFFER_OVERFLOW_ERROR if the sink overflowed,
 * else the output is handed to u_terminateChars().
 */
U_CAPI int32_t U_EXPORT2
uloc_forLanguageTag(const char* langtag,
                    char* localeID,
                    int32_t localeIDCapacity,
                    int32_t* parsedLength,
                    UErrorCode* status) {
    if (U_FAILURE(*status)) {
        return 0;
    }

    icu::CheckedArrayByteSink sink(localeID, localeIDCapacity);
    ulocimp_forLanguageTag(langtag, -1, sink, parsedLength, status);

    const int32_t written = sink.NumberOfBytesAppended();

    /* Overflow reporting and termination only apply to a successful conversion. */
    if (U_SUCCESS(*status)) {
        if (sink.Overflowed()) {
            *status = U_BUFFER_OVERFLOW_ERROR;
        } else {
            u_terminateChars(localeID, localeIDCapacity, written, status);
        }
    }

    return written;
}
|
|
|
|
|
|
/*
 * Parses a language tag with ultag_parse() and writes the corresponding
 * locale ID to sink.
 *
 * Output order: language (the first extlang if one is present, and skipped
 * when it equals LANG_UND), "_" + script with its first character
 * upper-cased, "_" + upper-cased region, the sorted variants each as "_" +
 * upper-cased text, then whatever _appendKeywords() adds for extensions and
 * private use. Returns without writing when *status is a failure after
 * parsing.
 */
U_CAPI void U_EXPORT2
ulocimp_forLanguageTag(const char* langtag,
                       int32_t tagLen,
                       icu::ByteSink& sink,
                       int32_t* parsedLength,
                       UErrorCode* status) {
    icu::LocalULanguageTagPointer lt(ultag_parse(langtag, tagLen, parsedLength, status));
    if (U_FAILURE(*status)) {
        return;
    }
    ULanguageTag *tag = lt.getAlias();

    /* Appends text to the sink one character at a time, upper-cased. */
    auto appendUpperCased = [&sink](const char *text) {
        for (; *text; ++text) {
            char upper = uprv_toupper(*text);
            sink.Append(&upper, 1);
        }
    };

    UBool nothingWritten = true;

    /* language */
    const char *language = ultag_getExtlangSize(tag) > 0
            ? ultag_getExtlang(tag, 0)
            : ultag_getLanguage(tag);
    if (uprv_compareInvCharsAsAscii(language, LANG_UND) != 0) {
        int32_t languageLen = (int32_t)uprv_strlen(language);
        if (languageLen > 0) {
            sink.Append(language, languageLen);
            nothingWritten = false;
        }
    }

    /* script, written in title case */
    const char *script = ultag_getScript(tag);
    int32_t scriptLen = (int32_t)uprv_strlen(script);
    if (scriptLen > 0) {
        sink.Append("_", 1);
        nothingWritten = false;

        char initial = uprv_toupper(*script);
        sink.Append(&initial, 1);
        sink.Append(script + 1, scriptLen - 1);
    }

    /* region, written in upper case */
    const char *region = ultag_getRegion(tag);
    int32_t regionLen = (int32_t)uprv_strlen(region);
    UBool regionMissing = true;
    if (regionLen > 0) {
        sink.Append("_", 1);
        nothingWritten = false;
        appendUpperCased(region);
        regionMissing = false;
    }

    /* variants, each written in upper case */
    _sortVariants(tag->variants);
    int32_t variantCount = ultag_getVariantsSize(tag);
    if (variantCount > 0 && regionMissing) {
        /* extra separator stands in for the absent region */
        sink.Append("_", 1);
        nothingWritten = false;
    }
    for (int32_t v = 0; v < variantCount; v++) {
        const char *variant = ultag_getVariant(tag, v);
        sink.Append("_", 1);
        appendUpperCased(variant);
    }

    /* keywords */
    int32_t extensionCount = ultag_getExtensionsSize(tag);
    const char *privateUse = ultag_getPrivateUse(tag);
    if (extensionCount > 0 || uprv_strlen(privateUse) > 0) {
        if (nothingWritten && extensionCount > 0) {
            /* need a language */
            sink.Append(LANG_UND, LANG_UND_LEN);
        }
        _appendKeywords(tag, sink, status);
    }
}
|