mirror of
https://github.com/freebsd/freebsd-src
synced 2024-10-15 04:43:53 +00:00
Synchronize locale generation tools with dragonfly
generates the makefiles and the sources
This commit is contained in:
parent
becbad1f6e
commit
4a707b2112
Notes:
svn2git
2020-12-20 02:59:44 +00:00
svn path=/projects/collation/; revision=289260
|
@ -27,6 +27,7 @@ LC:= --lc=${LC}
|
|||
.endif
|
||||
|
||||
all:
|
||||
cp ${ETCDIR}/common.UTF-8.src ${CLDRDIR}/posix/xx_Comm_US.UTF-8.src
|
||||
.for t in ${TYPES}
|
||||
. if ${KNOWN:M${t}}
|
||||
test -d ${t} || mkdir ${t}
|
||||
|
@ -64,13 +65,24 @@ build-${t}:
|
|||
env ${PASSON} tools/finalize ${t}
|
||||
.endfor
|
||||
|
||||
build-ctypedef: transfer-rollup
|
||||
|
||||
transfer-rollup:
|
||||
cp ${ETCDIR}/common.UTF-8.src ${CLDRDIR}/posix/xx_Comm_US.UTF-8.src
|
||||
|
||||
rollup:
|
||||
perl -I tools tools/utf8-rollup.pl \
|
||||
--cldr=$$(realpath ${CLDRDIR}) \
|
||||
--etc=$$(realpath ${ETCDIR})
|
||||
|
||||
clean:
|
||||
.for t in ${TYPES}
|
||||
rm -rf ${t} ${t}.draft
|
||||
.endfor
|
||||
|
||||
BASE_LOCALES_OF_INTEREST?= \
|
||||
af_ZA am_ET be_BY bg_BG ca_AD ca_ES ca_FR ca_IT \
|
||||
af_ZA am_ET ar_AE ar_EG ar_JO ar_MA ar_QA ar_SA \
|
||||
be_BY bg_BG ca_AD ca_ES ca_FR ca_IT \
|
||||
cs_CZ da_DK de_AT de_CH de_DE el_GR en_AU en_CA \
|
||||
en_GB en_HK en_IE en_NZ en_PH en_SG en_US en_ZA \
|
||||
es_AR es_CR es_ES es_MX et_EE eu_ES fi_FI fr_BE \
|
||||
|
@ -80,7 +92,12 @@ BASE_LOCALES_OF_INTEREST?= \
|
|||
ru_RU se_FI se_NO sk_SK sl_SI sv_FI sv_SE tr_TR \
|
||||
uk_UA \
|
||||
kk_Cyrl_KZ mn_Cyrl_MN sr_Cyrl_RS sr_Latn_RS \
|
||||
zh_Hans_CN zh_Hant_HK zh_Hant_TW
|
||||
zh_Hans_CN zh_Hant_HK zh_Hant_TW \
|
||||
\
|
||||
\
|
||||
bn_IN gu_IN or_IN ta_IN te_IN kn_IN ml_IN si_LK \
|
||||
th_TH lo_LA bo_IN my_MM pa_Guru_IN ka_GE chr_US \
|
||||
km_KH shi_Tfng_MA ii_CN vai_Vaii_LR vi_VN
|
||||
|
||||
POSIX:
|
||||
.if exists (${CLDRDIR}/tools/java/cldr.jar)
|
||||
|
|
|
@ -28,10 +28,12 @@
|
|||
|
||||
-->
|
||||
<language name="af"
|
||||
encoding="ISO8859-1 ISO8859-15"
|
||||
encoding="ISO8859-1"
|
||||
countries="ZA" />
|
||||
<language name="am"
|
||||
countries="ET" /> <!-- UTF-8 only -->
|
||||
<language name="ar"
|
||||
countries="AE EG JO MA QA SA" />
|
||||
<language name="be"
|
||||
encoding="CP1131 CP1251 ISO8859-5"
|
||||
countries="BY" />
|
||||
|
@ -40,46 +42,57 @@
|
|||
countries="BG" />
|
||||
<language name="ca"
|
||||
fallback="ca_ES"
|
||||
encoding="ISO8859-1 ISO8859-15"
|
||||
encoding="ISO8859-15"
|
||||
countries="AD ES FR IT" /> <!-- only ca_ES defined -->
|
||||
<language name="cs"
|
||||
encoding="ISO8859-2"
|
||||
countries="CZ" />
|
||||
<language name="da"
|
||||
encoding="ISO8859-1 ISO8859-15"
|
||||
encoding="ISO8859-15"
|
||||
countries="DK" />
|
||||
<language name="de"
|
||||
encoding="ISO8859-1 ISO8859-15"
|
||||
encoding="ISO8859-15"
|
||||
countries="AT CH DE" />
|
||||
<language name="el"
|
||||
encoding="ISO8859-7"
|
||||
countries="GR" />
|
||||
<language name="en"
|
||||
encoding="ISO8859-1 ISO8859-15 US-ASCII"
|
||||
countries="AU CA GB NZ US ZA" />
|
||||
encoding="ISO8859-15 US-ASCII"
|
||||
countries="GB" />
|
||||
<language name="en"
|
||||
encoding="ISO8859-1 US-ASCII"
|
||||
countries="AU CA NZ US ZA" />
|
||||
<language name="en"
|
||||
encoding="ISO8859-15"
|
||||
countries="IE" />
|
||||
<language name="en"
|
||||
encoding="ISO8859-1"
|
||||
countries="HK PH SG" />
|
||||
countries="HK SG" />
|
||||
<language name="en"
|
||||
countries="PH" /> <!-- UTF-8 only -->
|
||||
<language name="es"
|
||||
encoding="ISO8859-1 ISO8859-15"
|
||||
countries="CR" /> <!-- UTF-8 only -->
|
||||
<language name="es"
|
||||
encoding="ISO8859-15"
|
||||
countries="ES" />
|
||||
<language name="es"
|
||||
encoding="ISO8859-1"
|
||||
countries="AR CR MX" />
|
||||
countries="AR MX" />
|
||||
<language name="et"
|
||||
encoding="ISO8859-15"
|
||||
countries="EE" />
|
||||
<language name="eu"
|
||||
encoding="ISO8859-1 ISO8859-15"
|
||||
encoding="ISO8859-15"
|
||||
countries="ES" />
|
||||
<language name="fi"
|
||||
encoding="ISO8859-1 ISO8859-15"
|
||||
encoding="ISO8859-15"
|
||||
countries="FI" />
|
||||
<language name="fr"
|
||||
encoding="ISO8859-1 ISO8859-15"
|
||||
countries="BE CA CH FR" />
|
||||
encoding="ISO8859-15"
|
||||
countries="BE CH FR" />
|
||||
<language name="fr"
|
||||
encoding="ISO8859-1"
|
||||
countries="CA" />
|
||||
<language name="he"
|
||||
countries="IL" />
|
||||
<language name="hi"
|
||||
|
@ -95,10 +108,10 @@
|
|||
encoding="ARMSCII-8"
|
||||
countries="AM" />
|
||||
<language name="is"
|
||||
encoding="ISO8859-1 ISO8859-15"
|
||||
encoding="ISO8859-15"
|
||||
countries="IS" />
|
||||
<language name="it"
|
||||
encoding="ISO8859-1 ISO8859-15"
|
||||
encoding="ISO8859-15"
|
||||
countries="CH IT" />
|
||||
<language name="ja"
|
||||
encoding="SJIS eucJP"
|
||||
|
@ -111,7 +124,7 @@
|
|||
encoding_link="eucKR:CP949"
|
||||
countries="KR" />
|
||||
<language name="lt"
|
||||
encoding="ISO8859-4 ISO8859-13"
|
||||
encoding="ISO8859-13"
|
||||
countries="LT" />
|
||||
<language name="lv"
|
||||
encoding="ISO8859-13"
|
||||
|
@ -120,20 +133,23 @@
|
|||
family="Cyrl"
|
||||
countries="MN" />
|
||||
<language name="nb"
|
||||
encoding="ISO8859-1 ISO8859-15"
|
||||
encoding="ISO8859-15"
|
||||
countries="NO" />
|
||||
<language name="nl"
|
||||
encoding="ISO8859-1 ISO8859-15"
|
||||
encoding="ISO8859-15"
|
||||
countries="BE NL" />
|
||||
<language name="nn"
|
||||
encoding="ISO8859-1 ISO8859-15"
|
||||
encoding="ISO8859-15"
|
||||
countries="NO" />
|
||||
<language name="pl"
|
||||
encoding="ISO8859-2"
|
||||
countries="PL" />
|
||||
<language name="pt"
|
||||
encoding="ISO8859-1 ISO8859-15"
|
||||
countries="PT BR" />
|
||||
encoding="ISO8859-15"
|
||||
countries="PT" />
|
||||
<language name="pt"
|
||||
encoding="ISO8859-1"
|
||||
countries="BR" />
|
||||
<language name="ro"
|
||||
encoding="ISO8859-2"
|
||||
countries="RO" />
|
||||
|
@ -157,7 +173,7 @@
|
|||
encoding="ISO8859-5"
|
||||
countries="RS" />
|
||||
<language name="sv"
|
||||
encoding="ISO8859-1 ISO8859-15"
|
||||
encoding="ISO8859-15"
|
||||
countries="SE FI" />
|
||||
<language name="tr"
|
||||
encoding="ISO8859-9"
|
||||
|
|
6151
tools/tools/locale/etc/common.UTF-8.src
Normal file
6151
tools/tools/locale/etc/common.UTF-8.src
Normal file
File diff suppressed because it is too large
Load diff
989
tools/tools/locale/etc/manual-input.UTF-8
Normal file
989
tools/tools/locale/etc/manual-input.UTF-8
Normal file
|
@ -0,0 +1,989 @@
|
|||
|
||||
******* REMAINING DEFINITIONS ARE MANUALLY ASSEMBLED *******
|
||||
|
||||
blank <NO-BREAK_SPACE>
|
||||
digit <SUPERSCRIPT_TWO>;<SUPERSCRIPT_THREE>;<SUPERSCRIPT_ONE>
|
||||
punct <INVERTED_EXCLAMATION_MARK>;...;<COPYRIGHT_SIGN>;/
|
||||
<LEFT-POINTING_DOUBLE_ANGLE_QUOTATION_MARK>;...;<PLUS-MINUS_SIGN>;/
|
||||
<ACUTE_ACCENT>;/
|
||||
<PILCROW_SIGN>;...;<CEDILLA>;/
|
||||
<RIGHT-POINTING_DOUBLE_ANGLE_QUOTATION_MARK>;...;<INVERTED_QUESTION_MARK>
|
||||
number <VULGAR_FRACTION_ONE_QUARTER>;...;<VULGAR_FRACTION_THREE_QUARTERS>
|
||||
cntrl <CONTROL-0080>;...;<APPLICATION_PROGRAM_COMMAND>
|
||||
graph <INVERTED_EXCLAMATION_MARK>;...;<INVERTED_QUESTION_MARK>
|
||||
|
||||
punct <DIVISION_SIGN>;<MULTIPLICATION_SIGN>
|
||||
graph <DIVISION_SIGN>;<MULTIPLICATION_SIGN>
|
||||
|
||||
**********************************************************************
|
||||
* Complete set of "special" characters
|
||||
**********************************************************************
|
||||
|
||||
special <EXCLAMATION_MARK>;...;<SOLIDUS>;/
|
||||
<COLON>;...;<COMMERCIAL_AT>;/
|
||||
<LEFT_SQUARE_BRACKET>;...;<GRAVE_ACCENT>;/
|
||||
<LEFT_CURLY_BRACKET>;...;<TILDE>;/
|
||||
<INVERTED_EXCLAMATION_MARK>;...;<ACUTE_ACCENT>;/
|
||||
<MIDDLE_DOT>;...;<INVERTED_QUESTION_MARK>;/
|
||||
<MULTIPLICATION_SIGN>;/
|
||||
<DIVISION_SIGN>;/
|
||||
<HYPHEN>;...;<HYPHENATION_POINT>;/
|
||||
<PER_MILLE_SIGN>;...;<VERTICAL_FOUR_DOTS>
|
||||
|
||||
**********************************************************************
|
||||
* Supplement generated sections with "number" classification
|
||||
**********************************************************************
|
||||
|
||||
digit <ARABIC-INDIC_DIGIT_ZERO>;...;<ARABIC-INDIC_DIGIT_NINE>
|
||||
digit <EXTENDED_ARABIC-INDIC_DIGIT_ZERO>;...;<EXTENDED_ARABIC-INDIC_DIGIT_NINE>
|
||||
digit <DEVANAGARI_DIGIT_ZERO>;...;<DEVANAGARI_DIGIT_NINE>
|
||||
digit <BENGALI_DIGIT_ZERO>;...;<BENGALI_DIGIT_NINE>
|
||||
number <BENGALI_CURRENCY_NUMERATOR_ONE>;...;<BENGALI_CURRENCY_DENOMINATOR_SIXTEEN>
|
||||
digit <GURMUKHI_DIGIT_ZERO>;...;<GURMUKHI_DIGIT_NINE>
|
||||
digit <GUJARATI_DIGIT_ZERO>;...;<GUJARATI_DIGIT_NINE>
|
||||
digit <ORIYA_DIGIT_ZERO>;...;<ORIYA_DIGIT_NINE>
|
||||
digit <TAMIL_DIGIT_ZERO>;...;<TAMIL_DIGIT_NINE>
|
||||
number <TAMIL_NUMBER_TEN>;...;<TAMIL_NUMBER_ONE_THOUSAND>
|
||||
digit <TELUGU_DIGIT_ZERO>;...;<TELUGU_DIGIT_NINE>
|
||||
number <TELUGU_FRACTION_DIGIT_ZERO_FOR_ODD_POWERS_OF_FOUR>;...;<TELUGU_FRACTION_DIGIT_THREE_FOR_EVEN_POWERS_OF_FOUR>
|
||||
digit <KANNADA_DIGIT_ZERO>;...;<KANNADA_DIGIT_NINE>
|
||||
digit <MALAYALAM_DIGIT_ZERO>;...;<MALAYALAM_DIGIT_NINE>
|
||||
number <MALAYALAM_NUMBER_TEN>;...;<MALAYALAM_FRACTION_THREE_QUARTERS>
|
||||
digit <THAI_DIGIT_ZERO>;...;<THAI_DIGIT_NINE>
|
||||
digit <LAO_DIGIT_ZERO>;...;<LAO_DIGIT_NINE>
|
||||
digit <TIBETAN_DIGIT_ZERO>;...;<TIBETAN_DIGIT_NINE>
|
||||
number <TIBETAN_DIGIT_HALF_ONE>;...;<TIBETAN_DIGIT_HALF_ZERO>
|
||||
digit <MYANMAR_DIGIT_ZERO>;...;<MYANMAR_DIGIT_NINE>
|
||||
digit <MYANMAR_SHAN_DIGIT_ZERO>;...;<MYANMAR_SHAN_DIGIT_NINE>
|
||||
digit <ETHIOPIC_DIGIT_ONE>;...;<ETHIOPIC_DIGIT_NINE>
|
||||
number <ETHIOPIC_NUMBER_TEN>;...;<ETHIOPIC_NUMBER_TEN_THOUSAND>
|
||||
digit <KHMER_DIGIT_ZERO>;...;<KHMER_DIGIT_NINE>
|
||||
number <KHMER_SYMBOL_LEK_ATTAK_SON>;...;<KHMER_SYMBOL_LEK_ATTAK_PRAM-BUON>
|
||||
digit <VAI_DIGIT_ZERO>;...;<VAI_DIGIT_NINE>
|
||||
number <ROMAN_NUMERAL_ONE>;...;<ROMAN_NUMERAL_TEN_THOUSAND>
|
||||
number <ROMAN_NUMERAL_SIX_LATE_FORM>;...;<ROMAN_NUMERAL_ONE_HUNDRED_THOUSAND>
|
||||
number <PARENTHESIZED_IDEOGRAPH_ONE>;...;<PARENTHESIZED_IDEOGRAPH_TEN>
|
||||
number <CIRCLED_NUMBER_TEN_ON_BLACK_SQUARE>;...;<CIRCLED_NUMBER_EIGHTY_ON_BLACK_SQUARE>
|
||||
number <CIRCLED_NUMBER_TWENTY_ONE>;...;<CIRCLED_NUMBER_THIRTY_FIVE>
|
||||
number <CIRCLED_IDEOGRAPH_ONE>;...;<CIRCLED_IDEOGRAPH_TEN>
|
||||
number <CIRCLED_NUMBER_THIRTY_SIX>;...;<CIRCLED_NUMBER_FIFTY>
|
||||
number <CJK_UNIFIED_IDEOGRAPH-3405>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-3483>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-382A>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-3B4D>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-4E00>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-4E03>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-4E07>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-4E09>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-4E5D>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-4E8C>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-4E94>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-4E96>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-4EBF>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-4EC0>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-4EDF>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-4EE8>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-4F0D>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-4F70>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-5104>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-5146>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-5169>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-516B>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-516D>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-5341>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-5343>;...;<CJK_UNIFIED_IDEOGRAPH-5345>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-534C>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-53C1>;...;<CJK_UNIFIED_IDEOGRAPH-53C4>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-56DB>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-58F1>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-58F9>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-5E7A>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-5EFE>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-5EFF>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-5F0C>;...;<CJK_UNIFIED_IDEOGRAPH-5F0E>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-5F10>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-62FE>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-634C>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-67D2>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-6F06>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-7396>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-767E>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-8086>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-842C>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-8CAE>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-8CB3>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-8D30>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-9621>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-9646>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-964C>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-9678>;/
|
||||
<CJK_UNIFIED_IDEOGRAPH-96F6>
|
||||
number <CJK_COMPATIBILITY_IDEOGRAPH-F96B>;/
|
||||
<CJK_COMPATIBILITY_IDEOGRAPH-F973>;/
|
||||
<CJK_COMPATIBILITY_IDEOGRAPH-F978>;/
|
||||
<CJK_COMPATIBILITY_IDEOGRAPH-F9B2>;/
|
||||
<CJK_COMPATIBILITY_IDEOGRAPH-F9D1>;/
|
||||
<CJK_COMPATIBILITY_IDEOGRAPH-F9D3>;/
|
||||
<CJK_COMPATIBILITY_IDEOGRAPH-F9FD>
|
||||
digit <FULLWIDTH_DIGIT_ZERO>;...;<FULLWIDTH_DIGIT_NINE>
|
||||
|
||||
**********************************************************************
|
||||
* 0x02B0 - 0x02FF Spacing Modifier Letters
|
||||
**********************************************************************
|
||||
|
||||
graph <MODIFIER_LETTER_SMALL_H>;...;<MODIFIER_LETTER_LOW_LEFT_ARROW>
|
||||
punct <MODIFIER_LETTER_PRIME>;...;<MODIFIER_LETTER_LEFT_HALF_RING>;/
|
||||
<MODIFIER_LETTER_LEFT_ARROWHEAD>;...;<MODIFIER_LETTER_CROSS_ACCENT>;/
|
||||
<MODIFIER_LETTER_EXTRA-HIGH_TONE_BAR>;...;/
|
||||
<MODIFIER_LETTER_LOW_LEFT_ARROW>
|
||||
lower <MODIFIER_LETTER_SMALL_H>;...;<MODIFIER_LETTER_SMALL_Y>;/
|
||||
<MODIFIER_LETTER_GLOTTAL_STOP>;/
|
||||
<MODIFIER_LETTER_REVERSED+GLOTTAL_STOP>;/
|
||||
<MODIFIER_LETTER_SMALL_GAMMA>;...;/
|
||||
<MODIFIER_LETTER_SMALL_REVERSED_GLOTTAL_STOP>
|
||||
|
||||
**********************************************************************
|
||||
* 0x0300 - 0x036F Combining Diacritical Marks
|
||||
**********************************************************************
|
||||
|
||||
graph <COMBINING_GRAVE_ACCENT>;...;<COMBINING_LATIN_SMALL_LETTER_X>
|
||||
|
||||
**********************************************************************
|
||||
* 0x0370 - 0x03FF Greek and Coptic: Coptic letters (Automatic section skips them)
|
||||
**********************************************************************
|
||||
|
||||
graph <COPTIC_CAPITAL_LETTER_SHEI>;/
|
||||
<COPTIC_CAPITAL_LETTER_FEI>;/
|
||||
<COPTIC_CAPITAL_LETTER_KHEI>;/
|
||||
<COPTIC_CAPITAL_LETTER_HORI>;/
|
||||
<COPTIC_CAPITAL_LETTER_GANGIA>;/
|
||||
<COPTIC_CAPITAL_LETTER_SHIMA>;/
|
||||
<COPTIC_CAPITAL_LETTER_DEI>;/
|
||||
<COPTIC_SMALL_LETTER_SHEI>;/
|
||||
<COPTIC_SMALL_LETTER_FEI>;/
|
||||
<COPTIC_SMALL_LETTER_KHEI>;/
|
||||
<COPTIC_SMALL_LETTER_HORI>;/
|
||||
<COPTIC_SMALL_LETTER_GANGIA>;/
|
||||
<COPTIC_SMALL_LETTER_SHIMA>;/
|
||||
<COPTIC_SMALL_LETTER_DEI>
|
||||
upper <COPTIC_CAPITAL_LETTER_SHEI>;/
|
||||
<COPTIC_CAPITAL_LETTER_FEI>;/
|
||||
<COPTIC_CAPITAL_LETTER_KHEI>;/
|
||||
<COPTIC_CAPITAL_LETTER_HORI>;/
|
||||
<COPTIC_CAPITAL_LETTER_GANGIA>;/
|
||||
<COPTIC_CAPITAL_LETTER_SHIMA>;/
|
||||
<COPTIC_CAPITAL_LETTER_DEI>
|
||||
lower <COPTIC_SMALL_LETTER_SHEI>;/
|
||||
<COPTIC_SMALL_LETTER_FEI>;/
|
||||
<COPTIC_SMALL_LETTER_KHEI>;/
|
||||
<COPTIC_SMALL_LETTER_HORI>;/
|
||||
<COPTIC_SMALL_LETTER_GANGIA>;/
|
||||
<COPTIC_SMALL_LETTER_SHIMA>;/
|
||||
<COPTIC_SMALL_LETTER_DEI>
|
||||
toupper (<COPTIC_SMALL_LETTER_SHEI>,<COPTIC_CAPITAL_LETTER_SHEI>);/
|
||||
(<COPTIC_SMALL_LETTER_FEI>,<COPTIC_CAPITAL_LETTER_FEI>);/
|
||||
(<COPTIC_SMALL_LETTER_KHEI>,<COPTIC_CAPITAL_LETTER_KHEI>);/
|
||||
(<COPTIC_SMALL_LETTER_HORI>,<COPTIC_CAPITAL_LETTER_HORI>);/
|
||||
(<COPTIC_SMALL_LETTER_GANGIA>,<COPTIC_CAPITAL_LETTER_GANGIA>);/
|
||||
(<COPTIC_SMALL_LETTER_SHIMA>,<COPTIC_CAPITAL_LETTER_SHIMA>);/
|
||||
(<COPTIC_SMALL_LETTER_DEI>,<COPTIC_CAPITAL_LETTER_DEI>)
|
||||
tolower (<COPTIC_CAPITAL_LETTER_SHEI>,<COPTIC_SMALL_LETTER_SHEI>);/
|
||||
(<COPTIC_CAPITAL_LETTER_FEI>,<COPTIC_SMALL_LETTER_FEI>);/
|
||||
(<COPTIC_CAPITAL_LETTER_KHEI>,<COPTIC_SMALL_LETTER_KHEI>);/
|
||||
(<COPTIC_CAPITAL_LETTER_HORI>,<COPTIC_SMALL_LETTER_HORI>);/
|
||||
(<COPTIC_CAPITAL_LETTER_GANGIA>,<COPTIC_SMALL_LETTER_GANGIA>);/
|
||||
(<COPTIC_CAPITAL_LETTER_SHIMA>,<COPTIC_SMALL_LETTER_SHIMA>);/
|
||||
(<COPTIC_CAPITAL_LETTER_DEI>,<COPTIC_SMALL_LETTER_DEI>)
|
||||
|
||||
**********************************************************************
|
||||
* 0x0700 - 0x074F Syriac
|
||||
**********************************************************************
|
||||
|
||||
graph <SYRIAC_END_OF_PARAGRAPH>;...;<SYRIAC_LETTER_SOGDIAN_FE>
|
||||
|
||||
**********************************************************************
|
||||
* 0x0780 - 0x07BF Thaana
|
||||
**********************************************************************
|
||||
|
||||
graph <THAANA_LETTER_HAA>;...;<THAANA_LETTER_NAA>
|
||||
|
||||
**********************************************************************
|
||||
* 0x07C0 - 0x07FF Nko
|
||||
**********************************************************************
|
||||
|
||||
digit <NKO_DIGIT_ZERO>;...;<NKO_DIGIT_NINE>
|
||||
graph <NKO_LETTER_A>;...;<NKO_LAJANYALAN>
|
||||
|
||||
**********************************************************************
|
||||
* 0x0800 - 0x083F Samaritan
|
||||
**********************************************************************
|
||||
|
||||
graph <SAMARITAN_LETTER_ALAF>;...;<SAMARITAN_PUNCTUATION_ANNAAU>
|
||||
|
||||
**********************************************************************
|
||||
* 0x0840 - 0x085F Mandaic
|
||||
**********************************************************************
|
||||
|
||||
graph <MANDAIC_LETTER_HALQA>;...;<MANDAIC_GEMINATION_MARK>;/
|
||||
<MANDAIC_PUNCTUATION>
|
||||
|
||||
**********************************************************************
|
||||
* 0x1400 - 0x167F Unified Canadian Aboriginal Syllabics
|
||||
**********************************************************************
|
||||
|
||||
graph <CANADIAN_SYLLABICS_HYPHEN>;...;<CANADIAN_SYLLABICS_BLACKFOOT_W>
|
||||
|
||||
**********************************************************************
|
||||
* 0x1680 - 0x169F Ogham
|
||||
**********************************************************************
|
||||
|
||||
graph <OGHAM_SPACE_MARK>;...;<OGHAM_REVERSED_FEATHER_MARK>
|
||||
|
||||
**********************************************************************
|
||||
* 0x16A0 - 0x16FF Runic
|
||||
**********************************************************************
|
||||
|
||||
graph <RUNIC_LETTER_FEHU_FEOH_FE_F>;...;<RUNIC_LETTER_FRANKS_CASKET_AESC>
|
||||
number <RUNIC_ARLAUG_SYMBOL>;...;<RUNIC_BELGTHOR_SYMBOL>
|
||||
|
||||
**********************************************************************
|
||||
* 0x1700 - 0x171F Tagalog
|
||||
**********************************************************************
|
||||
|
||||
graph <TAGALOG_LETTER_A>;...;<TAGALOG_SIGN_VIRAMA>
|
||||
|
||||
**********************************************************************
|
||||
* 0x1720 - 0x173F Hanunoo
|
||||
**********************************************************************
|
||||
|
||||
graph <HANUNOO_LETTER_A>;...;<PHILIPPINE_DOUBLE_PUNCTUATION>
|
||||
|
||||
***********************************************************************
|
||||
* 0x1740 - 0x175F Buhid
|
||||
**********************************************************************
|
||||
|
||||
graph <BUHID_LETTER_A>;...;<BUHID_VOWEL_SIGN_U>
|
||||
|
||||
**********************************************************************
|
||||
* 0x1760 - 0x177F Tagbanwa
|
||||
**********************************************************************
|
||||
|
||||
graph <TAGBANWA_LETTER_A>;...;<TAGBANWA_VOWEL_SIGN_U>
|
||||
|
||||
**********************************************************************
|
||||
* 0x1800 - 0x18AF Mongolian
|
||||
**********************************************************************
|
||||
|
||||
graph <MONGOLIAN_BIRGA>;...;<MONGOLIAN_VOWEL_SEPARATOR>;/
|
||||
<MONGOLIAN_LETTER_A>;...;<MONGOLIAN_LETTER_MANCHU_ZHA>;/
|
||||
<MONGOLIAN_LETTER_ALI_GALI_ANUSVARA_ONE>;...;/
|
||||
<MONGOLIAN_LETTER_MANCHU_ALI_GALI_LHA>
|
||||
digit <MONGOLIAN_DIGIT_ZERO>;...;<MONGOLIAN_DIGIT_NINE>
|
||||
|
||||
**********************************************************************
|
||||
* 0x18B0 - 0x18FF Unified Canadian Aboriginal Syllabics Extended
|
||||
**********************************************************************
|
||||
|
||||
graph <CANADIAN_SYLLABICS_OY>;...;<CANADIAN_SYLLABICS_CARRIER_DENTAL_S>
|
||||
|
||||
**********************************************************************
|
||||
* 0x1900 - 0x194F Limbu
|
||||
**********************************************************************
|
||||
|
||||
graph <LIMBU_VOWEL-CARRIER_LETTER>;...;<LIMBU_LETTER_TRA>;/
|
||||
<LIMBU_VOWEL_SIGN_A>;...;<LIMBU_SUBJOINED_LETTER_WA>;/
|
||||
<LIMBU_SMALL_LETTER_KA>;...;<LIMBU_SIGN_SA-I>;/
|
||||
<LIMBU_SIGN_LOO>;/
|
||||
<LIMBU_EXCLAMATION_MARK>;/
|
||||
<LIMBU_QUESTION_MARK>
|
||||
digit <LIMBU_DIGIT_ZERO>;...;<LIMBU_DIGIT_NINE>
|
||||
|
||||
**********************************************************************
|
||||
* 0x1950 - 0x197F Tai Le
|
||||
**********************************************************************
|
||||
|
||||
graph <TAI_LE_LETTER_KA>;...;<TAI_LE_LETTER_AI>;/
|
||||
<TAI_LE_LETTER_TONE-2>;...;<TAI_LE_LETTER_TONE-6>
|
||||
|
||||
**********************************************************************
|
||||
* 0x1980 - 0x19DF New Tai Lue
|
||||
**********************************************************************
|
||||
|
||||
graph <NEW_TAI_LUE_LETTER_HIGH_QA>;...;<NEW_TAI_LUE_LETTER_LOW_SUA>;/
|
||||
<NEW_TAI_LUE_VOWEL_SIGN_VOWEL_SHORTENER>;...;/
|
||||
<NEW_TAI_LUE_TONE_MARK-2>;/
|
||||
<NEW_TAI_LUE_SIGN_LAE>;/
|
||||
<NEW_TAI_LUE_SIGN_LAEV>
|
||||
digit <NEW_TAI_LUE_DIGIT_ZERO>;...;<NEW_TAI_LUE_THAM_DIGIT_ONE>
|
||||
|
||||
**********************************************************************
|
||||
* 0x1A00 - 0x1A1F Buginese
|
||||
**********************************************************************
|
||||
|
||||
graph <BUGINESE_LETTER_KA>;...;<BUGINESE_VOWEL_SIGN_AE>;/
|
||||
<BUGINESE_PALLAWA>;/
|
||||
<BUGINESE_END_OF_SECTION>
|
||||
|
||||
**********************************************************************
|
||||
* 0x1A20 - 0x1AAF Tai Tham
|
||||
**********************************************************************
|
||||
|
||||
graph <TAI_THAM_LETTER_HIGH_KA>;...;<TAI_THAM_CONSONANT_SIGN_SA>;/
|
||||
<TAI_THAM_SIGN_SAKOT>;...;<TAI_THAM_SIGN_KHUEN-LUE_KARAN>;/
|
||||
<TAI_THAM_COMBINING_CRYPTOGRAMMIC_DOT>;/
|
||||
<TAI_THAM_SIGN_WIANG>;...;<TAI_THAM_SIGN_CAANG>
|
||||
digit <TAI_THAM_HORA_DIGIT_ZERO>;...;<TAI_THAM_HORA_DIGIT_NINE>;/
|
||||
<TAI_THAM_THAM_DIGIT_ZERO>;...;<TAI_THAM_THAM_DIGIT_NINE>
|
||||
|
||||
**********************************************************************
|
||||
* 0x1AB0 - 0x1AFF Combining Diacritical Marks Extended
|
||||
**********************************************************************
|
||||
|
||||
graph <COMBINING_DOUBLED_CIRCUMFLEX_ACCENT>;...;<COMBINING_PARENTHESES_OVERLAY>
|
||||
|
||||
**********************************************************************
|
||||
* 0x1B00 - 0x1B7F Balinese
|
||||
**********************************************************************
|
||||
|
||||
graph <BALINESE_SIGN_ULU_RICEM>;...;<BALINESE_LETTER_ASYURA_SASAK>;/
|
||||
<BALINESE_PANTI>;...;<BALINESE_MUSICAL_SYMBOL_LEFT-HAND_OPEN_PING>
|
||||
digit <BALINESE_DIGIT_ZERO>;...;<BALINESE_DIGIT_NINE>
|
||||
|
||||
**********************************************************************
|
||||
* 0x1B80 - 0x1BBF Sundanese
|
||||
**********************************************************************
|
||||
|
||||
graph <SUNDANESE_SIGN_PANYECEK>;...;<SUNDANESE_LETTER_FINAL_M>
|
||||
digit <SUNDANESE_DIGIT_ZERO>;...;<SUNDANESE_DIGIT_NINE>
|
||||
|
||||
**********************************************************************
|
||||
* 0x1BC0 - 0x1BFF Batak
|
||||
**********************************************************************
|
||||
|
||||
graph <BATAK_LETTER_A>;...;<BATAK_PANONGONAN>;/
|
||||
<BATAK_SYMBOL_BINDU_NA_METEK>;...;<BATAK_SYMBOL_BINDU_PANGOLAT>
|
||||
|
||||
**********************************************************************
|
||||
* 0x1C00 - 0x1C4F Lepcha
|
||||
**********************************************************************
|
||||
|
||||
graph <LEPCHA_LETTER_KA>;...;<LEPCHA_SIGN_NUKTA>;/
|
||||
<LEPCHA_PUNCTUATION_TA-ROL>;...;<LEPCHA_PUNCTUATION_TSHOOK>;/
|
||||
<LEPCHA_LETTER_TTA>;...;<LEPCHA_LETTER_DDA>
|
||||
digit <LEPCHA_DIGIT_ZERO>;...;<LEPCHA_DIGIT_NINE>
|
||||
|
||||
**********************************************************************
|
||||
* 0x1C50 - 0x1C7F Ol Chiki
|
||||
**********************************************************************
|
||||
|
||||
graph <OL_CHIKI_LETTER_LA>;...;<OL_CHIKI_PUNCTUATION_DOUBLE_MUCAAD>
|
||||
digit <OL_CHIKI_DIGIT_ZERO>;...;<OL_CHIKI_DIGIT_NINE>
|
||||
|
||||
**********************************************************************
|
||||
* 0x1CC0 - 0x1CCF Sundanese Supplement
|
||||
**********************************************************************
|
||||
|
||||
graph <SUNDANESE_PUNCTUATION_BINDU_SURYA>;...;/
|
||||
<SUNDANESE_PUNCTUATION_BINDU_BA_SATANGA>
|
||||
|
||||
**********************************************************************
|
||||
* 0x1CD0 - 0x1CFF Vedic Extensions
|
||||
**********************************************************************
|
||||
|
||||
graph <VEDIC_TONE_KARSHANA>;...;<VEDIC_TONE_DOUBLE_RING_ABOVE>
|
||||
|
||||
**********************************************************************
|
||||
* 0x1DC0 - 0x1DFF Combining Diacritical Marks Supplement
|
||||
**********************************************************************
|
||||
|
||||
graph <COMBINING_DOTTED_GRAVE_ACCENT>;...;<COMBINING_UP_TACK_ABOVE>;/
|
||||
<COMBINING_DOUBLE_INVERTED_BREVE_BELOW>;...;/
|
||||
<COMBINING_RIGHT_ARROWHEAD_AND_DOWN_ARROWHEAD_BELOW>
|
||||
|
||||
**********************************************************************
|
||||
* 0x2000 - 0x206F General Punctuation
|
||||
**********************************************************************
|
||||
|
||||
space <EN_QUAD>;...;<RIGHT-TO-LEFT_MARK>;/
|
||||
<LINE_SEPARATOR>;...;<NARROW_NO-BREAK_SPACE>
|
||||
punct <HYPHEN>;...;<HYPHENATION_POINT>;/
|
||||
<PER_MILLE_SIGN>;...;<VERTICAL_FOUR_DOTS>
|
||||
|
||||
**********************************************************************
|
||||
* 0x2070 - 0x209F Superscripts and Subscripts
|
||||
**********************************************************************
|
||||
|
||||
graph <SUPERSCRIPT_ZERO>;...;<LATIN_SUBSCRIPT_SMALL_LETTER_T>
|
||||
digit <SUPERSCRIPT_ZERO>
|
||||
digit <SUPERSCRIPT_FOUR>;...;<SUPERSCRIPT_NINE>
|
||||
digit <SUBSCRIPT_ZERO>;...;<SUBSCRIPT_NINE>
|
||||
punct <SUPERSCRIPT_MINUS>;...;<SUPERSCRIPT_RIGHT_PARENTHESIS>
|
||||
punct <SUBSCRIPT_PLUS_SIGN>;...;<SUBSCRIPT_RIGHT_PARENTHESIS>
|
||||
lower <SUPERSCRIPT_LATIN_SMALL_LETTER_I>;/
|
||||
<SUPERSCRIPT_LATIN_SMALL_LETTER_N>;/
|
||||
<LATIN_SUBSCRIPT_SMALL_LETTER_A>;...;<LATIN_SUBSCRIPT_SMALL_LETTER_T>
|
||||
|
||||
**********************************************************************
|
||||
* 0x20A0 - 0x20CF Currency Symbols
|
||||
**********************************************************************
|
||||
|
||||
punct <EURO-CURRENCY_SIGN>;...;<RUBLE_SIGN>
|
||||
|
||||
**********************************************************************
|
||||
* 0x20D0 - 0x20FF Combining Diacritical Marks for Symbols
|
||||
**********************************************************************
|
||||
|
||||
graph <COMBINING_LEFT_HARPOON_ABOVE>;...;<COMBINING_ASTERISK_ABOVE>
|
||||
|
||||
**********************************************************************
|
||||
* 0x2100 - 0x214F Letterlike Symbols
|
||||
**********************************************************************
|
||||
|
||||
graph <ACCOUNT_OF>;...;<SYMBOL_FOR_SAMARITAN_SOURCE>
|
||||
punct <ACCOUNT_OF>;/
|
||||
<ADDRESSED_TO_THE_SUBJECT>;/
|
||||
<DEGREE_CELSIUS>;...;<CADA_UNA>;/
|
||||
<SCRUPLE>;/
|
||||
<DEGREE_FAHRENHEIT>;/
|
||||
<L_B_BAR_SYMBOL>;/
|
||||
<NUMERO_SIGN>;...;<SCRIPT_CAPITAL_P>;/
|
||||
<PRESCRIPTION_TAKE>;...;<VERSICLE>;/
|
||||
<OUNCE_SIGN>;/
|
||||
<INVERTED_OHM_SIGN>;/
|
||||
<TURNED_GREEK_SMALL_LETTER_IOTA>;/
|
||||
<ESTIMATED_SYMBOL>;/
|
||||
<ROTATED_CAPITAL_Q>;/
|
||||
<DOUBLE-STRUCK_N-ARY_SUMMATION>;...;<TURNED_SANS-SERIF_CAPITAL_Y>;/
|
||||
<PROPERTY_LINE>;...;<AKTIESELSKAB>;/
|
||||
<SYMBOL_FOR_SAMARITAN_SOURCE>
|
||||
upper <KELVIN_SIGN>;<ANGSTROM_SIGN>;<TURNED_CAPITAL_F>
|
||||
lower <TURNED_SMALL_F>
|
||||
alpha <DOUBLE-STRUCK_CAPITAL_C>;/
|
||||
<EULER_CONSTANT>;/
|
||||
<SCRIPT_SMALL_G>;...;<SCRIPT_SMALL_L>;/
|
||||
<DOUBLE-STRUCK_CAPITAL_N>;/
|
||||
<DOUBLE-STRUCK_CAPITAL_P>;...;<DOUBLE-STRUCK_CAPITAL_R>;/
|
||||
<DOUBLE-STRUCK_CAPITAL_Z>;/
|
||||
<OHM_SIGN>;/
|
||||
<BLACK-LETTER_CAPITAL_Z>;/
|
||||
<KELVIN_SIGN>;...;<BLACK-LETTER_CAPITAL_C>;/
|
||||
<SCRIPT_SMALL_E>;...;<SCRIPT_SMALL_O>;/
|
||||
<INFORMATION_SOURCE>;/
|
||||
<DOUBLE-STRUCK_SMALL_PI>;...;<DOUBLE-STRUCK_CAPITAL_PI>;/
|
||||
<DOUBLE-STRUCK_ITALIC_CAPITAL_D>;...;<DOUBLE-STRUCK_ITALIC_SMALL_J>
|
||||
tolower (<KELVIN_SIGN>,<k>);/
|
||||
(<ANGSTROM_SIGN>,<LATIN_SMALL_LETTER_A_WITH_RING_ABOVE>);/
|
||||
(<TURNED_CAPITAL_F>,<TURNED_SMALL_F>)
|
||||
toupper (<TURNED_SMALL_F>,<TURNED_CAPITAL_F>)
|
||||
|
||||
**********************************************************************
|
||||
* 0x2150 - 0x218F Number Forms (differential)
|
||||
**********************************************************************
|
||||
|
||||
number <VULGAR_FRACTION_ONE_SEVENTH>;...;<FRACTION_NUMERATOR_ONE>;/
|
||||
<VULGAR_FRACTION_ZERO_THIRDS>
|
||||
|
||||
**********************************************************************
|
||||
* 0x2190 - 0x21FF Arrows
|
||||
**********************************************************************
|
||||
|
||||
punct <LEFTWARDS_ARROW>;...;<LEFT_RIGHT_OPEN-HEADED_ARROW>
|
||||
|
||||
**********************************************************************
|
||||
* 0x2200 - 0x22FF Mathematical Operators
|
||||
**********************************************************************
|
||||
|
||||
punct <FOR_ALL>;...;<Z_NOTATION_BAG_MEMBERSHIP>
|
||||
|
||||
**********************************************************************
|
||||
* 0x2300 - 0x23FF Miscellaneous Technical
|
||||
**********************************************************************
|
||||
|
||||
punct <DIAMETER_SIGN>;...;<BLACK_CIRCLE_FOR_RECORD>
|
||||
|
||||
**********************************************************************
|
||||
* 0x2400 - 0x243F Control Pictures
|
||||
**********************************************************************
|
||||
|
||||
punct <SYMBOL_FOR_NULL>;...;<SYMBOL_FOR_SUBSTITUTE_FORM_TWO>
|
||||
|
||||
**********************************************************************
|
||||
* 0x2440 - 0x245F Optical Character Recognition
|
||||
**********************************************************************
|
||||
|
||||
punct <OCR_HOOK>;...;<OCR_DOUBLE_BACKSLASH>
|
||||
|
||||
**********************************************************************
|
||||
* 0x2460 - 0x24FF Enclosed Alphanumerics
|
||||
**********************************************************************
|
||||
|
||||
graph <CIRCLED_DIGIT_ONE>;...;<NEGATIVE_CIRCLED_DIGIT_ZERO>
|
||||
digit <CIRCLED_DIGIT_ONE>;...;<CIRCLED_DIGIT_NINE>
|
||||
digit <PARENTHESIZED_DIGIT_ONE>;...;<PARENTHESIZED_DIGIT_NINE>
|
||||
digit <DIGIT_ONE_FULL_STOP>;...;<DIGIT_NINE_FULL_STOP>
|
||||
digit <CIRCLED_DIGIT_ZERO>
|
||||
digit <DOUBLE_CIRCLED_DIGIT_ONE>;...;<DOUBLE_CIRCLED_DIGIT_NINE>
|
||||
digit <NEGATIVE_CIRCLED_DIGIT_ZERO>
|
||||
xdigit <CIRCLED_LATIN_CAPITAL_LETTER_A>;...;<CIRCLED_LATIN_CAPITAL_LETTER_F>
|
||||
xdigit <CIRCLED_LATIN_SMALL_LETTER_A>;...;<CIRCLED_LATIN_SMALL_LETTER_F>
|
||||
number <CIRCLED_NUMBER_TEN>;...;<CIRCLED_NUMBER_TWENTY>
|
||||
number <PARENTHESIZED_NUMBER_TEN>;...;<PARENTHESIZED_NUMBER_TWENTY>
|
||||
number <NUMBER_TEN_FULL_STOP>;...;<NUMBER_TWENTY_FULL_STOP>
|
||||
number <NEGATIVE_CIRCLED_NUMBER_ELEVEN>;...;<NEGATIVE_CIRCLED_NUMBER_TWENTY>
|
||||
number <DOUBLE_CIRCLED_NUMBER_TEN>
|
||||
lower <PARENTHESIZED_LATIN_SMALL_LETTER_A>;...;<PARENTHESIZED_LATIN_SMALL_LETTER_Z>
|
||||
upper <CIRCLED_LATIN_CAPITAL_LETTER_A>;...;<CIRCLED_LATIN_CAPITAL_LETTER_Z>
|
||||
lower <CIRCLED_LATIN_SMALL_LETTER_A>;...;<CIRCLED_LATIN_SMALL_LETTER_Z>
|
||||
toupper (<CIRCLED_LATIN_SMALL_LETTER_A>,<CIRCLED_LATIN_CAPITAL_LETTER_A>);/
|
||||
(<CIRCLED_LATIN_SMALL_LETTER_B>,<CIRCLED_LATIN_CAPITAL_LETTER_B>);/
|
||||
(<CIRCLED_LATIN_SMALL_LETTER_C>,<CIRCLED_LATIN_CAPITAL_LETTER_C>);/
|
||||
(<CIRCLED_LATIN_SMALL_LETTER_D>,<CIRCLED_LATIN_CAPITAL_LETTER_D>);/
|
||||
(<CIRCLED_LATIN_SMALL_LETTER_E>,<CIRCLED_LATIN_CAPITAL_LETTER_E>);/
|
||||
(<CIRCLED_LATIN_SMALL_LETTER_F>,<CIRCLED_LATIN_CAPITAL_LETTER_F>);/
|
||||
(<CIRCLED_LATIN_SMALL_LETTER_G>,<CIRCLED_LATIN_CAPITAL_LETTER_G>);/
|
||||
(<CIRCLED_LATIN_SMALL_LETTER_H>,<CIRCLED_LATIN_CAPITAL_LETTER_H>);/
|
||||
(<CIRCLED_LATIN_SMALL_LETTER_I>,<CIRCLED_LATIN_CAPITAL_LETTER_I>);/
|
||||
(<CIRCLED_LATIN_SMALL_LETTER_J>,<CIRCLED_LATIN_CAPITAL_LETTER_J>);/
|
||||
(<CIRCLED_LATIN_SMALL_LETTER_K>,<CIRCLED_LATIN_CAPITAL_LETTER_K>);/
|
||||
(<CIRCLED_LATIN_SMALL_LETTER_L>,<CIRCLED_LATIN_CAPITAL_LETTER_L>);/
|
||||
(<CIRCLED_LATIN_SMALL_LETTER_M>,<CIRCLED_LATIN_CAPITAL_LETTER_M>);/
|
||||
(<CIRCLED_LATIN_SMALL_LETTER_N>,<CIRCLED_LATIN_CAPITAL_LETTER_N>);/
|
||||
(<CIRCLED_LATIN_SMALL_LETTER_O>,<CIRCLED_LATIN_CAPITAL_LETTER_O>);/
|
||||
(<CIRCLED_LATIN_SMALL_LETTER_P>,<CIRCLED_LATIN_CAPITAL_LETTER_P>);/
|
||||
(<CIRCLED_LATIN_SMALL_LETTER_Q>,<CIRCLED_LATIN_CAPITAL_LETTER_Q>);/
|
||||
(<CIRCLED_LATIN_SMALL_LETTER_R>,<CIRCLED_LATIN_CAPITAL_LETTER_R>);/
|
||||
(<CIRCLED_LATIN_SMALL_LETTER_S>,<CIRCLED_LATIN_CAPITAL_LETTER_S>);/
|
||||
(<CIRCLED_LATIN_SMALL_LETTER_T>,<CIRCLED_LATIN_CAPITAL_LETTER_T>);/
|
||||
(<CIRCLED_LATIN_SMALL_LETTER_U>,<CIRCLED_LATIN_CAPITAL_LETTER_U>);/
|
||||
(<CIRCLED_LATIN_SMALL_LETTER_V>,<CIRCLED_LATIN_CAPITAL_LETTER_V>);/
|
||||
(<CIRCLED_LATIN_SMALL_LETTER_W>,<CIRCLED_LATIN_CAPITAL_LETTER_W>);/
|
||||
(<CIRCLED_LATIN_SMALL_LETTER_X>,<CIRCLED_LATIN_CAPITAL_LETTER_X>);/
|
||||
(<CIRCLED_LATIN_SMALL_LETTER_Y>,<CIRCLED_LATIN_CAPITAL_LETTER_Y>);/
|
||||
(<CIRCLED_LATIN_SMALL_LETTER_Z>,<CIRCLED_LATIN_CAPITAL_LETTER_Z>)
|
||||
tolower (<CIRCLED_LATIN_CAPITAL_LETTER_A>,<CIRCLED_LATIN_SMALL_LETTER_A>);/
|
||||
(<CIRCLED_LATIN_CAPITAL_LETTER_B>,<CIRCLED_LATIN_SMALL_LETTER_B>);/
|
||||
(<CIRCLED_LATIN_CAPITAL_LETTER_C>,<CIRCLED_LATIN_SMALL_LETTER_C>);/
|
||||
(<CIRCLED_LATIN_CAPITAL_LETTER_D>,<CIRCLED_LATIN_SMALL_LETTER_D>);/
|
||||
(<CIRCLED_LATIN_CAPITAL_LETTER_E>,<CIRCLED_LATIN_SMALL_LETTER_E>);/
|
||||
(<CIRCLED_LATIN_CAPITAL_LETTER_F>,<CIRCLED_LATIN_SMALL_LETTER_F>);/
|
||||
(<CIRCLED_LATIN_CAPITAL_LETTER_G>,<CIRCLED_LATIN_SMALL_LETTER_G>);/
|
||||
(<CIRCLED_LATIN_CAPITAL_LETTER_H>,<CIRCLED_LATIN_SMALL_LETTER_H>);/
|
||||
(<CIRCLED_LATIN_CAPITAL_LETTER_I>,<CIRCLED_LATIN_SMALL_LETTER_I>);/
|
||||
(<CIRCLED_LATIN_CAPITAL_LETTER_J>,<CIRCLED_LATIN_SMALL_LETTER_J>);/
|
||||
(<CIRCLED_LATIN_CAPITAL_LETTER_K>,<CIRCLED_LATIN_SMALL_LETTER_K>);/
|
||||
(<CIRCLED_LATIN_CAPITAL_LETTER_L>,<CIRCLED_LATIN_SMALL_LETTER_L>);/
|
||||
(<CIRCLED_LATIN_CAPITAL_LETTER_M>,<CIRCLED_LATIN_SMALL_LETTER_M>);/
|
||||
(<CIRCLED_LATIN_CAPITAL_LETTER_N>,<CIRCLED_LATIN_SMALL_LETTER_N>);/
|
||||
(<CIRCLED_LATIN_CAPITAL_LETTER_O>,<CIRCLED_LATIN_SMALL_LETTER_O>);/
|
||||
(<CIRCLED_LATIN_CAPITAL_LETTER_P>,<CIRCLED_LATIN_SMALL_LETTER_P>);/
|
||||
(<CIRCLED_LATIN_CAPITAL_LETTER_Q>,<CIRCLED_LATIN_SMALL_LETTER_Q>);/
|
||||
(<CIRCLED_LATIN_CAPITAL_LETTER_R>,<CIRCLED_LATIN_SMALL_LETTER_R>);/
|
||||
(<CIRCLED_LATIN_CAPITAL_LETTER_S>,<CIRCLED_LATIN_SMALL_LETTER_S>);/
|
||||
(<CIRCLED_LATIN_CAPITAL_LETTER_T>,<CIRCLED_LATIN_SMALL_LETTER_T>);/
|
||||
(<CIRCLED_LATIN_CAPITAL_LETTER_U>,<CIRCLED_LATIN_SMALL_LETTER_U>);/
|
||||
(<CIRCLED_LATIN_CAPITAL_LETTER_V>,<CIRCLED_LATIN_SMALL_LETTER_V>);/
|
||||
(<CIRCLED_LATIN_CAPITAL_LETTER_W>,<CIRCLED_LATIN_SMALL_LETTER_W>);/
|
||||
(<CIRCLED_LATIN_CAPITAL_LETTER_X>,<CIRCLED_LATIN_SMALL_LETTER_X>);/
|
||||
(<CIRCLED_LATIN_CAPITAL_LETTER_Y>,<CIRCLED_LATIN_SMALL_LETTER_Y>);/
|
||||
(<CIRCLED_LATIN_CAPITAL_LETTER_Z>,<CIRCLED_LATIN_SMALL_LETTER_Z>)
|
||||
|
||||
**********************************************************************
|
||||
* 0x2500 - 0x257F Box Drawing
|
||||
**********************************************************************
|
||||
|
||||
punct <BOX_DRAWINGS_LIGHT_HORIZONTAL>;...;<BOX_DRAWINGS_HEAVY_UP_AND_LIGHT_DOWN>
|
||||
|
||||
**********************************************************************
|
||||
* 0x2580 - 0x259F Block Elements
|
||||
**********************************************************************
|
||||
|
||||
punct <UPPER_HALF_BLOCK>;...;<QUADRANT_UPPER_RIGHT_AND_LOWER_LEFT_AND_LOWER_RIGHT>
|
||||
|
||||
**********************************************************************
|
||||
* 0x25A0 - 0x25FF Geometric Shapes
|
||||
**********************************************************************
|
||||
|
||||
punct <BLACK_SQUARE>;...;<LOWER_RIGHT_TRIANGLE>
|
||||
|
||||
**********************************************************************
|
||||
* 0x2600 - 0x26FF Miscellaneous Symbols
|
||||
**********************************************************************
|
||||
|
||||
punct <BLACK_SUN_WITH_RAYS>;...;<WHITE_FLAG_WITH_HORIZONTAL_MIDDLE_BLACK_STRIPE>
|
||||
|
||||
**********************************************************************
|
||||
* 0x2700 - 0x27BF Dingbats
|
||||
**********************************************************************
|
||||
|
||||
graph <BLACK_SAFETY_SCISSORS>;...;<DOUBLE_CURLY_LOOP>
|
||||
digit <DINGBAT_NEGATIVE_CIRCLED_DIGIT_ONE>;...;<DINGBAT_NEGATIVE_CIRCLED_DIGIT_NINE>
|
||||
number <DINGBAT_NEGATIVE_CIRCLED_NUMBER_TEN>
|
||||
digit <DINGBAT_CIRCLED_SANS-SERIF_DIGIT_ONE>;...;<DINGBAT_CIRCLED_SANS-SERIF_DIGIT_NINE>
|
||||
number <DINGBAT_CIRCLED_SANS-SERIF_NUMBER_TEN>
|
||||
digit <DINGBAT_NEGATIVE_CIRCLED_SANS-SERIF_DIGIT_ONE>;...;<DINGBAT_NEGATIVE_CIRCLED_SANS-SERIF_DIGIT_NINE>
|
||||
number <DINGBAT_NEGATIVE_CIRCLED_SANS-SERIF_NUMBER_TEN>
|
||||
punct <BLACK_SAFETY_SCISSORS>;...;<MEDIUM_RIGHT_CURLY_BRACKET_ORNAMENT>;/
|
||||
<HEAVY_WIDE-HEADED_RIGHTWARDS_ARROW>;...;<DOUBLE_CURLY_LOOP>
|
||||
|
||||
**********************************************************************
|
||||
* 0x27C0 - 0x27EF Miscellaneous Mathematical Symbols-A
|
||||
**********************************************************************
|
||||
|
||||
punct <THREE_DIMENSIONAL_ANGLE>;...;<MATHEMATICAL_RIGHT_FLATTENED_PARENTHESIS>
|
||||
|
||||
**********************************************************************
|
||||
* 0x27F0 - 0x27FF Supplemental Arrows-A
|
||||
**********************************************************************
|
||||
|
||||
punct <UPWARDS_QUADRUPLE_ARROW>;...;<LONG_RIGHTWARDS_SQUIGGLE_ARROW>
|
||||
|
||||
**********************************************************************
|
||||
* 0x2800 - 0x28FF Braille Patterns
|
||||
**********************************************************************
|
||||
|
||||
punct <BRAILLE_PATTERN_BLANK>;...;<BRAILLE_PATTERN_DOTS-12345678>
|
||||
|
||||
**********************************************************************
|
||||
* 0x2900 - 0x297F Supplemental Arrows-B
|
||||
**********************************************************************
|
||||
|
||||
punct <RIGHTWARDS_TWO-HEADED_ARROW_WITH_VERTICAL_STROKE>;...;<DOWN_FISH_TAIL>
|
||||
|
||||
**********************************************************************
|
||||
* 0x2980 - 0x29FF Miscellaneous Mathematical Symbols-B
|
||||
**********************************************************************
|
||||
|
||||
punct <TRIPLE_VERTICAL_BAR_DELIMITER>;...;<MINY>
|
||||
|
||||
**********************************************************************
|
||||
* 0x2A00 - 0x2AFF Supplemental Mathematical Operators
|
||||
**********************************************************************
|
||||
|
||||
punct <N-ARY_CIRCLED_DOT_OPERATOR>;...;<N-ARY_WHITE_VERTICAL_BAR>
|
||||
|
||||
**********************************************************************
|
||||
* 0x2B00 - 0x2BFF Miscellaneous Symbols and Arrows
|
||||
**********************************************************************
|
||||
|
||||
graph <NORTH_EAST_WHITE_ARROW>;...;<DOWNWARDS_TRIANGLE-HEADED_ARROW_TO_BAR>;/
|
||||
<NORTH_WEST_TRIANGLE-HEADED_ARROW_TO_BAR>;...;/
|
||||
<RIGHTWARDS_BLACK_ARROW>;/
|
||||
<THREE-D_TOP-LIGHTED_LEFTWARDS_EQUILATERAL_ARROWHEAD>;...;/
|
||||
<UP_ARROWHEAD_IN_A_RECTANGLE_BOX>;/
|
||||
<BALLOT_BOX_WITH_LIGHT_X>;...;<UNCERTAINTY_SIGN>
|
||||
punct <NORTH_EAST_WHITE_ARROW>;...;<DOWNWARDS_TRIANGLE-HEADED_ARROW_TO_BAR>;/
|
||||
<NORTH_WEST_TRIANGLE-HEADED_ARROW_TO_BAR>;...;/
|
||||
<RIGHTWARDS_BLACK_ARROW>;/
|
||||
<THREE-D_TOP-LIGHTED_LEFTWARDS_EQUILATERAL_ARROWHEAD>;...;/
|
||||
<UP_ARROWHEAD_IN_A_RECTANGLE_BOX>;/
|
||||
<BALLOT_BOX_WITH_LIGHT_X>;...;<UNCERTAINTY_SIGN>
|
||||
|
||||
**********************************************************************
|
||||
* 0x2C00 - 0x2C5F Glagolitic
|
||||
**********************************************************************
|
||||
|
||||
graph <GLAGOLITIC_CAPITAL_LETTER_AZU>;...;/
|
||||
<GLAGOLITIC_CAPITAL_LETTER_LATINATE_MYSLITE>;/
|
||||
<GLAGOLITIC_SMALL_LETTER_AZU>;...;/
|
||||
<GLAGOLITIC_SMALL_LETTER_LATINATE_MYSLITE>
|
||||
|
||||
**********************************************************************
|
||||
* 0x2C80 - 0x2CFF Coptic
|
||||
**********************************************************************
|
||||
|
||||
graph <COPTIC_CAPITAL_LETTER_ALFA>;...;<COPTIC_SMALL_LETTER_BOHAIRIC_KHEI>;/
|
||||
<COPTIC_OLD_NUBIAN_FULL_STOP>;...;<COPTIC_MORPHOLOGICAL_DIVIDER>
|
||||
number <COPTIC_FRACTION_ONE_HALF>
|
||||
|
||||
**********************************************************************
|
||||
* 0x2E00 - 0x2E7F Supplemental Punctuation
|
||||
**********************************************************************
|
||||
|
||||
punct <RIGHT_ANGLE_SUBSTITUTION_MARKER>;...;<DOUBLE_LOW-REVERSED-9_QUOTATION_MARK>
|
||||
|
||||
**********************************************************************
|
||||
* 0x2E80 - 0x2EFF CJK Radicals Supplement
|
||||
**********************************************************************
|
||||
|
||||
punct <CJK_RADICAL_REPEAT>;...;<CJK_RADICAL_C-SIMPLIFIED_TURTLE>
|
||||
|
||||
**********************************************************************
|
||||
* 0x2F00 - 0x2FDF Kangxi Radicals
|
||||
**********************************************************************
|
||||
|
||||
punct <KANGXI_RADICAL_ONE>;...;<KANGXI_RADICAL_FLUTE>
|
||||
|
||||
**********************************************************************
|
||||
* 0x2FF0 - 0x2FFF Ideographic Description Characters
|
||||
**********************************************************************
|
||||
|
||||
punct <IDEOGRAPHIC_DESCRIPTION_CHARACTER_LEFT_TO_RIGHT>;...;/
|
||||
<IDEOGRAPHIC_DESCRIPTION_CHARACTER_OVERLAID>
|
||||
|
||||
**********************************************************************
|
||||
* 0x3000 - 0x303F CJK Symbols and Punctuation
|
||||
**********************************************************************
|
||||
|
||||
space <IDEOGRAPHIC_SPACE>
|
||||
graph <IDEOGRAPHIC_COMMA>;...;<IDEOGRAPHIC_HALF_FILL_SPACE>
|
||||
number <IDEOGRAPHIC_NUMBER_ZERO>;/
|
||||
<HANGZHOU_NUMERAL_ONE>;...;<HANGZHOU_NUMERAL_NINE>;/
|
||||
<HANGZHOU_NUMERAL_TEN>;...;<HANGZHOU_NUMERAL_THIRTY>
|
||||
alpha <IDEOGRAPHIC_ITERATION_MARK>;/
|
||||
<IDEOGRAPHIC_CLOSING_MARK>;/
|
||||
<VERTICAL_IDEOGRAPHIC_ITERATION_MARK>
|
||||
punct <IDEOGRAPHIC_COMMA>;...;<JAPANESE_INDUSTRIAL_STANDARD_SYMBOL>;/
|
||||
<LEFT_ANGLE_BRACKET>;...;<POSTAL_MARK_FACE>;/
|
||||
<IDEOGRAPHIC_LEVEL_TONE_MARK>;...;/
|
||||
<IDEOGRAPHIC_TELEGRAPH_LINE_FEED_SEPARATOR_SYMBOL>;/
|
||||
<MASU_MARK>;...;<IDEOGRAPHIC_HALF_FILL_SPACE>
|
||||
|
||||
**********************************************************************
|
||||
* 0x3100 - 0x312F Bopomofo
|
||||
**********************************************************************
|
||||
|
||||
graph <BOPOMOFO_LETTER_B>;...;<BOPOMOFO_LETTER_IH>
|
||||
|
||||
**********************************************************************
|
||||
* 0x3190 - 0x319F Kanbun
|
||||
**********************************************************************
|
||||
|
||||
graph <IDEOGRAPHIC_ANNOTATION_LINKING_MARK>;...;/
|
||||
<IDEOGRAPHIC_ANNOTATION_MAN_MARK>
|
||||
number <IDEOGRAPHIC_ANNOTATION_ONE_MARK>;...;/
|
||||
<IDEOGRAPHIC_ANNOTATION_FOUR_MARK>
|
||||
punct <IDEOGRAPHIC_ANNOTATION_LINKING_MARK>;/
|
||||
<IDEOGRAPHIC_ANNOTATION_REVERSE_MARK>;/
|
||||
<IDEOGRAPHIC_ANNOTATION_MIDDLE_MARK>;...;/
|
||||
<IDEOGRAPHIC_ANNOTATION_MAN_MARK>
|
||||
|
||||
**********************************************************************
|
||||
* 0x31A0 - 0x31BF : Bopomofo Extended
|
||||
**********************************************************************
|
||||
|
||||
graph <BOPOMOFO_LETTER_BU>;...;<BOPOMOFO_LETTER_ZY>
|
||||
|
||||
**********************************************************************
|
||||
* 0x31C0 - 0x31EF : CJK Strokes
|
||||
**********************************************************************
|
||||
|
||||
graph <CJK_STROKE_T>;...;<CJK_STROKE_Q>
|
||||
|
||||
**********************************************************************
|
||||
* 0x4DC0 - 0x4DFF Yijing Hexagram Symbols
|
||||
**********************************************************************
|
||||
|
||||
graph <HEXAGRAM_FOR_THE_CREATIVE_HEAVEN>;...;<HEXAGRAM_FOR_BEFORE_COMPLETION>
|
||||
|
||||
**********************************************************************
|
||||
* 0xA4D0 - 0xA4FF Lisu
|
||||
**********************************************************************
|
||||
|
||||
graph <LISU_LETTER_BA>;...;<LISU_PUNCTUATION_FULL_STOP>
|
||||
|
||||
**********************************************************************
|
||||
* 0xA6A0 - 0xA6FF Bamum
|
||||
**********************************************************************
|
||||
|
||||
graph <BAMUM_LETTER_A>;...;<BAMUM_QUESTION_MARK>
|
||||
number <BAMUM_LETTER_MO>;...;<BAMUM_LETTER_KOGHOM>
|
||||
|
||||
**********************************************************************
|
||||
* 0xA700 - 0xA71F Modifier Tone Letters
|
||||
**********************************************************************
|
||||
|
||||
graph <MODIFIER_LETTER_CHINESE_TONE_YIN_PING>;...;/
|
||||
<MODIFIER_LETTER_LOW_INVERTED_EXCLAMATION_MARK>
|
||||
|
||||
**********************************************************************
|
||||
* 0xA800 - 0xA82F Syloti Nagri
|
||||
**********************************************************************
|
||||
|
||||
graph <SYLOTI_NAGRI_LETTER_A>;...;<SYLOTI_NAGRI_POETRY_MARK-4>
|
||||
|
||||
**********************************************************************
|
||||
* 0xA830 - 0xA83F Common Indic Number Forms
|
||||
**********************************************************************
|
||||
|
||||
number <NORTH_INDIC_FRACTION_ONE_QUARTER>;...;<NORTH_INDIC_FRACTION_THREE_SIXTEENTHS>
|
||||
graph <NORTH_INDIC_QUARTER_MARK>;...;<NORTH_INDIC_QUANTITY_MARK>
|
||||
|
||||
**********************************************************************
|
||||
* 0xA840 - 0xA87F Phags-pa
|
||||
**********************************************************************
|
||||
|
||||
graph <PHAGS-PA_LETTER_KA>;...;<PHAGS-PA_MARK_DOUBLE_SHAD>
|
||||
|
||||
**********************************************************************
|
||||
* 0xA880 - 0xA8DF Saurashtra
|
||||
**********************************************************************
|
||||
|
||||
graph <SAURASHTRA_SIGN_ANUSVARA>;...;<SAURASHTRA_SIGN_VIRAMA>;/
|
||||
<SAURASHTRA_DANDA>;/
|
||||
<SAURASHTRA_DOUBLE_DANDA>
|
||||
digit <SAURASHTRA_DIGIT_ZERO>;...;<SAURASHTRA_DIGIT_NINE>
|
||||
|
||||
**********************************************************************
|
||||
* 0xA900 - 0xA92F Kayah Li
|
||||
**********************************************************************
|
||||
|
||||
digit <KAYAH_LI_DIGIT_ZERO>;...;<KAYAH_LI_DIGIT_NINE>
|
||||
graph <KAYAH_LI_LETTER_KA>;...;<KAYAH_LI_SIGN_SHYA>
|
||||
|
||||
**********************************************************************
|
||||
* 0xA930 - 0xA95F Rejang
|
||||
**********************************************************************
|
||||
|
||||
graph <REJANG_LETTER_KA>;...;<REJANG_VIRAMA>;/
|
||||
<REJANG_SECTION_MARK>
|
||||
|
||||
**********************************************************************
|
||||
* 0xA980 - 0xA9DF Javanese
|
||||
**********************************************************************
|
||||
|
||||
graph <JAVANESE_SIGN_PANYANGGA>;...;<JAVANESE_TURNED_PADA_PISELEH>;/
|
||||
<JAVANESE_PANGRANGKEP>;/
|
||||
<JAVANESE_PADA_TIRTA_TUMETES>;/
|
||||
<JAVANESE_PADA_ISEN-ISEN>
|
||||
digit <JAVANESE_DIGIT_ZERO>;...;<JAVANESE_DIGIT_NINE>
|
||||
|
||||
**********************************************************************
|
||||
* 0xAA00 - 0xAA5F Cham
|
||||
**********************************************************************
|
||||
|
||||
graph <CHAM_LETTER_A>;...;<CHAM_CONSONANT_SIGN_WA>;/
|
||||
<CHAM_LETTER_FINAL_K>;...;<CHAM_CONSONANT_SIGN_FINAL_H>;/
|
||||
<CHAM_PUNCTUATION_SPIRAL>;...;<CHAM_PUNCTUATION_TRIPLE_DANDA>
|
||||
digit <CHAM_DIGIT_ZERO>;...;<CHAM_DIGIT_NINE>
|
||||
|
||||
**********************************************************************
|
||||
* 0xAA80 - 0xAADF Tai Viet
|
||||
**********************************************************************
|
||||
|
||||
graph <TAI_VIET_LETTER_LOW_KO>;...;<TAI_VIET_TONE_MAI_SONG>;/
|
||||
<TAI_VIET_SYMBOL_KON>;...;<TAI_VIET_SYMBOL_KOI_KOI>
|
||||
|
||||
**********************************************************************
|
||||
* 0xAAE0 - 0xAAFF Meetei Mayek Extensions
|
||||
**********************************************************************
|
||||
|
||||
graph <MEETEI_MAYEK_LETTER_E>;...;<MEETEI_MAYEK_VIRAMA>
|
||||
|
||||
**********************************************************************
|
||||
* 0xABC0 - 0xABFF Meetei Mayek
|
||||
**********************************************************************
|
||||
|
||||
graph <MEETEI_MAYEK_LETTER_KOK>;...;<MEETEI_MAYEK_APUN_IYEK>
|
||||
digit <MEETEI_MAYEK_DIGIT_ZERO>;...;<MEETEI_MAYEK_DIGIT_NINE>
|
||||
|
||||
**********************************************************************
|
||||
* 0xFB50 - 0xFDFF Arabic Presentation Forms-A (differential)
|
||||
**********************************************************************
|
||||
|
||||
punct <ORNATE_LEFT_PARENTHESIS>;/
|
||||
<ORNATE_RIGHT_PARENTHESIS>
|
||||
|
||||
**********************************************************************
|
||||
* 0xFE10 - 0xFE1F Vertical Forms
|
||||
**********************************************************************
|
||||
|
||||
graph <PRESENTATION_FORM_FOR_VERTICAL_COMMA>;...;/
|
||||
<PRESENTATION_FORM_FOR_VERTICAL_HORIZONTAL_ELLIPSIS>
|
||||
|
||||
**********************************************************************
|
||||
* 0xFE20 - 0xFE2F Combining Half Marks
|
||||
**********************************************************************
|
||||
|
||||
graph <COMBINING_LIGATURE_LEFT_HALF>;...;<COMBINING_CONJOINING_MACRON_BELOW>
|
||||
|
||||
**********************************************************************
|
||||
* 0xFE30 - 0xFE4F CJK Compatibility Forms
|
||||
**********************************************************************
|
||||
|
||||
punct <PRESENTATION_FORM_FOR_VERTICAL_TWO_DOT_LEADER>;...;<WAVY_LOW_LINE>
|
||||
|
||||
**********************************************************************
|
||||
* 0xFE50 - 0xFE6F Small Form Variants
|
||||
**********************************************************************
|
||||
|
||||
punct <SMALL_COMMA>;...;<SMALL_COMMERCIAL_AT>
|
||||
|
||||
**********************************************************************
|
||||
* 0xFE70 - 0xFEFF Arabic Presentation Forms-B (differential)
|
||||
**********************************************************************
|
||||
|
||||
blank <ZERO_WIDTH_NO-BREAK_SPACE>
|
||||
|
||||
**********************************************************************
|
||||
* 0x10300 - 0x1032F Old Italic
|
||||
**********************************************************************
|
||||
|
||||
graph <OLD_ITALIC_LETTER_A>;...;<OLD_ITALIC_NUMERAL_FIFTY>
|
||||
number <OLD_ITALIC_NUMERAL_ONE>;...;<OLD_ITALIC_NUMERAL_FIFTY>
|
||||
|
||||
**********************************************************************
|
||||
* 0x10330 - 0x1034F Gothic
|
||||
**********************************************************************
|
||||
|
||||
graph <GOTHIC_LETTER_AHSA>;...;<GOTHIC_LETTER_NINE_HUNDRED>
|
||||
number <GOTHIC_LETTER_NINE_HUNDRED>
|
||||
|
||||
**********************************************************************
|
||||
* 0x1D100 - 0x1D1FF Musical Symbols
|
||||
**********************************************************************
|
||||
|
||||
punct <MUSICAL_SYMBOL_SINGLE_BARLINE>;...;<MUSICAL_SYMBOL_DRUM_CLEF-2>;/
|
||||
<MUSICAL_SYMBOL_MULTIPLE_MEASURE_REST>;...;<MUSICAL_SYMBOL_COMBINING_FLAG-5>;/
|
||||
<MUSICAL_SYMBOL_COMBINING_ACCENT>
|
||||
cntrl <MUSICAL_SYMBOL_BEGIN_BEAM>;...;<MUSICAL_SYMBOL_END_PHRASE>
|
||||
graph <MUSICAL_SYMBOL_COMBINING_STACCATO>;...;<MUSICAL_SYMBOL_PES_SUBPUNCTIS>
|
||||
|
||||
**********************************************************************
|
||||
* 0x1D400 - 0x1D7FF Mathematical Alphanumeric Symbols
|
||||
**********************************************************************
|
||||
|
||||
graph <MATHEMATICAL_BOLD_CAPITAL_A>;...;<MATHEMATICAL_MONOSPACE_DIGIT_NINE>
|
||||
|
||||
**********************************************************************
|
||||
* 0x1F600 - 0x1F64F Emoticons (Emoji)
|
||||
**********************************************************************
|
||||
|
||||
graph <GRINNING_FACE>;...;<PERSON_WITH_FOLDED_HANDS>
|
||||
|
||||
**********************************************************************
|
||||
* 0x1F680 - 0x1F6FF Transport and Map Symbols
|
||||
**********************************************************************
|
||||
|
||||
graph <ROCKET>;...;<LEFT_LUGGAGE>
|
||||
|
||||
**********************************************************************
|
||||
* 0x1F700 - 0x1F77F Alchemical Symbols
|
||||
**********************************************************************
|
||||
|
||||
graph <ALCHEMICAL_SYMBOL_FOR_QUINTESSENCE>;...;/
|
||||
<ALCHEMICAL_SYMBOL_FOR_HALF_OUNCE>
|
||||
|
||||
**********************************************************************
|
||||
* 0x1F800 - 0x1F8FF Supplemental Arrows-C
|
||||
**********************************************************************
|
||||
|
||||
graph <LEFTWARDS_ARROW_WITH_SMALL_TRIANGLE_ARROWHEAD>;...;/
|
||||
<WHITE_ARROW_SHAFT_WIDTH_TWO_THIRDS>
|
||||
|
||||
**********************************************************************
|
||||
* 0x20000 - 0x2A6D6 CJK Unified Ideographs Extension B
|
||||
**********************************************************************
|
||||
|
||||
alpha <CJK_UNIFIED_IDEOGRAPH-20000>;...;<CJK_UNIFIED_IDEOGRAPH-2B81D>
|
||||
|
||||
**********************************************************************
|
||||
* 0x2A700 - 0x2B734 CJK Unified Ideographs Extension C
|
||||
**********************************************************************
|
||||
|
||||
alpha <CJK_UNIFIED_IDEOGRAPH-2A700>;...;<CJK_UNIFIED_IDEOGRAPH-2B734>
|
||||
|
||||
**********************************************************************
|
||||
* 0x2B740 - 0x2B81D CJK Unified Ideographs Extension D
|
||||
**********************************************************************
|
||||
|
||||
alpha <CJK_UNIFIED_IDEOGRAPH-2B740>;...;<CJK_UNIFIED_IDEOGRAPH-2B81D>
|
|
@ -58,7 +58,7 @@ my %FILESNAMES = (
|
|||
"timedef" => "LC_TIME",
|
||||
"msgdef" => "LC_MESSAGES",
|
||||
"numericdef" => "LC_NUMERIC",
|
||||
"colldef" => "LC_COLLATE",
|
||||
"colldef" => "LC_COLLATE",
|
||||
"ctypedef" => "LC_CTYPE"
|
||||
);
|
||||
|
||||
|
@ -348,19 +348,9 @@ sub transform_ctypes {
|
|||
$file .= $c;
|
||||
my $actfile = $file;
|
||||
|
||||
my $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
|
||||
$filename = "$ETCDIR/$file.$DEFENCODING.src"
|
||||
if (! -f $filename);
|
||||
if (! -f $filename
|
||||
&& defined $languages{$l}{$f}{fallback}) {
|
||||
$file = $languages{$l}{$f}{fallback};
|
||||
$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
|
||||
}
|
||||
$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src"
|
||||
if (! -f $filename);
|
||||
my $filename = "$CLDRDIR/posix/xx_Comm_US.UTF-8.src";
|
||||
if (! -f $filename) {
|
||||
print STDERR
|
||||
"Cannot open $file.$DEFENCODING.src or fallback\n";
|
||||
print STDERR "Cannot open $filename\n";
|
||||
next;
|
||||
}
|
||||
open(FIN, "$filename");
|
||||
|
@ -370,34 +360,45 @@ sub transform_ctypes {
|
|||
my $shex;
|
||||
my $uhex;
|
||||
while (<FIN>) {
|
||||
if ((/^comment_char\s/) || (/^escape_char\s/)){
|
||||
push @lines, $_;
|
||||
}
|
||||
if (/^LC_CTYPE/../^END LC_CTYPE/) {
|
||||
push @lines, $_;
|
||||
}
|
||||
push @lines, $_;
|
||||
}
|
||||
close(FIN);
|
||||
$shex = sha1_hex(join("\n", @lines));
|
||||
$languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
|
||||
$hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;
|
||||
open(FOUT, ">$TYPE.draft/$actfile.$DEFENCODING.src");
|
||||
print FOUT <<EOF;
|
||||
print FOUT @lines;
|
||||
close(FOUT);
|
||||
foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
|
||||
next if ($enc eq $DEFENCODING);
|
||||
$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
|
||||
if (! -f $filename) {
|
||||
print STDERR "Cannot open $filename\n";
|
||||
next;
|
||||
}
|
||||
@lines = ();
|
||||
open(FIN, "$filename");
|
||||
while (<FIN>) {
|
||||
if ((/^comment_char\s/) || (/^escape_char\s/)){
|
||||
push @lines, $_;
|
||||
}
|
||||
if (/^LC_CTYPE/../^END LC_CTYPE/) {
|
||||
push @lines, $_;
|
||||
}
|
||||
}
|
||||
close(FIN);
|
||||
$uhex = sha1_hex(join("\n", @lines) . $enc);
|
||||
$languages{$l}{$f}{data}{$c}{$enc} = $uhex;
|
||||
$hashtable{$uhex}{"${l}_${f}_${c}.$enc"} = 1;
|
||||
open(FOUT, ">$TYPE.draft/$actfile.$enc.src");
|
||||
print FOUT <<EOF;
|
||||
# Warning: Do not edit. This file is automatically extracted from the
|
||||
# tools in /usr/src/tools/tools/locale. The data is obtained from the
|
||||
# CLDR project, obtained from http://cldr.unicode.org/
|
||||
# -----------------------------------------------------------------------------
|
||||
EOF
|
||||
print FOUT @lines;
|
||||
close(FOUT);
|
||||
|
||||
foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
|
||||
next if ($enc eq $DEFENCODING);
|
||||
copy ("$TYPE.draft/$actfile.$DEFENCODING.src",
|
||||
"$TYPE.draft/$actfile.$enc.src");
|
||||
$uhex = sha1_hex(join("\n", @lines) . $enc);
|
||||
$languages{$l}{$f}{data}{$c}{$enc} = $uhex;
|
||||
$hashtable{$uhex}{"${l}_${f}_${c}.$enc"} = 1;
|
||||
print FOUT @lines;
|
||||
close(FOUT);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -769,6 +770,7 @@ sub make_makefile {
|
|||
print "Creating Makefile for $TYPE\n";
|
||||
my $SRCOUT;
|
||||
my $SRCOUT2;
|
||||
my $SRCOUT3;
|
||||
my $MAPLOC;
|
||||
if ($TYPE eq "colldef") {
|
||||
$SRCOUT = "localedef -D -U -i \${.IMPSRC} \\\n" .
|
||||
|
@ -786,6 +788,15 @@ sub make_makefile {
|
|||
$SRCOUT2 = "LC_CTYPE";
|
||||
$MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
|
||||
"locale/etc/final-maps\n";
|
||||
$SRCOUT3 = "## SYMPAIRS\n\n" .
|
||||
".for PAIR in \${SYMPAIRS}\n" .
|
||||
"\${PAIR:C/^.*://:S/src\$/LC_CTYPE/}: " .
|
||||
"\${PAIR:C/:.*//}\n" .
|
||||
"\tlocaledef -D -U -c -w \${MAPLOC}/widths.txt \\\n" .
|
||||
"\t-f \${MAPLOC}/map.\${.TARGET:T:R:C/^.*\\.//} " .
|
||||
"\\\n\t-i \${.ALLSRC} \${.OBJDIR}/\${.TARGET:T:R} " .
|
||||
" || true\n" .
|
||||
".endfor\n\n";
|
||||
}
|
||||
else {
|
||||
$SRCOUT = "grep -v -E '^(\#\$\$|\#[ ])' < \${.IMPSRC} > \${.TARGET}";
|
||||
|
@ -794,7 +805,6 @@ sub make_makefile {
|
|||
}
|
||||
open(FOUT, ">$TYPE.draft/Makefile");
|
||||
print FOUT <<EOF;
|
||||
# \$FreeBSD\$
|
||||
# Warning: Do not edit. This file is automatically generated from the
|
||||
# tools in /usr/src/tools/tools/locale.
|
||||
|
||||
|
@ -826,18 +836,26 @@ EOF
|
|||
} keys(%{$hashtable{$hash}});
|
||||
} elsif ($TYPE eq "ctypedef") {
|
||||
@files = sort {
|
||||
if ($a =~ /^en_x_US/ ||
|
||||
if ($a eq 'en_x_US.UTF-8') { return -1; }
|
||||
elsif ($b eq 'en_x_US.UTF-8') { return 1; }
|
||||
if ($a =~ /^en_x_US/) { return -1; }
|
||||
elsif ($b =~ /^en_x_US/) { return 1; }
|
||||
|
||||
if ($a =~ /^en_x_GB.ISO8859-15/ ||
|
||||
$a =~ /^ru_x_RU/) { return -1; }
|
||||
elsif ($b =~ /^en_x_US/ ||
|
||||
elsif ($b =~ /^en_x_GB.ISO8859-15/ ||
|
||||
$b =~ /ru_x_RU/) { return 1; }
|
||||
else { return uc($b) cmp uc($a); }
|
||||
|
||||
if ($a eq 'en_x_US.UTF-8') { return -1; }
|
||||
elsif ($b eq 'en_x_US.UTF-8') { return 1; }
|
||||
else { return uc($b) cmp uc($a); }
|
||||
} keys(%{$hashtable{$hash}});
|
||||
} else {
|
||||
@files = sort(keys(%{$hashtable{$hash}}));
|
||||
@files = sort {
|
||||
if ($a =~ /_Comm_/ ||
|
||||
$b eq 'en_x_US.UTF-8') { return 1; }
|
||||
elsif ($b =~ /_Comm_/ ||
|
||||
$a eq 'en_x_US.UTF-8') { return -1; }
|
||||
else { return uc($b) cmp uc($a); }
|
||||
} keys(%{$hashtable{$hash}});
|
||||
}
|
||||
if ($#files > 0) {
|
||||
my $link = shift(@files);
|
||||
|
@ -909,7 +927,7 @@ SYMLINKS+= ../\${f:C/:.*\$//}/\${FILESNAME} \${LOCALEDIR}/\${f:C/^.*://}
|
|||
FILESDIR_\${f}.${SRCOUT2}= \${LOCALEDIR}/\${f}
|
||||
.endfor
|
||||
|
||||
.include <bsd.prog.mk>
|
||||
${SRCOUT3}.include <bsd.prog.mk>
|
||||
EOF
|
||||
|
||||
close(FOUT);
|
||||
|
|
|
@ -23,37 +23,28 @@ old=${base}/../${1}.draft
|
|||
new=${base}/../${1}
|
||||
TEMP=/tmp/${1}.locales
|
||||
TEMP2=/tmp/${1}.hashes
|
||||
TEMP3=/tmp/${1}.symlinks
|
||||
FULLMAP=/tmp/utf8-map
|
||||
FULLEXTRACT=/tmp/extracted-names
|
||||
AWKCMD="/## PLACEHOLDER/ { \
|
||||
while ( getline line < \"${TEMP}\" ) {print line} } \
|
||||
!/## PLACEHOLDER/ { print \$0 }"
|
||||
/## SYMPAIRS/ { \
|
||||
while ( getline line < \"${TEMP3}\" ) {print line} } \
|
||||
!/## / { print \$0 }"
|
||||
|
||||
grep '^LOCALES+' ${old}/Makefile > ${TEMP}
|
||||
|
||||
if [ $1 = "ctypedef" ]
|
||||
then
|
||||
keep=$(cat ${TEMP} | awk '/UTF-8/ { print $2 }')
|
||||
else
|
||||
keep=$(cat ${TEMP} | awk '{ print $2 }')
|
||||
fi
|
||||
|
||||
for original in ${keep}
|
||||
do
|
||||
cp ${old}/${original}.src ${new}/
|
||||
done
|
||||
|
||||
if [ $1 = "ctypedef" ]
|
||||
then
|
||||
keep=$(cat ${TEMP} | awk '{ print $2 ".src" }')
|
||||
(cd ${old} && md5 -r ${keep} | sort) > ${TEMP2}
|
||||
linx=$(cat ${TEMP2} | awk '!/UTF-8/ { print $2 }')
|
||||
for original in ${linx}
|
||||
keep=$(awk '{ if ($1 != last1) print $2; last1 = $1; }' ${TEMP2})
|
||||
for original in ${keep}
|
||||
do
|
||||
linkhash=$(fgrep "${original}" ${TEMP2} | awk '{ print $1 }')
|
||||
utf8file=$(fgrep "${linkhash}" ${TEMP2} | fgrep 'UTF-8' | awk '{ print $2 }')
|
||||
ln -s ${utf8file} ${new}/${original}
|
||||
cp ${old}/${original} ${new}/
|
||||
done
|
||||
awk '{ if ($1 == last1) { print "SYMPAIRS+=\t" last2 ":" $2 } \
|
||||
else {last1 = $1; last2 = $2}}' ${TEMP2} > ${TEMP3}
|
||||
rm -f ${TEMP2}
|
||||
/usr/bin/sed -E -e 's/[ ]+/ /g' \
|
||||
${CLDRDIR}/posix/UTF-8.cm \
|
||||
|
@ -80,8 +71,16 @@ fi
|
|||
echo map ${map} converted.
|
||||
done
|
||||
|
||||
else # below is everything but ctypedef
|
||||
|
||||
keep=$(cat ${TEMP} | awk '{ print $2 }')
|
||||
for original in ${keep}
|
||||
do
|
||||
cp ${old}/${original}.src ${new}/
|
||||
done
|
||||
|
||||
fi
|
||||
|
||||
grep -v '^LOCALES+' ${old}/Makefile | awk "${AWKCMD}" > ${new}/Makefile
|
||||
|
||||
rm -f ${TEMP}
|
||||
rm -f ${TEMP} ${TEMP3}
|
||||
|
|
373
tools/tools/locale/tools/utf8-rollup.pl
Executable file
373
tools/tools/locale/tools/utf8-rollup.pl
Executable file
|
@ -0,0 +1,373 @@
|
|||
#!/usr/local/bin/perl -wC
|
||||
|
||||
use strict;
|
||||
#use File::Copy;
|
||||
#use XML::Parser;
|
||||
use Tie::IxHash;
|
||||
#use Data::Dumper;
|
||||
use Getopt::Long;
|
||||
#use Digest::SHA qw(sha1_hex);
|
||||
#require "charmaps.pm";
|
||||
|
||||
|
||||
# Command-line handling.
#
# Both --cldr and --etc are mandatory.  The options are parsed first and
# validated afterwards, so that a malformed command line (unknown option,
# missing directory, stray positional argument) prints the usage text
# instead of continuing with undefined directories.
my $CLDRDIR = undef;
my $ETCDIR = undef;

my $result = GetOptions (
    "cldr=s" => \$CLDRDIR,
    "etc=s"  => \$ETCDIR,
);

# GetOptions removes everything it recognised from @ARGV; anything left
# over is an argument this script does not understand.
if (!$result || !defined $CLDRDIR || !defined $ETCDIR || @ARGV) {
    print "Usage: $0 --cldr=<cldrdir> --etc=<etcdir>\n";
    exit(1);
}
|
||||
|
||||
# Ordered list of the locales that are rolled up into the common UTF-8
# LC_CTYPE source.  Each entry is a two-element array reference:
#   [0] the locale whose <locale>.UTF-8.src file is handed to compress_ctype
#   [1] the banner text (lines starting with the '*' comment character)
#       that generate_sections prints in front of that locale's data
# The banner text is purely informational; it lists the Unicode blocks the
# locale is expected to contribute.
my @SECTIONS = (
    ["en_US",       "* 0x0000 - 0x007F Basic Latin\n" .
                    "* 0x0080 - 0x00FF Latin-1 Supplement\n" .
                    "* 0x0100 - 0x017F Latin Extended-A\n" .
                    "* 0x0180 - 0x024F Latin Extended-B\n" .
                    "* 0x0250 - 0x02AF IPA Extensions\n" .
                    "* 0x1D00 - 0x1D7F Phonetic Extensions\n" .
                    "* 0x1D80 - 0x1DBF Phonetic Extensions Supplement\n" .
                    "* 0x1E00 - 0x1EFF Latin Extended Additional\n" .
                    "* 0x2150 - 0x218F Number Forms (partial - Roman Numerals)\n".
                    "* 0x2C60 - 0x2C7F Latin Extended-C\n" .
                    "* 0xA720 - 0xA7FF Latin Extended-D\n" .
                    "* 0xAB30 - 0xAB6F Latin Extended-E\n" .
                    "* 0xFB00 - 0xFB4F Alphabetic Presentation Forms (partial)\n".
                    "* 0xFF00 - 0xFFEF Halfwidth and Fullwidth Forms (partial)\n"],
    ["el_GR",       "* 0x0370 - 0x03FF Greek (No Coptic!)\n" .
                    "* 0x1F00 - 0x1FFF Greek Extended\n"],
    ["ru_RU",       "* 0x0400 - 0x04FF Cyrillic\n" .
                    "* 0x0500 - 0x052F Cyrillic Supplementary\n" .
                    "* 0x2DE0 - 0x2DFF Cyrillic Extended-A\n" .
                    "* 0xA640 - 0xA69F Cyrillic Extended-B\n"],
    ["hy_AM",       "* 0x0530 - 0x058F Armenian\n" .
                    "* 0xFB00 - 0xFB4F Alphabetic Presentation Forms (partial)\n"],
    ["he_IL",       "* 0x0590 - 0x05FF Hebrew\n" .
                    "* 0xFB00 - 0xFB4F Alphabetic Presentation Forms (partial)\n"],
    ["ar_SA",       "* 0x0600 - 0x06FF Arabic\n" .
                    "* 0x0750 - 0x077F Arabic Supplement\n" .
                    "* 0x08A0 - 0x08FF Arabic Extended-A\n" .
                    "* 0xFB50 - 0xFDFF Arabic Presentation Forms (partial)\n" .
                    "* 0xFE70 - 0xFEFF Arabic Presentation Forms-B (partial)\n"],
    ["hi_IN",       "* 0x0900 - 0x097F Devanagari\n" .
                    "* 0xA8E0 - 0xA8FF Devanagari Extended\n"],
    ["bn_IN",       "* 0x0980 - 0x09FF Bengali\n"],
    ["pa_Guru_IN",  "* 0x0A00 - 0x0A7F Gurmukhi\n"],
    ["gu_IN",       "* 0x0A80 - 0x0AFF Gujarati\n"],
    ["or_IN",       "* 0x0B00 - 0x0B7F Oriya\n"],
    ["ta_IN",       "* 0x0B80 - 0x0BFF Tamil\n"],
    ["te_IN",       "* 0x0C00 - 0x0C7F Telugu\n"],
    ["kn_IN",       "* 0x0C80 - 0x0CFF Kannada\n"],
    ["ml_IN",       "* 0x0D00 - 0x0D7F Malayalam\n"],
    ["si_LK",       "* 0x0D80 - 0x0DFF Sinhala\n"],
    ["th_TH",       "* 0x0E00 - 0x0E7F Thai\n"],
    ["lo_LA",       "* 0x0E80 - 0x0EFF Lao\n"],
    ["bo_IN",       "* 0x0F00 - 0x0FFF Tibetan\n"],
    ["my_MM",       "* 0x1000 - 0x109F Myanmar\n" .
                    "* 0xA9E0 - 0xA9FF Myanmar Extended-B\n" .
                    "* 0xAA60 - 0xAA7F Myanmar Extended-A\n"],
    ["ka_GE",       "* 0x10A0 - 0x10FF Georgian\n" .
                    "* 0x2D00 - 0x2D2F Georgian Supplement\n"],
    ["ja_JP",       "* 0x1100 - 0x11FF Hangul Jamo\n" .
                    "* 0x3000 - 0x303F CJK Symbols and Punctuation (partial)\n" .
                    "* 0x3040 - 0x309F Hiragana\n" .
                    "* 0x30A0 - 0x30FF Katakana\n" .
                    "* 0x31F0 - 0x31FF Katakana Phonetic Extensions\n" .
                    "* 0x3130 - 0x318F Hangul Compatibility Jamo (partial)\n" .
                    "* 0x3200 - 0x32FF Enclosed CJK Letters and Months (partial)\n" .
                    "* 0x3300 - 0x33FF CJK Compatibility\n" .
                    "* 0x3400 - 0x4DB5 CJK Unified Ideographs Extension-A (added)\n" .
                    "* 0x4E00 - 0x9FCC CJK Unified Ideographs (overridden)\n" .
                    "* 0xAC00 - 0xD7A3 Hangul Syllables (partial)\n" .
                    "* 0xD7B0 - 0xD7FF Hangul Jamo Extended-B\n" .
                    "* 0xF900 - 0xFAFF CJK Compatibility Ideographs (partial)\n" .
                    "* 0xFF00 - 0xFFEF Halfwidth and Fullwidth Forms (partial)\n"],
    ["am_ET",       "* 0x1200 - 0x137F Ethiopic\n" .
                    "* 0x1380 - 0x139F Ethiopic Supplement\n" .
                    "* 0x2D80 - 0x2DDF Ethiopic Extended\n" .
                    "* 0xAB00 - 0xAB2F Ethiopic Extended-A\n"],
    ["chr_US",      "* 0x13A0 - 0x13FF Cherokee\n"],
    ["km_KH",       "* 0x1780 - 0x17FF Khmer\n" .
                    "* 0x19E0 - 0x19FF Khmer Symbols\n"],
    ["shi_Tfng_MA", "* 0x2D30 - 0x2D7F Tifinagh\n"],
    ["ii_CN",       "* 0xA000 - 0xA48F Yi Syllables\n" .
                    "* 0xA490 - 0xA4CF Yi Radicals\n"],
    ["vai_Vaii_LR", "* 0xA500 - 0xA63F Vai\n"],
    ["ko_KR",       "* 0x3130 - 0x318F Hangul Compatibility Jamo (partial)\n" .
                    "* 0xA960 - 0xA97F Hangul Jamo Extended-A\n" .
                    "* 0xAC00 - 0xD7A3 Hangul Syllables (partial)\n" .
                    "* 0xFF00 - 0xFFEF Halfwidth and Fullwidth Forms (partial)\n"],
);
|
||||
|
||||
# ["zh_Hans_CN", "* 0x2E80 - 0x2EFF CJK Radicals Supplement\n" .
|
||||
#                   "* 0x2F00 - 0x2FDF Kangxi Radicals\n" .
|
||||
# "* 0x3000 - 0x30FF CJK Symbols and Punctuation (partial)\n" .
|
||||
# "* 0x3200 - 0x32FF Enclosed CJK Letters and Months (partial)\n" .
|
||||
# "* 0x3400 - 0x4DB5 CJK Unified Ideographs Extension A\n" .
|
||||
# "* 0xF900 - 0xFAFF CJK Compatibility Ideographs (partial)\n"],
|
||||
|
||||
# File-scope state shared by the subroutines below.
#   %seen          IDs already emitted (merged in by merge_seen)
#   %pending_seen  IDs noted by already_seen but not yet merged
#   %utf8map       character name => hex string, filled by get_utf8map
#   %utf8aliases   character name => previous name with the same value
my (%seen, %pending_seen);
my (%utf8map, %utf8aliases);

# Banner delimiter printed around every section header.
my $stars = "**********************************************************************\n";

# Hand-maintained input appended after the sections, and the generated file.
my $manual_file = "$ETCDIR/manual-input.UTF-8";
my $outfilename = "$ETCDIR/common.UTF-8.src";

# Main program: load the charmap, then write header, sections and footer.
get_utf8map("$CLDRDIR/posix/UTF-8.cm");
generate_header();
generate_sections();
generate_footer();
|
||||
|
||||
############################
|
||||
|
||||
# get_utf8map($file)
#
# Reads a charmap file and fills the file-scope hashes %utf8map and
# %utf8aliases from the entries between the "CHARMAP" and "END CHARMAP"
# lines.  Each entry has the form
#     <NAME_WITH_UNDERSCORES>  \xHH\xHH...
# and is stored as $utf8map{"NAME WITH UNDERSCORES"} = "HHHH..." (the
# "\x" prefixes are stripped, leaving the bare hex digits).  When an entry
# carries the same value as the entry immediately before it, the name is
# also recorded in %utf8aliases, pointing at the previous name.
#
# Dies if the file cannot be opened: without the charmap every later
# lookup would be undefined and the generated output would be meaningless.
sub get_utf8map {
    my $file = shift;

    open(my $fin, '<', $file)
        or die("can't read $file: $!\n");
    my @lines = <$fin>;
    close($fin);
    chomp(@lines);

    my $prev_k = undef;
    my $prev_v = "";
    my $incharmap = 0;
    foreach my $l (@lines) {
        $l =~ s/\r//;
        next if ($l =~ /^\#/);
        next if ($l eq "");

        if ($l eq "CHARMAP") {
            $incharmap = 1;
            next;
        }

        next if (!$incharmap);
        last if ($l eq "END CHARMAP");

        # Skip anything that is not a "<name> value" entry.  Without this
        # guard $1/$2 would still hold the previous entry's captures and
        # the line would be recorded as an alias of itself.
        next unless ($l =~ /^<([^\s]+)>\s+(.*)/);
        my $k = $1;
        my $v = $2;
        $k =~ s/_/ /g;          # unicode char string
        $v =~ s/\\x//g;         # UTF-8 char code
        $utf8map{$k} = $v;

        $utf8aliases{$k} = $prev_k if ($prev_v eq $v);

        $prev_v = $v;
        $prev_k = $k;
    }
}
|
||||
|
||||
# generate_header()
#
# Creates (truncating) the output file named by $outfilename on the global
# FOUT handle and writes the fixed preamble: the "do not edit" notice, the
# comment/escape character declarations and the opening LC_CTYPE line.
# FOUT is left open for the other generate_* subroutines.
sub generate_header {
    unless (open(FOUT, ">", "$outfilename")) {
        die ("can't write to $outfilename\n");
    }

    print FOUT <<HEADER;
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------

comment_char *
escape_char /

LC_CTYPE
HEADER
}
|
||||
|
||||
# generate_footer()
#
# Writes the closing "END LC_CTYPE" line to FOUT and closes the handle.
# The close is checked because errors from buffered writes (for example a
# full disk) are only reported at that point; ignoring them would leave a
# truncated output file behind without any diagnostic.
sub generate_footer {
    print FOUT "\nEND LC_CTYPE\n";
    close (FOUT)
        or die ("can't finish writing $outfilename: $!\n");
}
|
||||
|
||||
# already_seen($ucode)
#
# Returns 1 when $ucode is already present in %seen.  Otherwise notes it
# in %pending_seen (it only becomes "seen" once merge_seen runs) and
# returns 0.
sub already_seen {
    my ($id) = @_;

    return 1 if (defined $seen{$id});

    $pending_seen{$id} = 1;
    return 0;
}
|
||||
|
||||
# already_seen_RO($ucode)
#
# Read-only variant of already_seen: returns 1 when $ucode is present in
# %seen and 0 otherwise, without touching %pending_seen.
sub already_seen_RO {
    my ($id) = @_;

    return (defined $seen{$id}) ? 1 : 0;
}
|
||||
|
||||
# merge_seen()
#
# Promotes every ID collected in %pending_seen into %seen and then empties
# %pending_seen.
sub merge_seen {
    my @pending = keys %pending_seen;

    # Hash-slice assignment: mark all pending IDs as seen in one step.
    @seen{@pending} = (1) x scalar(@pending);

    %pending_seen = ();
}
|
||||
|
||||
# initialize_lines($terr)
#
# Returns the list of synthetic input lines that compress_ctype processes
# before the locale's own file.  Only "ja_JP" gets any: for each of the
# classes "graph" and "alpha" it lists every CJK_UNIFIED_IDEOGRAPH name in
# 3400..4DB5 and in 4E00..9FCC (the class name on the first line of a
# range, ";/" continuations on all but the last line), followed by a
# single "merge\tnow" directive.  Every other locale yields an empty list.
sub initialize_lines {
    my $terr = shift;
    my @seed = ();

    return @seed unless ($terr eq "ja_JP");

    my @spans = ([0x3400, 0x4DB5], [0x4E00, 0x9FCC]);
    foreach my $class ("graph", "alpha") {
        foreach my $span (@spans) {
            my ($first, $last) = @$span;
            foreach my $cp ($first .. $last) {
                my $lead = ($cp == $first) ? $class : "";
                my $tail = ($cp == $last) ? "\n" : ";/\n";
                push @seed, sprintf("%s\t<CJK_UNIFIED_IDEOGRAPH-%X>%s",
                    $lead, $cp, $tail);
            }
        }
    }
    push @seed, "merge\tnow\n";

    return @seed;
}
|
||||
|
||||
# compress_ctype($territory)
#
# Reads the LC_CTYPE section of $CLDRDIR/posix/$territory.UTF-8.src and
# writes a condensed version of it to FOUT:
#   * entries whose ID is already in %seen are dropped, so every character
#     is emitted by the first section that mentions it;
#   * inside the character classes, runs of consecutive IDs are collapsed
#     into "<first>;...;<last>" ranges;
#   * "toupper"/"tolower" lines are copied through unless their first
#     character has already been seen;
#   * the "print" class is skipped entirely;
#   * a "merge" pseudo-category (injected by initialize_lines) triggers
#     merge_seen() in the middle of the input.
# The ID of a character is hex() of its %utf8map value.
#
# If the locale file does not exist or cannot be opened, a message is
# printed to STDERR and nothing is emitted for this territory.
sub compress_ctype {
    my $territory = shift;
    my $term;               # true when the current entry ends its class
    my $active = 0;         # true while a run is open and not yet printed
    my $cat_loaded = 0;     # true while the class name is still unprinted
    my $lock_ID;            # ID / name of the first entry of the open run
    my $prev_ID;            # ID / name of the last entry of the open run
    my $curr_ID;
    my $lock_name;
    my $prev_name;
    my $curr_name;
    my $key_name;
    my $category = '';

    my @lines = initialize_lines ($territory);

    my $filename = "$CLDRDIR/posix/$territory.UTF-8.src";
    if (! -f $filename) {
        print STDERR "Cannot open $filename\n";
        return;
    }
    # Lexical handle and checked open: the file may exist and still be
    # unreadable, and the bareword FIN handle is shared with other subs.
    my $fin;
    if (!open($fin, '<', $filename)) {
        print STDERR "Cannot open $filename: $!\n";
        return;
    }
    print "Reading from $filename\n";
    while (<$fin>) {
        # Keep only the body of the LC_CTYPE section, minus the delimiter
        # lines, the short star separator and empty lines.
        if (/^LC_CTYPE/../^END LC_CTYPE/) {
            if ($_ ne "LC_CTYPE\n" && $_ ne "END LC_CTYPE\n" &&
                $_ ne "*************\n" && $_ ne "\n") {
                push @lines, $_;
            }
        }
    }
    close($fin);

    foreach my $line (@lines) {
        # A line starting with "<word><TAB>" opens a new category.
        if ($line =~ m/^([a-z]{3,})\t/) {
            $category = $1;
            if ($category eq 'merge') {
                merge_seen();
                next;
            }
            if ($category ne 'print') {
                $cat_loaded = 1;
            }
        }
        next if ($category eq 'print');

        if ($category eq 'toupper' || $category eq 'tolower') {
            # Mapping lines: the name directly followed by a comma is
            # looked up; the line is copied unless that ID was seen.
            if ($line =~ m/<([-_A-Za-z0-9]+)>,/) {
                $key_name = $1;
                $key_name =~ s/_/ /g;
                if (already_seen_RO (hex($utf8map{$key_name}))) {
                    next;
                }
                if ($cat_loaded) { print FOUT $category; }
                $cat_loaded = 0;
                $line =~ s/^[a-z]{3,}\t/\t/;
                print FOUT $line;
            }
            next;
        }

        if ($line =~ m/<([-_A-Za-z0-9]+)>(;.|)$/) {
            # An entry without a trailing ";x" continuation ends the class.
            $term = ($2 eq '') ? 1 : 0;
            $curr_name = $1;
            $key_name = $1;
            $key_name =~ s/_/ /g;
            $curr_ID = hex($utf8map{$key_name});
            # NOTE(review): when the terminating entry of a class has
            # already been seen, this "next" also skips the flush further
            # down, so an open run stays pending ($active remains set)
            # until a later entry closes it.
            if (already_seen ($curr_ID)) {
                next;
            }
            if ($active) {
                if ($curr_ID == $prev_ID + 1) {
                    # Extends the open run.
                    $prev_ID = $curr_ID;
                    $prev_name = $curr_name;
                } else {
                    # Gap: print the open run, then start a new one here.
                    if ($cat_loaded) { print FOUT $category; }
                    $cat_loaded = 0;
                    if ($prev_ID == $lock_ID) {
                        print FOUT "\t<" . $prev_name . ">;/\n";
                    } elsif ($prev_ID - 1 == $lock_ID) {
                        print FOUT "\t<" . $lock_name . ">;/\n";
                        print FOUT "\t<" . $prev_name . ">;/\n";
                    } else {
                        print FOUT "\t<" . $lock_name .
                            ">;...;<" . $prev_name . ">;/\n";
                    }
                    $lock_ID = $curr_ID;
                    $prev_ID = $curr_ID;
                    $lock_name = $curr_name;
                    $prev_name = $curr_name;
                }
            } else {
                $active = 1;
                $lock_ID = $curr_ID;
                $prev_ID = $curr_ID;
                $lock_name = $curr_name;
                $prev_name = $curr_name;
            }
            if ($term) {
                # Last entry of the class: print the open run without a
                # continuation marker and reset the run state.
                if ($cat_loaded) { print FOUT $category; }
                $cat_loaded = 0;
                if ($curr_ID == $lock_ID) {
                    print FOUT "\t<" . $curr_name . ">\n";
                } elsif ($curr_ID == $lock_ID + 1) {
                    print FOUT "\t<" . $lock_name . ">;/\n";
                    print FOUT "\t<" . $curr_name . ">\n";
                } else {
                    print FOUT "\t<" . $lock_name .
                        ">;...;<" . $curr_name . ">\n";
                }
                $active = 0;
            }
        } else {
            # Anything else is copied through unchanged.
            print FOUT $line;
        }
    }
}
|
||||
|
||||
# generate_sections()
#
# Writes the body of the output file to FOUT.  For every entry of
# @SECTIONS it prints an empty line, the banner text framed by two $stars
# lines, and the condensed LC_CTYPE data produced by compress_ctype for
# that locale; merge_seen() is called after each section.  Finally the
# contents of $manual_file are appended verbatim.
#
# Dies if $manual_file cannot be opened, since the generated file would
# otherwise silently lack the manual entries.
sub generate_sections {
    foreach my $section (@SECTIONS) {
        print FOUT "\n";
        print FOUT $stars;
        print FOUT $section->[1];
        print FOUT $stars;
        compress_ctype ($section->[0]);
        merge_seen();
    }

    open(my $fin, '<', $manual_file)
        or die ("can't read $manual_file: $!\n");
    print "Reading from $manual_file\n";
    while (my $line = <$fin>) {
        print FOUT $line;
    }
    close($fin);
}
|
Loading…
Reference in a new issue