wine/tools/make_unicode
Nikolay Sivov 1ad2cb51c5 nls: Hardcode minus sign for all locales.
CLDR data for RTL locales includes LRM marker for minus sign text,
in addition to that some locales are using different dash character.
Windows seems to consistently use same character for all locales.

Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=52866
Signed-off-by: Nikolay Sivov <nsivov@codeweavers.com>
Signed-off-by: Alexandre Julliard <julliard@winehq.org>
2022-04-27 13:24:22 +02:00

5407 lines
212 KiB
Perl
Executable file

#!/usr/bin/perl -w
#
# Generate code page .c files from ftp.unicode.org descriptions
#
# Copyright 2000 Alexandre Julliard
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
use strict;
use XML::LibXML;
use Encode;
# Download locations and fixed parameters used by the rest of the script.
# Version numbers live in their own variables so that the URLs built from
# them follow automatically when a version is bumped.

# base URLs for www.unicode.org files
my $UNIVERSION = "14.0.0";
my $UNIDATA = "https://www.unicode.org/Public/$UNIVERSION/ucd/UCD.zip";
my $UNIHAN = "https://www.unicode.org/Public/$UNIVERSION/ucd/Unihan.zip";
my $IDNADATA = "https://www.unicode.org/Public/idna/$UNIVERSION";
my $JISDATA = "https://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS";
my $KSCDATA = "https://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/KSC";
# Use https here as well, like every other unicode.org URL above, so that
# the input data is not fetched over an unauthenticated connection.
my $REPORTS = "https://www.unicode.org/reports";

# Microsoft code page data files
my $MSDATA = "https://download.microsoft.com/download/C/F/7/CF713A5E-9FBC-4FD6-9246-275F65C0E498";
my $MSCODEPAGES = "$MSDATA/Windows Supported Code Page Data Files.zip";

# CLDR archives (current release plus the older 33.0 archive)
my $CLDRVERSION = "41";
my $CLDRDATA = "https://github.com/unicode-org/cldr/archive/refs/tags/release-$CLDRVERSION.zip";
my $CLDR33DATA = "https://www.unicode.org/Public/cldr/33/cldr-common-33.0.zip";

# ISO 639-3 code tables
my $ISO639VERSION = "20220120";
my $ISO639 = "https://iso639-3.sil.org/sites/iso639-3/files/downloads/iso-639-3_Code_Tables_$ISO639VERSION.zip";

# Sort keys file (a relative path; presumably resolved against $REPORTS --
# confirm at the point of use)
my $SORTKEYS = "tr10/allkeys.txt";

# Default char for undefined mappings
my $DEF_CHAR = ord '?';

# Last valid Unicode character
my $MAX_CHAR = 0x10ffff;
# Code page description files to process.  Every entry has the form
# "CodpageFiles/<number>.txt", so only the numbers are listed here
# (kept as strings so that "037" retains its leading zero).
my @allfiles = map { "CodpageFiles/$_.txt" } qw(
    037 437 500 708 720 737 775 850 852 855 857 860 861 862 863 864 865 866 869 874 875
    932 936 949 950
    1026 1250 1251 1252 1253 1254 1255 1256 1257 1258 1361
    10000 10001 10002 10003 10004 10005 10006 10007 10008
    10010 10017 10021 10029 10079 10081 10082
    20127 20866 21866
    28591 28592 28593 28594 28595 28596 28597 28598 28599 28603 28605
);
# Character type flag values, keyed by flag name.  Each flag is a single
# bit: the CT_CTYPE1 flags occupy the low bits starting at bit 0 and the
# CT_CTYPE3 flags are stored in the high 16 bits starting at bit 16.
my %ctype = do {
    # CT_CTYPE1, in bit order (0x0001, 0x0002, ...)
    my @ctype1_names = qw(upper lower digit space punct cntrl blank xdigit alpha defin);
    # CT_CTYPE3 in high 16 bits, in bit order (0x00010000, 0x00020000, ...)
    my @ctype3_names = qw(nonspacing diacritic vowelmark symbol katakana hiragana
                          halfwidth fullwidth ideograph kashida lexical
                          highsurrogate lowsurrogate);
    my %bits;
    @bits{@ctype1_names} = map { 1 << $_ } 0 .. $#ctype1_names;
    @bits{@ctype3_names} = map { 1 << ( 16 + $_ ) } 0 .. $#ctype3_names;
    # "alpha" additionally carries the top bit, giving 0x0100 | 0x80000000
    $bits{"alpha"} |= 0x80000000;
    %bits;
};
# Numeric values for the one-letter bracket type codes "o" and "c".
my %bracket_types = ( "o" => 0, "c" => 1 );
# Numeric values for the indic type names.  The values are consecutive,
# so each name's value is simply its position in this list ("Other" is 0).
my %indic_types = do {
    my @names = qw(
        Other
        Bindu
        Visarga
        Avagraha
        Nukta
        Virama
        Vowel_Independent
        Vowel_Dependent
        Vowel
        Consonant_Placeholder
        Consonant
        Consonant_Dead
        Consonant_Succeeding_Repha
        Consonant_Subjoined
        Consonant_Medial
        Consonant_Final
        Consonant_Head_Letter
        Modifying_Letter
        Tone_Letter
        Tone_Mark
        Register_Shifter
        Consonant_Preceding_Repha
        Pure_Killer
        Invisible_Stacker
        Gemination_Mark
        Cantillation_Mark
        Non_Joiner
        Joiner
        Number_Joiner
        Number
        Brahmi_Joining_Number
        Consonant_With_Stacker
        Consonant_Prefixed
        Syllable_Modifier
        Consonant_Killer
        Consonant_Initial_Postfixed
    );
    map { ( $names[$_] => $_ ) } 0 .. $#names;
};
# Numeric values for the matra position names.  The values are consecutive
# and start at 1, so each name's value is its 1-based position in the list.
my %matra_types = do {
    my @names = qw(
        Right
        Left
        Visual_Order_Left
        Left_And_Right
        Top
        Bottom
        Top_And_Bottom
        Top_And_Right
        Top_And_Left
        Top_And_Left_And_Right
        Bottom_And_Right
        Top_And_Bottom_And_Right
        Overstruck
        Invisible
        Bottom_And_Left
        Top_And_Bottom_And_Left
    );
    map { ( $names[$_] => $_ + 1 ) } 0 .. $#names;
};
# Numeric values for the break class codes.  The values are consecutive
# and start at 1, so each code's value is its 1-based position in the list
# ("BK" is 0x0001, "ZWJ" is 0x002b).
my %break_types = do {
    my @codes = qw(
        BK CR LF CM SG GL CB SP
        ZW NL WJ JL JV JT H2 H3
        XX OP CL CP QU NS EX SY
        IS PR PO NU AL ID IN HY
        BB BA SA AI B2 HL CJ RI
        EB EM ZWJ
    );
    map { ( $codes[$_] => $_ + 1 ) } 0 .. $#codes;
};
# Numeric values for the vertical type codes R, U, Tr and Tu (0 through 3).
my %vertical_types;
@vertical_types{ "R", "U", "Tr", "Tu" } = ( 0 .. 3 );
# Maps each two-letter Unicode general category code to the combination of
# %ctype flags assigned to characters of that category.  Every category
# includes the "defin" flag; the flag combinations that are shared by
# several categories are computed once below.
my %categories = do {
    my $defined = $ctype{"defin"};
    my $letter  = $defined | $ctype{"alpha"};
    my $space   = $defined | $ctype{"space"};
    my $control = $defined | $ctype{"cntrl"};
    my $punct   = $defined | $ctype{"punct"};
    my $symbol  = $defined | $ctype{"symbol"};
    (
        # letters
        "Lu" => $letter | $ctype{"upper"},                   # Letter, Uppercase
        "Ll" => $letter | $ctype{"lower"},                   # Letter, Lowercase
        "Lt" => $letter | $ctype{"upper"} | $ctype{"lower"}, # Letter, Titlecase
        "Lm" => $letter,                                     # Letter, Modifier
        "Lo" => $letter,                                     # Letter, Other
        # marks
        "Mn" => $defined | $ctype{"nonspacing"},             # Mark, Non-Spacing
        "Mc" => $defined,                                    # Mark, Spacing Combining
        "Me" => $defined,                                    # Mark, Enclosing
        # numbers
        "Nd" => $defined | $ctype{"digit"},                  # Number, Decimal Digit
        "Nl" => $letter,                                     # Number, Letter
        "No" => $defined,                                    # Number, Other
        # separators
        "Zs" => $space,                                      # Separator, Space
        "Zl" => $space,                                      # Separator, Line
        "Zp" => $space,                                      # Separator, Paragraph
        # others
        "Cc" => $control,                                    # Other, Control
        "Cf" => $control,                                    # Other, Format
        "Cs" => $defined,                                    # Other, Surrogate
        "Co" => $defined,                                    # Other, Private Use
        "Cn" => $defined,                                    # Other, Not Assigned
        # punctuation
        "Pc" => $punct,                                      # Punctuation, Connector
        "Pd" => $punct,                                      # Punctuation, Dash
        "Ps" => $punct,                                      # Punctuation, Open
        "Pe" => $punct,                                      # Punctuation, Close
        "Pi" => $punct,                                      # Punctuation, Initial quote
        "Pf" => $punct,                                      # Punctuation, Final quote
        "Po" => $punct,                                      # Punctuation, Other
        # symbols
        "Sm" => $symbol,                                     # Symbol, Math
        "Sc" => $symbol,                                     # Symbol, Currency
        "Sk" => $symbol,                                     # Symbol, Modifier
        "So" => $symbol,                                     # Symbol, Other
    );
};
# a few characters need additional categories that cannot be determined automatically
#
# Each key is the name of a %ctype flag and each value is a reference to
# the list of code points (single values and ranges) that receive that flag
# in addition to whatever their general category provides.
# NOTE(review): the code that applies these lists is not visible in this
# part of the file; the lists are assumed to be treated as sets.
my %special_categories =
(
    "xdigit"     => [ ord('0')..ord('9'),ord('A')..ord('F'),ord('a')..ord('f'),
                      0xff10..0xff19, 0xff21..0xff26, 0xff41..0xff46 ],
    "space"      => [ 0x09..0x0d, 0x85 ],
    "blank"      => [ 0x09, 0x20, 0xa0, 0x3000, 0xfeff ],
    "cntrl"      => [ 0x070f, 0x200c, 0x200d,
                      0x200e, 0x200f, 0x202a, 0x202b, 0x202c, 0x202d, 0x202e,
                      0x206a, 0x206b, 0x206c, 0x206d, 0x206e, 0x206f, 0xfeff,
                      0xfff9, 0xfffa, 0xfffb ],
    "punct"      => [ 0x24, 0x2b, 0x3c..0x3e, 0x5e, 0x60, 0x7c, 0x7e, 0xa2..0xbe,
                      0xd7, 0xf7 ],
    "digit"      => [ 0xb2, 0xb3, 0xb9 ],
    "lower"      => [ 0xaa, 0xba, 0x2071, 0x207f ],
    "nonspacing" => [ 0xc0..0xc5, 0xc7..0xcf, 0xd1..0xd6, 0xd8..0xdd, 0xe0..0xe5, 0xe7..0xef,
                      0xf1..0xf6, 0xf8..0xfd, 0xff, 0x6de, 0x1929..0x192b, 0x302e..0x302f ],
    "diacritic"  => [ 0x5e, 0x60, 0xb7, 0xd8, 0xf8 ],
    "symbol"     => [ 0x09..0x0d, 0x20..0x23, 0x25, 0x26, 0x28..0x2a, 0x2c, 0x2e..0x2f, 0x3a..0x40,
                      0x5b..0x60, 0x7b..0x7e, 0xa0..0xa9, 0xab..0xb1, 0xb4..0xb8, 0xbb, 0xbf,
                      0x02b9..0x02ba, 0x02c6..0x02cf ],
    "halfwidth"  => [ 0x20..0x7e, 0xa2..0xa3, 0xa5..0xa6, 0xac, 0xaf, 0x20a9 ],
    # 0x30ad used to be listed twice here; the redundant entry was dropped
    "fullwidth"  => [ 0x2018..0x2019, 0x201c..0x201d, 0x3000..0x3002, 0x300c..0x300d, 0x309b..0x309c,
                      0x30a1..0x30ab, 0x30ad, 0x30af, 0x30b1, 0x30b3, 0x30b5, 0x30b7, 0x30b9,
                      0x30bb, 0x30bd, 0x30bf, 0x30c1, 0x30c3, 0x30c4, 0x30c6, 0x30c8, 0x30ca..0x30cf,
                      0x30d2, 0x30d5, 0x30d8, 0x30db, 0x30de..0x30ed, 0x30ef, 0x30f2..0x30f3, 0x30fb,
                      0x3131..0x3164 ],
    "ideograph"  => [ 0x3006..0x3007 ],
    "lexical"    => [ 0x22, 0x24, 0x27, 0x2d, 0x2f, 0x3d, 0x40, 0x5c, 0x5e..0x60, 0x7e,
                      0xa8, 0xaa, 0xad, 0xaf, 0xb4, 0xb8, 0xba,
                      0x02b0..0x02b8, 0x02bc, 0x02c7, 0x02ca..0x02cb, 0x02cf, 0x02d8..0x02dd, 0x02e0..0x02e3,
                      0x037a, 0x0384..0x0385, 0x0387, 0x0559..0x055a, 0x0640, 0x1fbd..0x1fc1,
                      0x1fcd..0x1fcf, 0x1fdd..0x1fdf, 0x1fed..0x1fef, 0x1ffd..0x1ffe, 0x2010..0x2015,
                      0x2032..0x2034, 0x2038, 0x2043..0x2044, 0x207b..0x207c, 0x207f, 0x208b..0x208c,
                      0x2212, 0x2215..0x2216, 0x2500, 0x2504..0x2505, 0x2508..0x2509, 0x254c..0x254d,
                      0x3003, 0x301c, 0x3030..0x3035, 0x309b..0x309e, 0x30fd..0x30fe, 0xfe31..0xfe32,
                      0xfe58, 0xfe63, 0xfe66, 0xfe68..0xfe69, 0xfe6b, 0xff04, 0xff07, 0xff0d, 0xff0f,
                      0xff1d, 0xff20, 0xff3c, 0xff3e, 0xff40, 0xff5e ],
    "kashida"    => [ 0x0640 ],
);
# Numeric direction value for each bidirectional class code, listed in
# order of increasing value.  All explicit embedding, override and isolate
# codes share the single value 15.
my %directions =
(
    "L"   => 1,  # Left-to-Right
    "R"   => 2,  # Right-to-Left
    "EN"  => 3,  # European Number
    "ES"  => 4,  # European Number Separator
    "ET"  => 5,  # European Number Terminator
    "AN"  => 6,  # Arabic Number
    "CS"  => 7,  # Common Number Separator
    "B"   => 8,  # Paragraph Separator
    "S"   => 9,  # Segment Separator
    "WS"  => 10, # Whitespace
    "ON"  => 11, # Other Neutrals
    "AL"  => 12, # Right-to-Left Arabic
    "NSM" => 13, # Non-Spacing Mark
    "BN"  => 14, # Boundary Neutral
    # LRE/LRO: Left-to-Right Embedding/Override
    # RLE/RLO: Right-to-Left Embedding/Override
    # PDF:     Pop Directional Format
    # LRI/RLI: Left-to-Right/Right-to-Left Isolate
    # FSI:     First Strong Isolate
    # PDI:     Pop Directional Isolate
    ( map { ( $_ => 15 ) } qw(LRE LRO RLE RLO PDF LRI RLI FSI PDI) ),
);
# C2_* value for each bidirectional class code, listed in order of
# increasing value.  Both "R" and "AL" map to C2_RIGHTTOLEFT, and every
# code in the final list maps to C2_OTHERNEUTRAL.
my %c2_types =
(
    "BN" => 0,  # C2_NOTAPPLICABLE
    "L"  => 1,  # C2_LEFTTORIGHT
    "R"  => 2,  # C2_RIGHTTOLEFT
    "AL" => 2,  # C2_RIGHTTOLEFT
    "EN" => 3,  # C2_EUROPENUMBER
    "ES" => 4,  # C2_EUROPESEPARATOR
    "ET" => 5,  # C2_EUROPETERMINATOR
    "AN" => 6,  # C2_ARABICNUMBER
    "CS" => 7,  # C2_COMMONSEPARATOR
    "B"  => 8,  # C2_BLOCKSEPARATOR
    "S"  => 9,  # C2_SEGMENTSEPARATOR
    "WS" => 10, # C2_WHITESPACE
    # C2_OTHERNEUTRAL
    ( map { ( $_ => 11 ) } qw(NSM ON LRE LRO RLE RLO PDF LRI RLI FSI PDI) ),
);
# Numeric value for each bidirectional class code.  The values are
# consecutive, so each code's value is its position in this list ("ON" is 0).
my %bidi_types = do {
    my @codes =
    (
        "ON",  # Other Neutrals
        "L",   # Left-to-Right
        "R",   # Right-to-Left
        "AN",  # Arabic Number
        "EN",  # European Number
        "AL",  # Right-to-Left Arabic
        "NSM", # Non-Spacing Mark
        "CS",  # Common Number Separator
        "ES",  # European Number Separator
        "ET",  # European Number Terminator
        "BN",  # Boundary Neutral
        "S",   # Segment Separator
        "WS",  # Whitespace
        "B",   # Paragraph Separator
        "RLO", # Right-to-Left Override
        "RLE", # Right-to-Left Embedding
        "LRO", # Left-to-Right Override
        "LRE", # Left-to-Right Embedding
        "PDF", # Pop Directional Format
        "LRI", # Left-to-Right Isolate
        "RLI", # Right-to-Left Isolate
        "FSI", # First Strong Isolate
        "PDI", # Pop Directional Isolate
    );
    map { ( $codes[$_] => $_ ) } 0 .. $#codes;
};
# Numeric value for each joining type code.  "D" and "C" deliberately share
# the value 3, exactly as in the original table.
my %joining_types;
$joining_types{"U"}           = 0; # Non_Joining
$joining_types{"L"}           = 1; # Left_Joining
$joining_types{"R"}           = 2; # Right_Joining
$joining_types{"D"}           = 3; # Dual_Joining
$joining_types{"C"}           = 3; # Join_Causing
$joining_types{"ALAPH"}       = 4; # Syriac ALAPH
$joining_types{"DALATH RISH"} = 5; # Syriac DALATH RISH group
$joining_types{"T"}           = 6; # Transparent
my @locales =
(
{ name => "", lcid => 0x0000007f, file => "root", territory => "IV", sabbrevlangname => "IVL", sopentypelang =>"dflt" },
{ name => "aa", dir => "seed", sopentypelang => "AFR" },
{ name => "aa-DJ", dir => "seed" },
{ name => "aa-ER", dir => "seed" },
{ name => "aa-ET", dir => "seed" },
{ name => "af", lcid => 0x00000036, oemcp => 850, sabbrevlangname => "AFK", sopentypelang => "AFK" },
{ name => "af-NA" },
{ name => "af-ZA", lcid => 0x00000436 },
{ name => "agq" },
{ name => "agq-CM" },
{ name => "ak", sopentypelang => "TWI" },
{ name => "ak-GH" },
{ name => "am", lcid => 0x0000005e, sabbrevlangname => "AMH" },
{ name => "am-ET", lcid => 0x0000045e },
{ name => "ar", lcid => 0x00000001, territory => "SA", oemcp => 720, group => 13 },
{ name => "ar-001" },
{ name => "ar-AE", lcid => 0x00003801, sabbrevlangname => "ARU" },
{ name => "ar-BH", lcid => 0x00003c01, sabbrevlangname => "ARH" },
{ name => "ar-DJ" },
{ name => "ar-DZ", lcid => 0x00001401, sabbrevlangname => "ARG" },
{ name => "ar-EG", lcid => 0x00000c01, sabbrevlangname => "ARE" },
{ name => "ar-EH" },
{ name => "ar-ER" },
{ name => "ar-IL" },
{ name => "ar-IQ", lcid => 0x00000801, sabbrevlangname => "ARI" },
{ name => "ar-JO", lcid => 0x00002c01, sabbrevlangname => "ARJ" },
{ name => "ar-KM" },
{ name => "ar-KW", lcid => 0x00003401, sabbrevlangname => "ARK" },
{ name => "ar-LB", lcid => 0x00003001, sabbrevlangname => "ARB" },
{ name => "ar-LY", lcid => 0x00001001, sabbrevlangname => "ARL" },
{ name => "ar-MA", lcid => 0x00001801, sabbrevlangname => "ARM" },
{ name => "ar-MR" },
{ name => "ar-OM", lcid => 0x00002001, sabbrevlangname => "ARO" },
{ name => "ar-PS" },
{ name => "ar-QA", lcid => 0x00004001, sabbrevlangname => "ARQ" },
{ name => "ar-SA", lcid => 0x00000401, sabbrevlangname => "ARA" },
{ name => "ar-SD" },
{ name => "ar-SO" },
{ name => "ar-SS" },
{ name => "ar-SY", lcid => 0x00002801, sabbrevlangname => "ARS" },
{ name => "ar-TD" },
{ name => "ar-TN", lcid => 0x00001c01, sabbrevlangname => "ART" },
{ name => "ar-YE", lcid => 0x00002401, sabbrevlangname => "ARY" },
{ name => "arn", lcid => 0x0000007a, oemcp => 850, ebcdiccp => 20284, slist => ",", dir => "seed", sabbrevlangname => "MPD", sopentypelang => "MAP" },
{ name => "arn-CL", lcid => 0x0000047a, dir => "seed" },
{ name => "arn-Latn", alias => "arn" },
{ name => "arn-Latn-CL", alias => "arn-CL" },
{ name => "as", lcid => 0x0000004d, slist => ",", group => 15 },
{ name => "as-IN", lcid => 0x0000044d },
{ name => "asa" },
{ name => "asa-TZ" },
{ name => "ast" },
{ name => "ast-ES" },
{ name => "az", lcid => 0x0000002c, oemcp => 857, ebcdiccp => 20905, group => 2 },
{ name => "az-Cyrl", lcid => 0x0000742c, oemcp => 866, ebcdiccp => 20880, group => 5, sabbrevlangname => "AZC" },
{ name => "az-Cyrl-AZ", lcid => 0x0000082c },
{ name => "az-Latn", lcid => 0x0000782c },
{ name => "az-Latn-AZ", lcid => 0x0000042c },
{ name => "ba", lcid => 0x0000006d, oemcp => 866, group => 5, dir => "seed", sabbrevlangname => "BAS", sopentypelang => "BSH" },
{ name => "ba-Cyrl", alias => "ba" },
{ name => "ba-Cyrl-RU", alias => "ba-RU" },
{ name => "ba-RU", lcid => 0x0000046d, dir => "seed" },
{ name => "bas" },
{ name => "bas-CM" },
{ name => "be", lcid => 0x00000023, oemcp => 866, ebcdiccp => 500, group => 5 },
{ name => "be-BY", lcid => 0x00000423 },
{ name => "bem" },
{ name => "bem-ZM" },
{ name => "bez" },
{ name => "bez-TZ" },
{ name => "bg", lcid => 0x00000002, oemcp => 866, ebcdiccp => 21025, group => 5, sabbrevlangname => "BGR", sopentypelang => "BGR" },
{ name => "bg-BG", lcid => 0x00000402 },
{ name => "bin", lcid => 0x00000066, oemcp => 850, dir => "exemplars", sabbrevlangname => "ZZZ", sopentypelang => "EDO" },
{ name => "bin-NG", lcid => 0x00000466, file => "bin", dir => "exemplars" },
{ name => "bm", sopentypelang => "BMB" },
{ name => "bm-Latn", file => "bm" },
{ name => "bm-Latn-ML", file => "bm_ML" },
{ name => "bm-ML", alias => "bm-Latn-ML" },
{ name => "bn", lcid => 0x00000045, slist => ",", group => 15, sabbrevlangname => "BNB" },
{ name => "bn-BD", lcid => 0x00000845 },
{ name => "bn-IN", lcid => 0x00000445, sabbrevlangname => "BNG" },
{ name => "bo", lcid => 0x00000051, slist => ",", group => 15, sabbrevlangname => "BOB", sopentypelang => "TIB" },
{ name => "bo-CN", lcid => 0x00000451 },
{ name => "bo-IN", slist => "," },
{ name => "bo-Tibt", alias => "bo" },
{ name => "bo-Tibt-CN", alias => "bo-CN" },
{ name => "bo-Tibt-IN", alias => "bo-IN" },
{ name => "br", lcid => 0x0000007e, oemcp => 850, ebcdiccp => 20297 },
{ name => "br-FR", lcid => 0x0000047e },
{ name => "br-Latn", alias => "br" },
{ name => "br-Latn-FR", alias => "br-FR" },
{ name => "brx" },
{ name => "brx-IN" },
{ name => "bs", lcid => 0x0000781a, oemcp => 852, maccp => 10082, ebcdiccp => 870, group => 2, sabbrevlangname => "BSB" },
{ name => "bs-Cyrl", lcid => 0x0000641a, oemcp => 855, group => 5, sabbrevlangname => "BSC" },
{ name => "bs-Cyrl-BA", lcid => 0x0000201a },
{ name => "bs-Latn", lcid => 0x0000681a },
{ name => "bs-Latn-BA", lcid => 0x0000141a },
{ name => "byn", dir => "seed", sopentypelang => "BIL" },
{ name => "byn-ER", dir => "seed" },
{ name => "ca", lcid => 0x00000003, oemcp => 850 },
{ name => "ca-AD", maccp => 65001 },
{ name => "ca-ES", lcid => 0x00000403 },
{ name => "ca-ES-valencia", lcid => 0x00000803, file => "ca_ES_VALENCIA", sabbrevlangname => "VAL" },
{ name => "ca-FR", maccp => 65001 },
{ name => "ca-IT", maccp => 65001 },
{ name => "ccp" },
{ name => "ccp-BD", alias => "ccp-Cakm-BD" },
{ name => "ccp-Cakm", file => "ccp" },
{ name => "ccp-Cakm-BD", file => "ccp_BD" },
{ name => "ccp-Cakm-IN", file => "ccp_IN" },
{ name => "ccp-IN", alias => "ccp-Cakm-IN" },
{ name => "ce" },
{ name => "ce-RU" },
{ name => "ceb" },
{ name => "ceb-Latn", file => "ceb" },
{ name => "ceb-Latn-PH", file => "ceb_PH" },
{ name => "ceb-PH", alias => "ceb-Latn-PH" },
{ name => "cgg" },
{ name => "cgg-UG" },
{ name => "chr", lcid => 0x0000005c, slist => ",", sabbrevlangname => "CRE" },
{ name => "chr-Cher", lcid => 0x00007c5c, file => "chr" },
{ name => "chr-Cher-US", lcid => 0x0000045c, file => "chr_US" },
{ name => "chr-US", alias => "chr-Cher-US" },
{ name => "ckb", alias => "ku" },
{ name => "ckb-IQ", alias => "ku-Arab-IQ" },
{ name => "ckb-IR", alias => "ku-Arab-IR" },
{ name => "co", lcid => 0x00000083, oemcp => 850, ebcdiccp => 20297, dir => "seed" },
{ name => "co-FR", lcid => 0x00000483, dir => "seed" },
{ name => "co-Latn", alias => "co" },
{ name => "co-Latn-FR", alias => "co-FR" },
{ name => "cs", lcid => 0x00000005, oemcp => 852, group => 2, sabbrevlangname => "CSY", sopentypelang => "CSY" },
{ name => "cs-CZ", lcid => 0x00000405 },
{ name => "cu", dir => "seed", sopentypelang => "CSL" },
{ name => "cu-RU", dir => "seed" },
{ name => "cy", lcid => 0x00000052, oemcp => 850, ebcdiccp => 20285, sabbrevlangname => "CYM", sopentypelang => "WEL" },
{ name => "cy-GB", lcid => 0x00000452 },
{ name => "da", lcid => 0x00000006, oemcp => 850, ebcdiccp => 20277 },
{ name => "da-DK", lcid => 0x00000406 },
{ name => "da-GL", maccp => 65001 },
{ name => "dav" },
{ name => "dav-KE" },
{ name => "de", lcid => 0x00000007, oemcp => 850, ebcdiccp => 20273 },
{ name => "de-AT", lcid => 0x00000c07, sabbrevlangname => "DEA" },
{ name => "de-BE" },
{ name => "de-CH", lcid => 0x00000807, sabbrevlangname => "DES" },
{ name => "de-DE", lcid => 0x00000407 },
{ name => "de-DE_phoneb", lcid => 0x00010407, alias => "de-DE" },
{ name => "de-DE-u-co-phonebk", alias => "de-DE_phoneb" },
{ name => "de-IT", oemcp => 65001 },
{ name => "de-LI", lcid => 0x00001407, sabbrevlangname => "DEC" },
{ name => "de-LU", lcid => 0x00001007, sabbrevlangname => "DEL" },
{ name => "dje", sopentypelang => "DJR" },
{ name => "dje-NE" },
{ name => "doi" },
{ name => "doi-IN" },
{ name => "dsb", lcid => 0x00007c2e, sparent => "hsb", oemcp => 850, ebcdiccp => 870, sabbrevlangname => "DSB", sopentypelang => "LSB" },
{ name => "dsb-DE", lcid => 0x0000082e },
{ name => "dua" },
{ name => "dua-CM" },
{ name => "dv", lcid => 0x00000065, slist => "\x{060c}", group => 13, dir => "seed" },
{ name => "dv-MV", lcid => 0x00000465, dir => "seed" },
{ name => "dyo" },
{ name => "dyo-SN" },
{ name => "dz", sopentypelang => "DZN" },
{ name => "dz-BT", lcid => 0x00000c51, sabbrevlangname => "ZZZ" },
{ name => "ebu" },
{ name => "ebu-KE" },
{ name => "ee" },
{ name => "ee-GH" },
{ name => "ee-TG" },
{ name => "el", lcid => 0x00000008, oemcp => 737, group => 4 },
{ name => "el-CY" },
{ name => "el-GR", lcid => 0x00000408 },
{ name => "en", lcid => 0x00000009, oemcp => 437, slist => ",", sabbrevlangname => "ENU" },
{ name => "en-001", oemcp => 850 },
{ name => "en-029", lcid => 0x00002409, file => "en", oemcp => 850, sabbrevlangname => "ENB" },
{ name => "en-150", oemcp => 65001 },
{ name => "en-AE", lcid => 0x00004c09, oemcp => 65001, sabbrevlangname => "ZZZ" },
{ name => "en-AG", oemcp => 850 },
{ name => "en-AI", oemcp => 850 },
{ name => "en-AS", oemcp => 850 },
{ name => "en-AT", oemcp => 65001 },
{ name => "en-AU", lcid => 0x00000c09, oemcp => 850, sabbrevlangname => "ENA" },
{ name => "en-BB", oemcp => 850 },
{ name => "en-BE", oemcp => 850 },
{ name => "en-BI", oemcp => 65001 },
{ name => "en-BM", oemcp => 850 },
{ name => "en-BS", oemcp => 850 },
{ name => "en-BW", oemcp => 850 },
{ name => "en-BZ", lcid => 0x00002809, oemcp => 850, sabbrevlangname => "ENL" },
{ name => "en-CA", lcid => 0x00001009, oemcp => 850, ebcdiccp => 37, sabbrevlangname => "ENC" },
{ name => "en-CC", oemcp => 850 },
{ name => "en-CH", oemcp => 65001 },
{ name => "en-CK", oemcp => 850 },
{ name => "en-CM", oemcp => 850 },
{ name => "en-CX", oemcp => 850 },
{ name => "en-CY", oemcp => 65001 },
{ name => "en-DE", oemcp => 65001 },
{ name => "en-DG", oemcp => 850 },
{ name => "en-DK", oemcp => 65001 },
{ name => "en-DM", oemcp => 850 },
{ name => "en-ER", oemcp => 850 },
{ name => "en-FI", oemcp => 65001 },
{ name => "en-FJ", oemcp => 850 },
{ name => "en-FK", oemcp => 850 },
{ name => "en-FM", oemcp => 850 },
{ name => "en-GB", lcid => 0x00000809, oemcp => 850, ebcdiccp => 20285, sabbrevlangname => "ENG" },
{ name => "en-GD", oemcp => 850 },
{ name => "en-GG", oemcp => 850 },
{ name => "en-GH", oemcp => 850 },
{ name => "en-GI", oemcp => 850 },
{ name => "en-GM", oemcp => 850 },
{ name => "en-GU", oemcp => 850 },
{ name => "en-GY", oemcp => 850 },
{ name => "en-HK", lcid => 0x00003c09, oemcp => 850, sabbrevlangname => "ENH" },
{ name => "en-ID", lcid => 0x00003809, file => "en", oemcp => 850, sabbrevlangname => "ZZZ" },
{ name => "en-IE", lcid => 0x00001809, oemcp => 850, sabbrevlangname => "ENI" },
{ name => "en-IL", oemcp => 65001 },
{ name => "en-IM", oemcp => 850 },
{ name => "en-IN", lcid => 0x00004009, sabbrevlangname => "ENN" },
{ name => "en-IO", oemcp => 850 },
{ name => "en-JE", oemcp => 850 },
{ name => "en-JM", lcid => 0x00002009, oemcp => 850, sabbrevlangname => "ENJ" },
{ name => "en-KE", oemcp => 850 },
{ name => "en-KI", oemcp => 850 },
{ name => "en-KN", oemcp => 850 },
{ name => "en-KY", oemcp => 850 },
{ name => "en-LC", oemcp => 850 },
{ name => "en-LR", oemcp => 850 },
{ name => "en-LS", oemcp => 850 },
{ name => "en-MG", oemcp => 850 },
{ name => "en-MH", oemcp => 850 },
{ name => "en-MO", oemcp => 850 },
{ name => "en-MP", oemcp => 850 },
{ name => "en-MS", oemcp => 850 },
{ name => "en-MT", oemcp => 850 },
{ name => "en-MU", oemcp => 850 },
{ name => "en-MW", oemcp => 850 },
{ name => "en-MY", lcid => 0x00004409, sabbrevlangname => "ENM" },
{ name => "en-NA", oemcp => 850 },
{ name => "en-NF", oemcp => 850 },
{ name => "en-NG", oemcp => 850 },
{ name => "en-NL", oemcp => 65001 },
{ name => "en-NR", oemcp => 850 },
{ name => "en-NU", oemcp => 850 },
{ name => "en-NZ", lcid => 0x00001409, oemcp => 850, sabbrevlangname => "ENZ" },
{ name => "en-PG", oemcp => 850 },
{ name => "en-PH", lcid => 0x00003409, ebcdiccp => 500, sabbrevlangname => "ENP" },
{ name => "en-PK", oemcp => 850 },
{ name => "en-PN", oemcp => 850 },
{ name => "en-PR", oemcp => 850 },
{ name => "en-PW", oemcp => 850 },
{ name => "en-RW", oemcp => 850 },
{ name => "en-SB", oemcp => 850 },
{ name => "en-SC", oemcp => 850 },
{ name => "en-SD", oemcp => 850 },
{ name => "en-SE", oemcp => 65001 },
{ name => "en-SG", lcid => 0x00004809, sabbrevlangname => "ENE" },
{ name => "en-SH", oemcp => 850 },
{ name => "en-SI", oemcp => 65001 },
{ name => "en-SL", oemcp => 850 },
{ name => "en-SS", oemcp => 850 },
{ name => "en-SX", oemcp => 850 },
{ name => "en-SZ", oemcp => 850 },
{ name => "en-TC", oemcp => 850 },
{ name => "en-TK", oemcp => 850 },
{ name => "en-TO", oemcp => 850 },
{ name => "en-TT", lcid => 0x00002c09, oemcp => 850, sabbrevlangname => "ENT" },
{ name => "en-TV", oemcp => 850 },
{ name => "en-TZ", oemcp => 850 },
{ name => "en-UG", oemcp => 850 },
{ name => "en-UM", oemcp => 850 },
{ name => "en-US", lcid => 0x00000409 },
{ name => "en-VC", oemcp => 850 },
{ name => "en-VG", oemcp => 850 },
{ name => "en-VI", oemcp => 850 },
{ name => "en-VU", oemcp => 850 },
{ name => "en-WS", oemcp => 850 },
{ name => "en-ZA", lcid => 0x00001c09, ebcdiccp => 500, sabbrevlangname => "ENS" },
{ name => "en-ZM", oemcp => 850 },
{ name => "en-ZW", lcid => 0x00003009, ebcdiccp => 500, sabbrevlangname => "ENW" },
{ name => "eo", sopentypelang => "NTO" },
{ name => "eo-001" },
{ name => "es", lcid => 0x0000000a, oemcp => 850, ebcdiccp => 20284, sabbrevlangname => "ESP", sopentypelang => "ESP" },
{ name => "es-419", lcid => 0x0000580a, sabbrevlangname => "ESJ" },
{ name => "es-AR", lcid => 0x00002c0a, sabbrevlangname => "ESS" },
{ name => "es-BO", lcid => 0x0000400a, sabbrevlangname => "ESB" },
{ name => "es-BR", oemcp => 65001 },
{ name => "es-BZ", oemcp => 65001 },
{ name => "es-CL", lcid => 0x0000340a, sabbrevlangname => "ESL" },
{ name => "es-CO", lcid => 0x0000240a, sabbrevlangname => "ESO" },
{ name => "es-CR", lcid => 0x0000140a, sabbrevlangname => "ESC" },
{ name => "es-CU", lcid => 0x00005c0a, sabbrevlangname => "ESK" },
{ name => "es-DO", lcid => 0x00001c0a, sabbrevlangname => "ESD" },
{ name => "es-EA" },
{ name => "es-EC", lcid => 0x0000300a, sabbrevlangname => "ESF" },
{ name => "es-ES", lcid => 0x00000c0a, sabbrevlangname => "ESN" },
{ name => "es-ES_tradnl", lcid => 0x0000040a, file => "es_ES" },
{ name => "es-ES-u-co-trad", alias => "es-ES_tradnl" },
{ name => "es-GQ" },
{ name => "es-GT", lcid => 0x0000100a, sabbrevlangname => "ESG" },
{ name => "es-HN", lcid => 0x0000480a, sabbrevlangname => "ESH" },
{ name => "es-IC" },
{ name => "es-MX", lcid => 0x0000080a, sabbrevlangname => "ESM" },
{ name => "es-NI", lcid => 0x00004c0a, sabbrevlangname => "ESI" },
{ name => "es-PA", lcid => 0x0000180a, sabbrevlangname => "ESA" },
{ name => "es-PE", lcid => 0x0000280a, sabbrevlangname => "ESR" },
{ name => "es-PH" },
{ name => "es-PR", lcid => 0x0000500a, sabbrevlangname => "ESU" },
{ name => "es-PY", lcid => 0x00003c0a, sabbrevlangname => "ESZ" },
{ name => "es-SV", lcid => 0x0000440a, sabbrevlangname => "ESE" },
{ name => "es-US", lcid => 0x0000540a, sabbrevlangname => "EST" },
{ name => "es-UY", lcid => 0x0000380a, sabbrevlangname => "ESY" },
{ name => "es-VE", lcid => 0x0000200a, sabbrevlangname => "ESV" },
{ name => "et", lcid => 0x00000025, oemcp => 775, group => 3, sabbrevlangname => "ETI", sopentypelang => "ETI" },
{ name => "et-EE", lcid => 0x00000425 },
{ name => "eu", lcid => 0x0000002d, oemcp => 850, maccp => 65001, sabbrevlangname => "EUQ", sopentypelang => "EUQ" },
{ name => "eu-ES", lcid => 0x0000042d },
{ name => "ewo" },
{ name => "ewo-CM" },
{ name => "fa", lcid => 0x00000029, inegnumber => 3, oemcp => 720, slist => "\x{061b}", group => 13, sabbrevlangname => "FAR", sopentypelang => "FAR" },
{ name => "fa-AF", alias => "prs-AF" },
{ name => "fa-IR", lcid => 0x00000429 },
{ name => "ff", lcid => 0x00000067, oemcp => 850, ebcdiccp => 20297 },
{ name => "ff-CM", alias => "ff-Latn-CM" },
{ name => "ff-GN", alias => "ff-Latn-GN" },
{ name => "ff-MR", alias => "ff-Latn-MR" },
{ name => "ff-NG", alias => "ff-Latn-NG" },
{ name => "ff-SN", alias => "ff-Latn-SN" },
{ name => "ff-Adlm" },
{ name => "ff-Adlm-BF" },
{ name => "ff-Adlm-CM" },
{ name => "ff-Adlm-GH" },
{ name => "ff-Adlm-GM" },
{ name => "ff-Adlm-GN" },
{ name => "ff-Adlm-GW" },
{ name => "ff-Adlm-LR" },
{ name => "ff-Adlm-MR" },
{ name => "ff-Adlm-NE" },
{ name => "ff-Adlm-NG" },
{ name => "ff-Adlm-SL" },
{ name => "ff-Adlm-SN" },
{ name => "ff-Latn", lcid => 0x00007c67 },
{ name => "ff-Latn-BF", oemcp => 65001 },
{ name => "ff-Latn-CM" },
{ name => "ff-Latn-GH", oemcp => 65001 },
{ name => "ff-Latn-GM", oemcp => 65001 },
{ name => "ff-Latn-GN" },
{ name => "ff-Latn-GW", oemcp => 65001 },
{ name => "ff-Latn-LR", oemcp => 65001 },
{ name => "ff-Latn-MR" },
{ name => "ff-Latn-NE", oemcp => 65001 },
{ name => "ff-Latn-NG", lcid => 0x00000467, sabbrevlangname => "ZZZ" },
{ name => "ff-Latn-SL", oemcp => 65001 },
{ name => "ff-Latn-SN", lcid => 0x00000867 },
{ name => "fi", lcid => 0x0000000b, oemcp => 850, ebcdiccp => 20278 },
{ name => "fi-FI", lcid => 0x0000040b },
{ name => "fil", lcid => 0x00000064, oemcp => 437, ebcdiccp => 500, sabbrevlangname => "FPO", sopentypelang => "PIL" },
{ name => "fil-PH", lcid => 0x00000464 },
{ name => "fil-Latn", alias => "fil" },
{ name => "fil-Latn-PH", alias => "fil-PH" },
{ name => "fo", lcid => 0x00000038, oemcp => 850, maccp => 10079, ebcdiccp => 20277, sabbrevlangname => "FOS", sopentypelang => "FOS" },
{ name => "fo-DK", oemcp => 65001, maccp => 65001 },
{ name => "fo-FO", lcid => 0x00000438 },
{ name => "fr", lcid => 0x0000000c, oemcp => 850, ebcdiccp => 20297 },
{ name => "fr-029", lcid => 0x00001c0c, file => "fr", sabbrevlangname => "ZZZ" },
{ name => "fr-BE", lcid => 0x0000080c, sabbrevlangname => "FRB" },
{ name => "fr-BF" },
{ name => "fr-BI" },
{ name => "fr-BJ" },
{ name => "fr-BL" },
{ name => "fr-CA", lcid => 0x00000c0c, sabbrevlangname => "FRC" },
{ name => "fr-CD", lcid => 0x0000240c, sabbrevlangname => "FRD" },
{ name => "fr-CF" },
{ name => "fr-CG" },
{ name => "fr-CH", lcid => 0x0000100c, sabbrevlangname => "FRS" },
{ name => "fr-CI", lcid => 0x0000300c, sabbrevlangname => "FRI" },
{ name => "fr-CM", lcid => 0x00002c0c, sabbrevlangname => "FRE" },
{ name => "fr-DJ" },
{ name => "fr-DZ" },
{ name => "fr-FR", lcid => 0x0000040c },
{ name => "fr-GA" },
{ name => "fr-GF" },
{ name => "fr-GN" },
{ name => "fr-GP" },
{ name => "fr-GQ" },
{ name => "fr-HT", lcid => 0x00003c0c, sabbrevlangname => "FRH" },
{ name => "fr-KM" },
{ name => "fr-LU", lcid => 0x0000140c, sabbrevlangname => "FRL" },
{ name => "fr-MA", lcid => 0x0000380c, sabbrevlangname => "FRO" },
{ name => "fr-MC", lcid => 0x0000180c, sabbrevlangname => "FRM" },
{ name => "fr-MF" },
{ name => "fr-MG" },
{ name => "fr-ML", lcid => 0x0000340c, sabbrevlangname => "FRF" },
{ name => "fr-MQ" },
{ name => "fr-MR" },
{ name => "fr-MU" },
{ name => "fr-NC" },
{ name => "fr-NE" },
{ name => "fr-PF" },
{ name => "fr-PM" },
{ name => "fr-RE", lcid => 0x0000200c, sabbrevlangname => "FRR" },
{ name => "fr-RW" },
{ name => "fr-SC" },
{ name => "fr-SN", lcid => 0x0000280c, sabbrevlangname => "FRN" },
{ name => "fr-SY" },
{ name => "fr-TD" },
{ name => "fr-TG" },
{ name => "fr-TN" },
{ name => "fr-VU" },
{ name => "fr-WF" },
{ name => "fr-YT" },
{ name => "fur", sopentypelang => "FRL" },
{ name => "fur-IT" },
{ name => "fy", lcid => 0x00000062, oemcp => 850, sabbrevlangname => "FYN", sopentypelang => "FRI" },
{ name => "fy-NL", lcid => 0x00000462 },
{ name => "ga", lcid => 0x0000003c, oemcp => 850, sabbrevlangname => "IRE", sopentypelang => "IRI" },
{ name => "ga-GB" },
{ name => "ga-IE", lcid => 0x0000083c },
{ name => "gd", lcid => 0x00000091, oemcp => 850, ebcdiccp => 20285, sopentypelang => "GAE" },
{ name => "gd-GB", lcid => 0x00000491 },
{ name => "gd-Latn", alias => "gd" },
{ name => "gl", lcid => 0x00000056, oemcp => 850, sabbrevlangname => "GLC", sopentypelang => "GAL" },
{ name => "gl-ES", lcid => 0x00000456 },
{ name => "gn", lcid => 0x00000074, oemcp => 850, ebcdiccp => 20284, slist => ",", dir => "seed", sopentypelang => "GUA" },
{ name => "gn-PY", lcid => 0x00000474, dir => "seed" },
{ name => "gsw", lcid => 0x00000084, oemcp => 850, ebcdiccp => 20297, sabbrevlangname => "ZZZ", sopentypelang => "ALS" },
{ name => "gsw-CH" },
{ name => "gsw-FR", lcid => 0x00000484, sabbrevlangname => "GSW" },
{ name => "gsw-LI" },
{ name => "gu", lcid => 0x00000047, slist => ",", group => 15 },
{ name => "gu-IN", lcid => 0x00000447 },
{ name => "guz" },
{ name => "guz-KE" },
{ name => "gv", sopentypelang => "MNX" },
{ name => "gv-IM" },
{ name => "ha", lcid => 0x00000068, oemcp => 437 },
{ name => "ha-GH", alias => "ha-Latn-GH" },
{ name => "ha-Latn", lcid => 0x00007c68, file => "ha" },
{ name => "ha-Latn-GH", file => "ha_GH", ebcdiccp => 500 },
{ name => "ha-Latn-NE", file => "ha_NE", ebcdiccp => 500 },
{ name => "ha-Latn-NG", lcid => 0x00000468, file => "ha_NG" },
{ name => "ha-NE", alias => "ha-Latn-NE" },
{ name => "ha-NG", alias => "ha-Latn-NG" },
{ name => "haw", lcid => 0x00000075, oemcp => 437 },
{ name => "haw-Latn", alias => "haw" },
{ name => "haw-Latn-US", alias => "haw-US" },
{ name => "haw-US", lcid => 0x00000475 },
{ name => "he", lcid => 0x0000000d, oemcp => 862, slist => ",", group => 12, sopentypelang => "IWR" },
{ name => "he-IL", lcid => 0x0000040d },
{ name => "hi", lcid => 0x00000039, slist => ",", group => 15 },
{ name => "hi-IN", lcid => 0x00000439 },
{ name => "hr", lcid => 0x0000001a, inegnumber => 2, oemcp => 852, maccp => 10082, group => 2 },
{ name => "hr-BA", lcid => 0x0000101a, ebcdiccp => 870, inegnumber => 1, sabbrevlangname => "HRB" },
{ name => "hr-HR", lcid => 0x0000041a },
{ name => "hsb", lcid => 0x0000002e, oemcp => 850, ebcdiccp => 870, sopentypelang => "USB" },
{ name => "hsb-DE", lcid => 0x0000042e },
{ name => "hu", lcid => 0x0000000e, oemcp => 852, group => 2 },
{ name => "hu-HU", lcid => 0x0000040e },
{ name => "hu-HU_technl", lcid => 0x0001040e, alias => "hu-HU" },
{ name => "hy", lcid => 0x0000002b, slist => ",", group => 17 },
{ name => "hy-AM", lcid => 0x0000042b },
{ name => "ia" },
{ name => "ia-001" },
## name => "ibb", lcid => 0x00000069 },
## name => "ibb-NG", lcid => 0x00000469 },
{ name => "id", lcid => 0x00000021, oemcp => 850 },
{ name => "id-ID", lcid => 0x00000421 },
{ name => "ig", lcid => 0x00000070, oemcp => 437 },
{ name => "ig-Latn", alias => "ig" },
{ name => "ig-Latn-NG", alias => "ig-NG" },
{ name => "ig-NG", lcid => 0x00000470 },
{ name => "ii", lcid => 0x00000078, group => 9, sopentypelang => "YIM" },
{ name => "ii-CN", lcid => 0x00000478 },
{ name => "ii-Yiii", alias => "ii" },
{ name => "ii-Yiii-CN", alias => "ii-CN" },
{ name => "is", lcid => 0x0000000f, oemcp => 850, maccp => 10079, ebcdiccp => 20871 },
{ name => "is-IS", lcid => 0x0000040f },
{ name => "it", lcid => 0x00000010, oemcp => 850, ebcdiccp => 20280 },
{ name => "it-CH", lcid => 0x00000810, ebcdiccp => 500, sabbrevlangname => "ITS" },
{ name => "it-IT", lcid => 0x00000410 },
{ name => "it-SM" },
{ name => "it-VA", oemcp => 65001 },
{ name => "iu", lcid => 0x0000005d, oemcp => 437, slist => ",", sortlocale => "iu-Latn-CA", dir => "seed", sabbrevlangname => "IUK", sopentypelang => "INU" },
{ name => "iu-Cans", lcid => 0x0000785d, file => "iu", oemcp => 65001, dir => "seed", sabbrevlangname => "IUS" },
{ name => "iu-Cans-CA", lcid => 0x0000045d, file => "iu_CA", dir => "seed" },
{ name => "iu-Latn", lcid => 0x00007c5d, dir => "seed" },
{ name => "iu-Latn-CA", lcid => 0x0000085d, dir => "seed" },
{ name => "ja", lcid => 0x00000011, ireadinglayout => 2, oemcp => 932, slist => ",", sscripts => "Hani Hira Jpan Kana", group => 7, sopentypelang => "JAN" },
{ name => "ja-JP", lcid => 0x00000411 },
{ name => "ja-JP_radstr", lcid => 0x00040411, alias => "ja-JP" },
{ name => "ja-JP-u-co-unihan", alias => "ja-JP_radstr" },
{ name => "jgo" },
{ name => "jgo-CM" },
{ name => "jmc" },
{ name => "jmc-TZ" },
{ name => "jv", oemcp => 850 },
{ name => "jv-ID", alias => "jv-Latn-ID" },
## name => "jv-Java" },
## name => "jv-Java-ID" },
{ name => "jv-Latn", file => "jv" },
{ name => "jv-Latn-ID", file => "jv_ID" },
{ name => "ka", lcid => 0x00000037, group => 16 },
{ name => "ka-GE", lcid => 0x00000437 },
{ name => "ka-GE_modern", lcid => 0x00010437, alias => "ka-GE" },
{ name => "kab", sopentypelang => "KAB0" },
{ name => "kab-DZ" },
{ name => "kam", sopentypelang => "KMB" },
{ name => "kam-KE" },
{ name => "kde" },
{ name => "kde-TZ" },
{ name => "kea" },
{ name => "kea-CV" },
{ name => "kgp" },
{ name => "kgp-BR" },
{ name => "khq" },
{ name => "khq-ML" },
{ name => "ki" },
{ name => "ki-KE" },
{ name => "kk", lcid => 0x0000003f, group => 5, sabbrevlangname => "KKZ" },
{ name => "kk-Cyrl", alias => "kk" },
{ name => "kk-Cyrl-KZ", alias => "kk-KZ" },
{ name => "kk-KZ", lcid => 0x0000043f },
{ name => "kkj" },
{ name => "kkj-CM" },
{ name => "kl", lcid => 0x0000006f, oemcp => 850, ebcdiccp => 20277, sopentypelang => "GRN" },
{ name => "kl-GL", lcid => 0x0000046f },
{ name => "kln", sopentypelang => "KAL" },
{ name => "kln-KE" },
{ name => "km", lcid => 0x00000053, inegnumber => 2, slist => ",", group => 15 },
{ name => "km-KH", lcid => 0x00000453 },
{ name => "kn", lcid => 0x0000004b, slist => ",", group => 15, sabbrevlangname => "KDI" },
{ name => "kn-IN", lcid => 0x0000044b },
{ name => "ko", lcid => 0x00000012, ireadinglayout => 2, slist => ",", oemcp => 949, ebcdiccp => 20833, sscripts => "Hang Hani Kore", group => 8 },
{ name => "ko-KP", oemcp => 65001 },
{ name => "ko-KR", lcid => 0x00000412 },
{ name => "kok", lcid => 0x00000057, slist => ",", group => 15, sabbrevlangname => "KNK" },
{ name => "kok-IN", lcid => 0x00000457 },
{ name => "kr", lcid => 0x00000071, sortlocale => "kr-Latn-NG", oemcp => 850, dir => "exemplars", sabbrevlangname => "ZZZ", sopentypelang => "KNR" },
{ name => "kr-Latn", file => "kr", dir => "exemplars" },
{ name => "kr-Latn-NG", lcid => 0x00000471, file => "kr", dir => "exemplars" },
{ name => "kr-NG", alias => "kr-Latn-NG" },
{ name => "ks", lcid => 0x00000060, group => 15, sabbrevlangname => "ZZZ", sopentypelang => "KSH" },
{ name => "ks-Arab", lcid => 0x00000460 },
{ name => "ks-Arab-IN" },
{ name => "ks-Deva", slist => "," },
{ name => "ks-Deva-IN", lcid => 0x00000860 },
{ name => "ks-IN", alias => "ks-Arab-IN" },
{ name => "ksb" },
{ name => "ksb-TZ" },
{ name => "ksf" },
{ name => "ksf-CM" },
{ name => "ksh", sopentypelang => "KSH0" },
{ name => "ksh-DE" },
{ name => "ku", lcid => 0x00000092, file => "ckb", slist => "\x{061b}", sortlocale => "ku-Arab-IQ", oemcp => 720 },
{ name => "ku-Arab", lcid => 0x00007c92, file => "ckb", group => 13 },
{ name => "ku-Arab-IQ", lcid => 0x00000492, file => "ckb_IQ" },
{ name => "ku-Arab-IR", file => "ckb_IR", oemcp => 65001 },
{ name => "kw" },
{ name => "kw-GB" },
{ name => "ky", lcid => 0x00000040, oemcp => 866, group => 5, sabbrevlangname => "KYR" },
{ name => "ky-Cyrl", alias => "ky" },
{ name => "ky-Cyrl-KG", alias => "ky-KG" },
{ name => "ky-KG", lcid => 0x00000440 },
{ name => "la", lcid => 0x00000076, oemcp => 437, slist => ",", dir => "seed", sabbrevlangname => "ZZZ" },
{ name => "la-001", lcid => 0x00000476, file => "la", dir => "seed" },
{ name => "lag" },
{ name => "lag-TZ" },
{ name => "lb", lcid => 0x0000006e, oemcp => 850, ebcdiccp => 20297, sabbrevlangname => "LBX" },
{ name => "lb-LU", lcid => 0x0000046e },
{ name => "lg" },
{ name => "lg-UG" },
{ name => "lkt" },
{ name => "lkt-US" },
{ name => "ln" },
{ name => "ln-AO" },
{ name => "ln-CD" },
{ name => "ln-CF" },
{ name => "ln-CG" },
{ name => "lo", lcid => 0x00000054, group => 15 },
{ name => "lo-LA", lcid => 0x00000454 },
{ name => "lrc" },
{ name => "lrc-IQ" },
{ name => "lrc-IR" },
{ name => "lt", lcid => 0x00000027, oemcp => 775, group => 3, sabbrevlangname => "LTH", sopentypelang => "LTH" },
{ name => "lt-LT", lcid => 0x00000427 },
{ name => "lu" },
{ name => "lu-CD" },
{ name => "luo" },
{ name => "luo-KE" },
{ name => "luy", sopentypelang => "LUH" },
{ name => "luy-KE" },
{ name => "lv", lcid => 0x00000026, oemcp => 775, group => 3, sabbrevlangname => "LVI", sopentypelang => "LVI" },
{ name => "lv-LV", lcid => 0x00000426 },
{ name => "mai" },
{ name => "mai-IN" },
{ name => "mas" },
{ name => "mas-KE" },
{ name => "mas-TZ" },
{ name => "mer" },
{ name => "mer-KE" },
{ name => "mfe" },
{ name => "mfe-MU" },
{ name => "mg" },
{ name => "mg-MG" },
{ name => "mgh" },
{ name => "mgh-MZ" },
{ name => "mgo" },
{ name => "mgo-CM" },
{ name => "mi", lcid => 0x00000081, slist => "," },
{ name => "mi-Latn", alias => "mi" },
{ name => "mi-Latn-NZ", alias => "mi-NZ" },
{ name => "mi-NZ", lcid => 0x00000481 },
{ name => "mk", lcid => 0x0000002f, oemcp => 866, ebcdiccp => 500, group => 5, sabbrevlangname => "MKI" },
{ name => "mk-MK", lcid => 0x0000042f },
{ name => "ml", lcid => 0x0000004c, group => 15, sabbrevlangname => "MYM", sopentypelang => "MLR" },
{ name => "ml-IN", lcid => 0x0000044c },
{ name => "mn", lcid => 0x00000050, oemcp => 866, sopentypelang => "MNG" },
{ name => "mn-Cyrl", lcid => 0x00007850, file => "mn", sabbrevlangname => "MNN" },
{ name => "mn-Cyrl-MN", alias => "mn-MN" },
{ name => "mn-MN", lcid => 0x00000450, sparent => "mn-Cyrl", group => 5 },
{ name => "mn-Mong", lcid => 0x00007c50, oemcp => 65001, slist => ",", group => 15, dir => "seed", sabbrevlangname => "MNG" },
{ name => "mn-Mong-CN", lcid => 0x00000850, dir => "seed" },
{ name => "mn-Mong-MN", lcid => 0x00000c50, dir => "seed", sabbrevlangname => "MNM" },
{ name => "mni", lcid => 0x00000058, slist => ",", sabbrevlangname => "ZZZ" },
{ name => "mni-IN", lcid => 0x00000458, file => "mni_Beng_IN" },
{ name => "moh", lcid => 0x0000007c, oemcp => 850, ebcdiccp => 37, slist => ",", dir => "seed", sabbrevlangname => "MWK" },
{ name => "moh-CA", lcid => 0x0000047c, dir => "seed" },
{ name => "moh-Latn", alias => "moh" },
{ name => "moh-Latn-CA", alias => "moh-CA" },
{ name => "mr", lcid => 0x0000004e, slist => ",", group => 15 },
{ name => "mr-IN", lcid => 0x0000044e },
{ name => "ms", lcid => 0x0000003e, oemcp => 850, sabbrevlangname => "MSL", sopentypelang => "MLY" },
{ name => "ms-BN", lcid => 0x0000083e, sabbrevlangname => "MSB" },
{ name => "ms-ID" },
{ name => "ms-Latn", alias => "ms" },
{ name => "ms-Latn-BN", alias => "ms-BN" },
{ name => "ms-Latn-MY", alias => "ms-MY" },
{ name => "ms-Latn-SG", alias => "ms-SG" },
{ name => "ms-MY", lcid => 0x0000043e },
{ name => "ms-SG" },
{ name => "mt", lcid => 0x0000003a, sopentypelang => "MTS" },
{ name => "mt-MT", lcid => 0x0000043a },
{ name => "mua" },
{ name => "mua-CM" },
{ name => "my", lcid => 0x00000055, sopentypelang => "BRM" },
{ name => "my-MM", lcid => 0x00000455 },
{ name => "mzn" },
{ name => "mzn-IR" },
{ name => "naq" },
{ name => "naq-NA" },
{ name => "nb", lcid => 0x00007c14, oemcp => 850, ebcdiccp => 20277, sabbrevlangname => "NOR", sopentypelang => "NOR" },
{ name => "nb-NO", lcid => 0x00000414 },
{ name => "nb-SJ" },
{ name => "nd", sopentypelang => "NDB" },
{ name => "nd-ZW" },
{ name => "nds" },
{ name => "nds-DE" },
{ name => "nds-NL" },
{ name => "ne", lcid => 0x00000061, slist => "," },
{ name => "ne-IN", lcid => 0x00000861, sabbrevlangname => "NEI" },
{ name => "ne-NP", lcid => 0x00000461, group => 15 },
{ name => "nl", lcid => 0x00000013, oemcp => 850 },
{ name => "nl-AW" },
{ name => "nl-BE", lcid => 0x00000813, sabbrevlangname => "NLB" },
{ name => "nl-BQ" },
{ name => "nl-CW" },
{ name => "nl-NL", lcid => 0x00000413 },
{ name => "nl-SR" },
{ name => "nl-SX" },
{ name => "nmg" },
{ name => "nmg-CM" },
{ name => "nn", lcid => 0x00007814, oemcp => 850, ebcdiccp => 20277, sabbrevlangname => "NON", sopentypelang => "NYN" },
{ name => "nn-NO", lcid => 0x00000814 },
{ name => "nnh" },
{ name => "nnh-CM" },
{ name => "no", lcid => 0x00000014, oemcp => 850, ebcdiccp => 20277, sortlocale => "nb-NO" },
{ name => "nqo", idigits => 3, inegnumber => 3, slist => "\x{060c}", dir => "seed", sopentypelang => "NKO" },
{ name => "nqo-GN", dir => "seed" },
{ name => "nr", dir => "seed", sopentypelang => "NDB" },
{ name => "nr-ZA", dir => "seed" },
{ name => "nso", lcid => 0x0000006c, oemcp => 850, dir => "seed", sopentypelang => "SOT" },
{ name => "nso-ZA", lcid => 0x0000046c, dir => "seed" },
{ name => "nus" },
{ name => "nus-SD", alias => "nus-SS" },
{ name => "nus-SS" },
{ name => "nyn", sopentypelang => "NKL" },
{ name => "nyn-UG" },
{ name => "oc", lcid => 0x00000082, oemcp => 850, ebcdiccp => 20297, dir => "seed" },
{ name => "oc-FR", lcid => 0x00000482, dir => "seed" },
{ name => "oc-Latn", alias => "oc" },
{ name => "oc-Latn-FR", alias => "oc-FR" },
{ name => "om", lcid => 0x00000072, sopentypelang => "ORO" },
{ name => "om-ET", lcid => 0x00000472 },
{ name => "om-KE" },
{ name => "or", lcid => 0x00000048, slist => ",", group => 15 },
{ name => "or-IN", lcid => 0x00000448 },
{ name => "os" },
{ name => "os-GE" },
{ name => "os-RU" },
{ name => "pa", lcid => 0x00000046, slist => "," },
{ name => "pa-Arab", lcid => 0x00007c46, slist => ";", inegnumber => 2, oemcp => 720, group => 13, sabbrevlangname => "PAP" },
{ name => "pa-Arab-PK", lcid => 0x00000846 },
{ name => "pa-Guru" },
{ name => "pa-Guru-IN", alias => "pa-IN" },
{ name => "pa-IN", lcid => 0x00000446, sparent => "pa-Guru", file => "pa_Guru_IN", group => 15 },
## name => "pap", lcid => 0x00000079 },
## name => "pap-029", lcid => 0x00000479 },
{ name => "pcm" },
{ name => "pcm-NG" },
{ name => "pl", lcid => 0x00000015, oemcp => 852, ebcdiccp => 20880, group => 2, sabbrevlangname => "PLK", sopentypelang => "PLK" },
{ name => "pl-PL", lcid => 0x00000415 },
## name => "prg" },
## name => "prg-001" },
{ name => "prs", lcid => 0x0000008c, file => "fa", inegnumber => 3, oemcp => 720, group => 13, sopentypelang => "DRI" },
{ name => "prs-AF", lcid => 0x0000048c, file => "fa_AF" },
{ name => "prs-Arab", alias => "prs" },
{ name => "prs-Arab-AF", alias => "prs-AF" },
{ name => "ps", lcid => 0x00000063, group => 13, sabbrevlangname => "PAS", sopentypelang => "PAS" },
{ name => "ps-AF", lcid => 0x00000463 },
{ name => "ps-PK" },
{ name => "pt", lcid => 0x00000016, oemcp => 850, sabbrevlangname => "PTB", sopentypelang => "PTG" },
{ name => "pt-AO" },
{ name => "pt-BR", lcid => 0x00000416 },
{ name => "pt-CH", oemcp => 65001 },
{ name => "pt-CV" },
{ name => "pt-GQ", oemcp => 65001 },
{ name => "pt-GW" },
{ name => "pt-LU", oemcp => 65001 },
{ name => "pt-MO" },
{ name => "pt-MZ" },
{ name => "pt-PT", lcid => 0x00000816, sabbrevlangname => "PTG" },
{ name => "pt-ST" },
{ name => "pt-TL" },
## name => "qps-Latn-x-sh", lcid => 0x80000901 },
## name => "qps-ploc", lcid => 0x80000501 },
## name => "qps-ploca", lcid => 0x800005fe },
## name => "qps-plocm", lcid => 0x800009ff },
{ name => "qu", alias => "quz" },
{ name => "qu-BO", alias => "quz-BO" },
{ name => "qu-EC", alias => "quz-EC" },
{ name => "qu-PE", alias => "quz-PE" },
{ name => "quc", lcid => 0x00000086, oemcp => 850, ebcdiccp => 20284, slist => ",", dir => "seed" },
{ name => "quc-Latn", lcid => 0x00007c86, file => "quc", dir => "seed" },
{ name => "quc-Latn-GT", lcid => 0x00000486, file => "quc_GT", dir => "seed" },
{ name => "qut", alias => "quc" },
{ name => "qut-GT", alias => "quc-Latn-GT" },
{ name => "quz", lcid => 0x0000006b, file => "qu", territory => "BO", oemcp => 850, ebcdiccp => 20284, slist => "," },
{ name => "quz-BO", lcid => 0x0000046b, file => "qu_BO" },
{ name => "quz-EC", lcid => 0x0000086b, file => "qu_EC" },
{ name => "quz-Latn", alias => "quz" },
{ name => "quz-Latn-BO", alias => "quz-BO" },
{ name => "quz-Latn-EC", alias => "quz-EC" },
{ name => "quz-Latn-PE", alias => "quz-PE" },
{ name => "quz-PE", lcid => 0x00000c6b, file => "qu_PE" },
{ name => "rm", lcid => 0x00000017, oemcp => 850, ebcdiccp => 20273, sabbrevlangname => "RMC", sopentypelang => "RMS" },
{ name => "rm-CH", lcid => 0x00000417 },
{ name => "rn" },
{ name => "rn-BI" },
{ name => "ro", lcid => 0x00000018, oemcp => 852, ebcdiccp => 20880, sabbrevlangname => "ROM", sopentypelang => "ROM" },
{ name => "ro-MD", lcid => 0x00000818, maccp => 65001, sabbrevlangname => "ROD" },
{ name => "ro-RO", lcid => 0x00000418, group => 2 },
{ name => "rof" },
{ name => "rof-TZ" },
{ name => "ru", lcid => 0x00000019, oemcp => 866 },
{ name => "ru-BY", maccp => 65001 },
{ name => "ru-KG", maccp => 65001 },
{ name => "ru-KZ", maccp => 65001 },
{ name => "ru-MD", lcid => 0x00000819, maccp => 65001, sabbrevlangname => "RUM" },
{ name => "ru-RU", lcid => 0x00000419, group => 5 },
{ name => "ru-UA", maccp => 65001 },
{ name => "rw", lcid => 0x00000087, oemcp => 437, sopentypelang => "RUA" },
{ name => "rw-RW", lcid => 0x00000487 },
{ name => "rwk" },
{ name => "rwk-TZ" },
{ name => "sa", lcid => 0x0000004f, slist => ",", group => 15 },
{ name => "sa-Deva", alias => "sa" },
{ name => "sa-Deva-IN", alias => "sa-IN" },
{ name => "sa-IN", lcid => 0x0000044f },
{ name => "sah", lcid => 0x00000085, oemcp => 866, group => 5, sopentypelang => "YAK" },
{ name => "sah-Cyrl", alias => "sah" },
{ name => "sah-Cyrl-RU", alias => "sah-RU" },
{ name => "sah-RU", lcid => 0x00000485 },
{ name => "saq" },
{ name => "saq-KE" },
{ name => "sat" },
{ name => "sat-Olck" },
{ name => "sat-Olck-IN" },
{ name => "sbp" },
{ name => "sbp-TZ" },
{ name => "sc" },
{ name => "sc-IT" },
{ name => "sd", lcid => 0x00000059, inegnumber => 3, oemcp => 720, sabbrevlangname => "SIP" },
{ name => "sd-Arab", lcid => 0x00007c59, group => 13 },
{ name => "sd-Arab-PK", lcid => 0x00000859 },
{ name => "sd-Deva", inegnumber => 1, slist => ",", oemcp => 65001, group => 15 },
{ name => "sd-Deva-IN", lcid => 0x00000459, sabbrevlangname => "ZZZ" },
{ name => "sd-PK", alias => "sd-Arab-PK" },
{ name => "se", lcid => 0x0000003b, oemcp => 850, ebcdiccp => 20277, sopentypelang => "NSM" },
{ name => "se-FI", lcid => 0x00000c3b, ebcdiccp => 20278, sabbrevlangname => "SMG" },
{ name => "se-NO", lcid => 0x0000043b },
{ name => "se-SE", lcid => 0x0000083b, ebcdiccp => 20278, sabbrevlangname => "SMF" },
{ name => "se-Latn", alias => "se" },
{ name => "se-Latn-FI", alias => "se-FI" },
{ name => "se-Latn-NO", alias => "se-NO" },
{ name => "se-Latn-SE", alias => "se-SE" },
{ name => "seh" },
{ name => "seh-MZ" },
{ name => "ses" },
{ name => "ses-ML" },
{ name => "sg", sopentypelang => "SGO" },
{ name => "sg-CF" },
{ name => "shi" },
{ name => "shi-Latn" },
{ name => "shi-Latn-MA" },
{ name => "shi-Tfng" },
{ name => "shi-Tfng-MA" },
{ name => "si", lcid => 0x0000005b, group => 15, sopentypelang => "SNH" },
{ name => "si-LK", lcid => 0x0000045b },
{ name => "sk", lcid => 0x0000001b, oemcp => 852, ebcdiccp => 20880, group => 2, sabbrevlangname => "SKY", sopentypelang => "SKY" },
{ name => "sk-SK", lcid => 0x0000041b },
{ name => "sl", lcid => 0x00000024, oemcp => 852, ebcdiccp => 20880, group => 2 },
{ name => "sl-SI", lcid => 0x00000424 },
{ name => "sma", lcid => 0x0000783b, sparent => "se", ebcdiccp => 20278, dir => "seed", sabbrevlangname => "SMB", sopentypelang => "SSM" },
{ name => "sma-Latn", alias => "sma" },
{ name => "sma-Latn-NO", alias => "sma-NO" },
{ name => "sma-Latn-SE", alias => "sma-SE" },
{ name => "sma-NO", lcid => 0x0000183b, ebcdiccp => 20277, dir => "seed", sabbrevlangname => "SMA" },
{ name => "sma-SE", lcid => 0x00001c3b, dir => "seed" },
{ name => "smj", lcid => 0x00007c3b, sparent => "se", ebcdiccp => 20278, dir => "seed", sabbrevlangname => "SMK", sopentypelang => "LSM" },
{ name => "smj-Latn", alias => "smj" },
{ name => "smj-Latn-NO", alias => "smj-NO" },
{ name => "smj-Latn-SE", alias => "smj-SE" },
{ name => "smj-NO", lcid => 0x0000103b, ebcdiccp => 20277, dir => "seed", sabbrevlangname => "SMJ" },
{ name => "smj-SE", lcid => 0x0000143b, dir => "seed" },
{ name => "smn", lcid => 0x0000703b, sparent => "se", ebcdiccp => 20278, sopentypelang => "ISM" },
{ name => "smn-FI", lcid => 0x0000243b },
{ name => "smn-Latn", alias => "smn" },
{ name => "smn-Latn-FI", alias => "smn-FI" },
{ name => "sms", lcid => 0x0000743b, sparent => "se", ebcdiccp => 20278, dir => "seed", sopentypelang => "SKS" },
{ name => "sms-FI", lcid => 0x0000203b, dir => "seed" },
{ name => "sms-Latn", alias => "sms" },
{ name => "sms-Latn-FI", alias => "sms-FI" },
{ name => "sn", sopentypelang => "SNA0" },
{ name => "sn-Latn", file => "sn" },
{ name => "sn-Latn-ZW", file => "sn_ZW" },
{ name => "sn-ZW", alias => "sn-Latn-ZW" },
{ name => "so", lcid => 0x00000077, sopentypelang => "SML" },
{ name => "so-DJ" },
{ name => "so-ET" },
{ name => "so-KE" },
{ name => "so-SO", lcid => 0x00000477 },
{ name => "sq", lcid => 0x0000001c, oemcp => 852, ebcdiccp => 20880, group => 2 },
{ name => "sq-AL", lcid => 0x0000041c },
{ name => "sq-MK" },
{ name => "sq-XK" },
{ name => "sr", lcid => 0x00007c1a, sortlocale => "sr-Latn-RS", oemcp => 852, group => 2, sabbrevlangname => "SRB", sopentypelang => "SRB" },
{ name => "sr-Cyrl", lcid => 0x00006c1a, oemcp => 855, ebcdiccp => 21025, group => 5, sabbrevlangname => "SRO" },
{ name => "sr-Cyrl-BA", lcid => 0x00001c1a, sabbrevlangname => "SRN" },
{ name => "sr-Cyrl-ME", lcid => 0x0000301a, sabbrevlangname => "SRQ" },
{ name => "sr-Cyrl-RS", lcid => 0x0000281a },
{ name => "sr-Cyrl-XK" },
{ name => "sr-Latn", lcid => 0x0000701a, sabbrevlangname => "SRM" },
{ name => "sr-Latn-BA", lcid => 0x0000181a, maccp => 10082, ebcdiccp => 870, sabbrevlangname => "SRS" },
{ name => "sr-Latn-ME", lcid => 0x00002c1a, sabbrevlangname => "SRP" },
{ name => "sr-Latn-RS", lcid => 0x0000241a, sabbrevlangname => "SRM" },
{ name => "sr-Latn-XK" },
## name => "sr-Cyrl-CS", lcid => 0x00000c1a },
## name => "sr-Latn-CS", lcid => 0x0000081a },
{ name => "ss", dir => "seed", sopentypelang => "SWZ" },
{ name => "ss-SZ", dir => "seed" },
{ name => "ss-ZA", dir => "seed" },
{ name => "ssy", dir => "seed" },
{ name => "ssy-ER", dir => "seed" },
{ name => "st", lcid => 0x00000030, dir => "seed" },
{ name => "st-LS", dir => "seed" },
{ name => "st-ZA", lcid => 0x00000430, dir => "seed" },
{ name => "su" },
{ name => "su-Latn" },
{ name => "su-Latn-ID" },
{ name => "sv", lcid => 0x0000001d, oemcp => 850, ebcdiccp => 20278, sabbrevlangname => "SVE", sopentypelang => "SVE" },
{ name => "sv-AX" },
{ name => "sv-FI", lcid => 0x0000081d, sabbrevlangname => "SVF" },
{ name => "sv-SE", lcid => 0x0000041d, sabbrevlangname => "SVE" },
{ name => "sw", lcid => 0x00000041, territory => "KE", oemcp => 437, ebcdiccp => 500, sabbrevlangname => "SWK", sopentypelang => "SWK" },
{ name => "sw-CD" },
{ name => "sw-KE", lcid => 0x00000441 },
{ name => "sw-TZ" },
{ name => "sw-UG" },
{ name => "swc-CD", alias => "sw-CD" },
{ name => "syr", lcid => 0x0000005a, slist => ",", group => 13, dir => "seed" },
{ name => "syr-SY", lcid => 0x0000045a, dir => "seed" },
{ name => "syr-Syrc", alias => "syr" },
{ name => "syr-Syrc-SY", alias => "syr-SY" },
{ name => "ta", lcid => 0x00000049, slist => ",", group => 15, sabbrevlangname => "TAI" },
{ name => "ta-IN", lcid => 0x00000449 },
{ name => "ta-LK", lcid => 0x00000849, sabbrevlangname => "TAM" },
{ name => "ta-MY" },
{ name => "ta-SG" },
{ name => "te", lcid => 0x0000004a, group => 15 },
{ name => "te-IN", lcid => 0x0000044a },
{ name => "teo" },
{ name => "teo-KE" },
{ name => "teo-UG" },
{ name => "tg", lcid => 0x00000028, oemcp => 866, group => 5, sabbrevlangname => "TAJ", sopentypelang => "TAJ" },
{ name => "tg-Cyrl", lcid => 0x00007c28, file => "tg" },
{ name => "tg-Cyrl-TJ", lcid => 0x00000428, file => "tg_TJ" },
{ name => "tg-TJ", alias => "tg-Cyrl-TJ" },
{ name => "th", lcid => 0x0000001e, oemcp => 874, ebcdiccp => 20838, slist => ",", group => 11 },
{ name => "th-TH", lcid => 0x0000041e },
{ name => "ti", lcid => 0x00000073, territory => "ER", sopentypelang => "TGY" },
{ name => "ti-ER", lcid => 0x00000873 },
{ name => "ti-ET", lcid => 0x00000473, sabbrevlangname => "TIE" },
{ name => "tig", dir => "seed", sopentypelang => "TGR" },
{ name => "tig-ER", dir => "seed" },
{ name => "tk", lcid => 0x00000042, oemcp => 852, ebcdiccp => 20880, group => 2, sopentypelang => "TKM" },
{ name => "tk-Latn", alias => "tk" },
{ name => "tk-Latn-TM", alias => "tk-TM" },
{ name => "tk-TM", lcid => 0x00000442 },
{ name => "tn", lcid => 0x00000032, oemcp => 850, dir => "seed", sopentypelang => "TNA" },
{ name => "tn-BW", lcid => 0x00000832, dir => "seed", sabbrevlangname => "TSB" },
{ name => "tn-ZA", lcid => 0x00000432, dir => "seed" },
{ name => "to", sopentypelang => "TGN" },
{ name => "to-TO" },
{ name => "tr", lcid => 0x0000001f, oemcp => 857, ebcdiccp => 20905, group => 6, sabbrevlangname => "TRK", sopentypelang => "TRK" },
{ name => "tr-CY" },
{ name => "tr-TR", lcid => 0x0000041f },
{ name => "ts", lcid => 0x00000031, dir => "seed", sopentypelang => "TSG" },
{ name => "ts-ZA", lcid => 0x00000431, dir => "seed" },
{ name => "tt", lcid => 0x00000044, oemcp => 866, group => 5, sabbrevlangname => "TTT" },
{ name => "tt-Cyrl", alias => "tt" },
{ name => "tt-Cyrl-RU", alias => "tt-RU" },
{ name => "tt-RU", lcid => 0x00000444 },
{ name => "twq" },
{ name => "twq-NE" },
{ name => "tzm", lcid => 0x0000005f, sortlocale => "tzm-Latn-DZ", oemcp => 850, ebcdiccp => 20297, sabbrevlangname => "TZA" },
{ name => "tzm-Latn", lcid => 0x00007c5f, territory => "DZ", file => "tzm" },
{ name => "tzm-Latn-MA", file => "tzm_MA", oemcp => 65001 },
{ name => "tzm-Latn-DZ", lcid => 0x0000085f, file => "tzm" },
{ name => "tzm-MA", alias => "tzm-Latn-MA" },
## name => "tzm-Arab", group => 13 },
## name => "tzm-Arab-MA", lcid => 0x0000045f },
## name => "tzm-Tfng", lcid => 0x0000785f },
## name => "tzm-Tfng-MA", lcid => 0x0000105f },
{ name => "ug", lcid => 0x00000080, oemcp => 720, slist => ",", group => 13, sopentypelang => "UYG" },
{ name => "ug-Arab", alias => "ug" },
{ name => "ug-Arab-CN", alias => "ug-CN" },
{ name => "ug-CN", lcid => 0x00000480 },
{ name => "uk", lcid => 0x00000022, oemcp => 866, maccp => 10017, ebcdiccp => 500, group => 5 },
{ name => "uk-UA", lcid => 0x00000422 },
{ name => "ur", lcid => 0x00000020, oemcp => 720 },
{ name => "ur-IN", lcid => 0x00000820, maccp => 65001, sabbrevlangname => "URI" },
{ name => "ur-PK", lcid => 0x00000420, group => 13 },
{ name => "uz", lcid => 0x00000043, oemcp => 857, maccp => 10029, group => 2 },
{ name => "uz-Arab", oemcp => 65001, maccp => 65001 },
{ name => "uz-Arab-AF" },
{ name => "uz-Cyrl", lcid => 0x00007843, oemcp => 866, maccp => 10007, group => 5, sabbrevlangname => "UZC" },
{ name => "uz-Cyrl-UZ", lcid => 0x00000843 },
{ name => "uz-Latn", lcid => 0x00007c43 },
{ name => "uz-Latn-UZ", lcid => 0x00000443 },
{ name => "vai" },
{ name => "vai-Latn" },
{ name => "vai-Latn-LR" },
{ name => "vai-Vaii" },
{ name => "vai-Vaii-LR" },
{ name => "ve", lcid => 0x00000033, dir => "seed", sabbrevlangname => "ZZZ" },
{ name => "ve-ZA", lcid => 0x00000433, dir => "seed" },
{ name => "vi", lcid => 0x0000002a, oemcp => 1258, slist => ",", group => 14, sabbrevlangname => "VIT", sopentypelang => "VIT" },
{ name => "vi-VN", lcid => 0x0000042a },
{ name => "vo", dir => "seed" },
{ name => "vo-001", dir => "seed" },
{ name => "vun" },
{ name => "vun-TZ" },
{ name => "wae" },
{ name => "wae-CH" },
{ name => "wal", dir => "seed" },
{ name => "wal-ET", dir => "seed" },
{ name => "wo", lcid => 0x00000088, oemcp => 850, ebcdiccp => 20297, sopentypelang => "WLF" },
{ name => "wo-Latn", alias => "wo" },
{ name => "wo-Latn-SN", alias => "wo-SN" },
{ name => "wo-SN", lcid => 0x00000488 },
{ name => "x-IV_mathan", lcid => 0x0001007f, alias => "" },
{ name => "xh", lcid => 0x00000034, oemcp => 850, sopentypelang => "XHS" },
{ name => "xh-ZA", lcid => 0x00000434 },
{ name => "xog" },
{ name => "xog-UG" },
{ name => "yav" },
{ name => "yav-CM" },
{ name => "yi", lcid => 0x0000003d, sabbrevlangname => "ZZZ", sopentypelang => "JII" },
{ name => "yi-001", lcid => 0x0000043d },
{ name => "yo", lcid => 0x0000006a, oemcp => 437, sopentypelang => "YBA" },
{ name => "yo-BJ", ebcdiccp => 500 },
{ name => "yo-Latn", alias => "yo" },
{ name => "yo-Latn-NG", alias => "yo-NG" },
{ name => "yo-NG", lcid => 0x0000046a },
{ name => "yrl" },
{ name => "yrl-BR" },
{ name => "yrl-CO" },
{ name => "yrl-VE" },
{ name => "yue" },
{ name => "yue-Hans" },
{ name => "yue-Hans-CN" },
{ name => "yue-Hant" },
{ name => "yue-Hant-HK" },
{ name => "zgh" },
{ name => "zgh-MA", alias => "zgh-Tfng-MA" },
{ name => "zgh-Tfng", file => "zgh" },
{ name => "zgh-Tfng-MA", file => "zgh_MA" },
{ name => "zh", lcid => 0x00007804, ireadinglayout => 2, oemcp => 936, slist => ",", sscripts => "Hani Hans", sabbrevlangname => "CHS", sopentypelang => "ZHS" },
{ name => "zh-CN", lcid => 0x00000804, file => "zh_Hans_CN", sparent => "zh-Hans" },
{ name => "zh-CN_phoneb", lcid => 0x00050804, alias => "zh-CN" },
{ name => "zh-CN_stroke", lcid => 0x00020804, alias => "zh-CN" },
{ name => "zh-Hans", lcid => 0x00000004, group => 10 },
{ name => "zh-Hans-CN", alias => "zh-CN" },
{ name => "zh-Hans-CN-u-co-phonebk", alias => "zh-CN_phoneb" },
{ name => "zh-Hans-CN-u-co-stroke", alias => "zh-CN_stroke" },
{ name => "zh-Hans-HK", slist => ";" },
{ name => "zh-Hans-MO", slist => ";" },
{ name => "zh-Hans-SG", alias => "zh-SG" },
{ name => "zh-Hans-SG-u-co-phonebk", alias => "zh-SG_phoneb" },
{ name => "zh-Hans-SG-u-co-stroke", alias => "zh-SG_stroke" },
{ name => "zh-Hant", lcid => 0x00007c04, sortlocale => "zh-HK", ireadinglayout => 2, oemcp => 950, slist => ",", sscripts => "Hani Hant", group => 9, sabbrevlangname => "CHT", sopentypelang => "ZHH" },
{ name => "zh-Hant-HK", alias => "zh-HK" },
{ name => "zh-Hant-HK-u-co-unihan", alias => "zh-HK_radstr" },
{ name => "zh-Hant-MO", alias => "zh-MO" },
{ name => "zh-Hant-MO-u-co-stroke", alias => "zh-MO_stroke" },
{ name => "zh-Hant-MO-u-co-unihan", alias => "zh-MO_radstr" },
{ name => "zh-Hant-TW", alias => "zh-TW" },
{ name => "zh-Hant-TW-u-co-phonetic", alias => "zh-TW_pronun" },
{ name => "zh-Hant-TW-u-co-unihan", alias => "zh-TW_radstr" },
{ name => "zh-HK", lcid => 0x00000c04, file => "zh_Hant_HK", sparent => "zh-Hant", sabbrevlangname => "ZHH" },
{ name => "zh-HK_radstr", lcid => 0x00040c04, alias => "zh-HK" },
{ name => "zh-MO", lcid => 0x00001404, file => "zh_Hant_MO", sparent => "zh-Hant", sabbrevlangname => "ZHM", sopentypelang => "ZHT" },
{ name => "zh-MO_radstr", lcid => 0x00041404, alias => "zh-MO" },
{ name => "zh-MO_stroke", lcid => 0x00021404, alias => "zh-MO" },
{ name => "zh-SG", lcid => 0x00001004, file => "zh_Hans_SG", sparent => "zh-Hans", sabbrevlangname => "ZHI" },
{ name => "zh-SG_phoneb", lcid => 0x00051004, alias => "zh-SG" },
{ name => "zh-SG_stroke", lcid => 0x00021004, alias => "zh-SG" },
{ name => "zh-TW", lcid => 0x00000404, file => "zh_Hant_TW", sparent => "zh-Hant", sopentypelang => "ZHT" },
{ name => "zh-TW_pronun", lcid => 0x00030404, alias => "zh-TW" },
{ name => "zh-TW_radstr", lcid => 0x00040404, alias => "zh-TW" },
{ name => "zu", lcid => 0x00000035, oemcp => 850 },
{ name => "zu-ZA", lcid => 0x00000435 },
);
# Calendar table, one entry per calendar id.
# Fields used by the entries below:
#   id               - numeric calendar identifier (presumably the Windows
#                      CAL_* value -- TODO confirm against the code that reads it)
#   name             - explicit name, only present on entries that have no "type"
#   type             - calendar type name (looks like a CLDR calendar type -- confirm)
#   locale           - locale name associated with the calendar
#   eras             - list of era indices
#   itwodigityearmax - presumably the last year covered by two-digit years -- TODO confirm
my @calendars =
(
{ id => 1, name => "Gregorian", itwodigityearmax => 2049 },
{ id => 2, type => "gregorian", locale => "en-US", itwodigityearmax => 2049 },
{ id => 3, type => "japanese", locale => "ja-JP", eras => [ 232..236 ] },
{ id => 4, type => "roc", locale => "zh-TW", eras => [ 1 ] },
{ id => 5, type => "dangi", locale => "ko-KR", eras => [ 0 ] },
{ id => 6, type => "islamic", locale => "ar-SA", itwodigityearmax => 1451 },
{ id => 7, type => "buddhist", locale => "th-TH", eras => [ 0 ] },
{ id => 8, type => "hebrew", locale => "he-IL", itwodigityearmax => 5810 },
{ id => 9, type => "gregorian", locale => "fr-FR", itwodigityearmax => 2049 },
{ id => 10, type => "gregorian", locale => "ar-SA", itwodigityearmax => 2049 },
{ id => 11, type => "gregorian", locale => "ar-SA", itwodigityearmax => 2049 },
{ id => 12, type => "gregorian", locale => "ar-SA", itwodigityearmax => 2049 },
{ id => 13, name => "Julian", locale => "en-US", itwodigityearmax => 2049 },
{ id => 14, name => "Japanese Lunisolar" },
{ id => 15, name => "Chinese Lunisolar" },
{ id => 16, name => "Saka" },
{ id => 17, name => "Lunar ETO Chinese" },
{ id => 18, name => "Lunar ETO Korean" },
{ id => 19, name => "Lunar ETO Rokuyou" },
{ id => 20, name => "Korean Lunisolar" },
{ id => 21, name => "Taiwan Lunisolar" },
{ id => 22, type => "persian", locale => "prs-AF", itwodigityearmax => 1429 },
{ id => 23, type => "islamic-umalqura", locale => "ar-SA", itwodigityearmax => 1451 },
);
# Geographical location table, one entry per location id.
# Every entry has a numeric "id" (presumably the Windows GEOID -- TODO confirm).
# Most entries have a "name", which is either a two-letter region code or a
# three-digit area code; the trailing comment gives the English name.
# Entries may also carry "parent" (the name of another entry, presumably the
# enclosing region -- confirm), "region" or "sintlsymbol".
my @geoids =
(
{ id => 2, name => "AG" }, # Antigua and Barbuda
{ id => 3, name => "AF" }, # Afghanistan
{ id => 4, name => "DZ" }, # Algeria
{ id => 5, name => "AZ" }, # Azerbaijan
{ id => 6, name => "AL" }, # Albania
{ id => 7, name => "AM" }, # Armenia
{ id => 8, name => "AD" }, # Andorra
{ id => 9, name => "AO" }, # Angola
{ id => 10, name => "AS" }, # American Samoa
{ id => 11, name => "AR" }, # Argentina
{ id => 12, name => "AU" }, # Australia
{ id => 14, name => "AT" }, # Austria
{ id => 17, name => "BH" }, # Bahrain
{ id => 18, name => "BB" }, # Barbados
{ id => 19, name => "BW" }, # Botswana
{ id => 20, name => "BM" }, # Bermuda
{ id => 21, name => "BE" }, # Belgium
{ id => 22, name => "BS" }, # Bahamas, The
{ id => 23, name => "BD" }, # Bangladesh
{ id => 24, name => "BZ" }, # Belize
{ id => 25, name => "BA" }, # Bosnia and Herzegovina
{ id => 26, name => "BO" }, # Bolivia
{ id => 27, name => "MM" }, # Myanmar
{ id => 28, name => "BJ" }, # Benin
{ id => 29, name => "BY" }, # Belarus
{ id => 30, name => "SB" }, # Solomon Islands
{ id => 32, name => "BR" }, # Brazil
{ id => 34, name => "BT" }, # Bhutan
{ id => 35, name => "BG" }, # Bulgaria
{ id => 37, name => "BN" }, # Brunei
{ id => 38, name => "BI" }, # Burundi
{ id => 39, name => "CA" }, # Canada
{ id => 40, name => "KH" }, # Cambodia
{ id => 41, name => "TD" }, # Chad
{ id => 42, name => "LK" }, # Sri Lanka
{ id => 43, name => "CG" }, # Congo
{ id => 44, name => "CD" }, # Congo (DRC)
{ id => 45, name => "CN" }, # China
{ id => 46, name => "CL" }, # Chile
{ id => 49, name => "CM" }, # Cameroon
{ id => 50, name => "KM" }, # Comoros
{ id => 51, name => "CO" }, # Colombia
{ id => 54, name => "CR" }, # Costa Rica
{ id => 55, name => "CF" }, # Central African Republic
{ id => 56, name => "CU" }, # Cuba
{ id => 57, name => "CV" }, # Cape Verde
{ id => 59, name => "CY" }, # Cyprus
{ id => 61, name => "DK" }, # Denmark
{ id => 62, name => "DJ" }, # Djibouti
{ id => 63, name => "DM" }, # Dominica
{ id => 65, name => "DO" }, # Dominican Republic
{ id => 66, name => "EC" }, # Ecuador
{ id => 67, name => "EG" }, # Egypt
{ id => 68, name => "IE" }, # Ireland
{ id => 69, name => "GQ" }, # Equatorial Guinea
{ id => 70, name => "EE" }, # Estonia
{ id => 71, name => "ER" }, # Eritrea
{ id => 72, name => "SV" }, # El Salvador
{ id => 73, name => "ET" }, # Ethiopia
{ id => 75, name => "CZ" }, # Czech Republic
{ id => 77, name => "FI" }, # Finland
{ id => 78, name => "FJ" }, # Fiji Islands
{ id => 80, name => "FM" }, # Micronesia
{ id => 81, name => "FO" }, # Faroe Islands
{ id => 84, name => "FR" }, # France
{ id => 86, name => "GM" }, # Gambia, The
{ id => 87, name => "GA" }, # Gabon
{ id => 88, name => "GE" }, # Georgia
{ id => 89, name => "GH" }, # Ghana
{ id => 90, name => "GI" }, # Gibraltar
{ id => 91, name => "GD" }, # Grenada
{ id => 93, name => "GL" }, # Greenland
{ id => 94, name => "DE" }, # Germany
{ id => 98, name => "GR" }, # Greece
{ id => 99, name => "GT" }, # Guatemala
{ id => 100, name => "GN" }, # Guinea
{ id => 101, name => "GY" }, # Guyana
{ id => 103, name => "HT" }, # Haiti
{ id => 104, name => "HK" }, # Hong Kong S.A.R.
{ id => 106, name => "HN" }, # Honduras
{ id => 108, name => "HR" }, # Croatia
{ id => 109, name => "HU" }, # Hungary
{ id => 110, name => "IS" }, # Iceland
{ id => 111, name => "ID" }, # Indonesia
{ id => 113, name => "IN" }, # India
{ id => 114, name => "IO" }, # British Indian Ocean Territory
{ id => 116, name => "IR" }, # Iran
{ id => 117, name => "IL" }, # Israel
{ id => 118, name => "IT" }, # Italy
{ id => 119, name => "CI" }, # Côte d'Ivoire
{ id => 121, name => "IQ" }, # Iraq
{ id => 122, name => "JP" }, # Japan
{ id => 124, name => "JM" }, # Jamaica
{ id => 125, name => "SJ" }, # Jan Mayen
{ id => 126, name => "JO" }, # Jordan
{ id => 127, parent => "UM" }, # Johnston Atoll
{ id => 129, name => "KE" }, # Kenya
{ id => 130, name => "KG" }, # Kyrgyzstan
{ id => 131, name => "KP" }, # North Korea
{ id => 133, name => "KI" }, # Kiribati
{ id => 134, name => "KR" }, # Korea
{ id => 136, name => "KW" }, # Kuwait
{ id => 137, name => "KZ" }, # Kazakhstan
{ id => 138, name => "LA" }, # Laos
{ id => 139, name => "LB" }, # Lebanon
{ id => 140, name => "LV" }, # Latvia
{ id => 141, name => "LT" }, # Lithuania
{ id => 142, name => "LR" }, # Liberia
{ id => 143, name => "SK" }, # Slovakia
{ id => 145, name => "LI" }, # Liechtenstein
{ id => 146, name => "LS" }, # Lesotho
{ id => 147, name => "LU" }, # Luxembourg
{ id => 148, name => "LY" }, # Libya
{ id => 149, name => "MG" }, # Madagascar
{ id => 151, name => "MO" }, # Macao S.A.R.
{ id => 152, name => "MD" }, # Moldova
{ id => 154, name => "MN" }, # Mongolia
{ id => 156, name => "MW" }, # Malawi
{ id => 157, name => "ML" }, # Mali
{ id => 158, name => "MC" }, # Monaco
{ id => 159, name => "MA" }, # Morocco
{ id => 160, name => "MU" }, # Mauritius
{ id => 162, name => "MR" }, # Mauritania
{ id => 163, name => "MT" }, # Malta
{ id => 164, name => "OM" }, # Oman
{ id => 165, name => "MV" }, # Maldives
{ id => 166, name => "MX" }, # Mexico
{ id => 167, name => "MY" }, # Malaysia
{ id => 168, name => "MZ" }, # Mozambique
{ id => 173, name => "NE" }, # Niger
{ id => 174, name => "VU" }, # Vanuatu
{ id => 175, name => "NG" }, # Nigeria
{ id => 176, name => "NL" }, # Netherlands
{ id => 177, name => "NO" }, # Norway
{ id => 178, name => "NP" }, # Nepal
{ id => 180, name => "NR" }, # Nauru
{ id => 181, name => "SR" }, # Suriname
{ id => 182, name => "NI" }, # Nicaragua
{ id => 183, name => "NZ" }, # New Zealand
{ id => 184, name => "PS" }, # Palestinian Authority
{ id => 185, name => "PY" }, # Paraguay
{ id => 187, name => "PE" }, # Peru
{ id => 190, name => "PK" }, # Pakistan
{ id => 191, name => "PL" }, # Poland
{ id => 192, name => "PA" }, # Panama
{ id => 193, name => "PT" }, # Portugal
{ id => 194, name => "PG" }, # Papua New Guinea
{ id => 195, name => "PW" }, # Palau
{ id => 196, name => "GW" }, # Guinea-Bissau
{ id => 197, name => "QA" }, # Qatar
{ id => 198, name => "RE" }, # Reunion
{ id => 199, name => "MH" }, # Marshall Islands
{ id => 200, name => "RO" }, # Romania
{ id => 201, name => "PH" }, # Philippines
{ id => 202, name => "PR" }, # Puerto Rico
{ id => 203, name => "RU" }, # Russia
{ id => 204, name => "RW" }, # Rwanda
{ id => 205, name => "SA" }, # Saudi Arabia
{ id => 206, name => "PM" }, # St. Pierre and Miquelon
{ id => 207, name => "KN" }, # St. Kitts and Nevis
{ id => 208, name => "SC" }, # Seychelles
{ id => 209, name => "ZA" }, # South Africa
{ id => 210, name => "SN" }, # Senegal
{ id => 212, name => "SI" }, # Slovenia
{ id => 213, name => "SL" }, # Sierra Leone
{ id => 214, name => "SM" }, # San Marino
{ id => 215, name => "SG" }, # Singapore
{ id => 216, name => "SO" }, # Somalia
{ id => 217, name => "ES" }, # Spain
{ id => 218, name => "LC" }, # St. Lucia
{ id => 219, name => "SD" }, # Sudan
{ id => 220, name => "SJ" }, # Svalbard
{ id => 221, name => "SE" }, # Sweden
{ id => 222, name => "SY" }, # Syria
{ id => 223, name => "CH" }, # Switzerland
{ id => 224, name => "AE" }, # United Arab Emirates
{ id => 225, name => "TT" }, # Trinidad and Tobago
{ id => 227, name => "TH" }, # Thailand
{ id => 228, name => "TJ" }, # Tajikistan
{ id => 231, name => "TO" }, # Tonga
{ id => 232, name => "TG" }, # Togo
{ id => 233, name => "ST" }, # São Tomé and Príncipe
{ id => 234, name => "TN" }, # Tunisia
{ id => 235, name => "TR" }, # Turkey
{ id => 236, name => "TV" }, # Tuvalu
{ id => 237, name => "TW" }, # Taiwan
{ id => 238, name => "TM" }, # Turkmenistan
{ id => 239, name => "TZ" }, # Tanzania
{ id => 240, name => "UG" }, # Uganda
{ id => 241, name => "UA" }, # Ukraine
{ id => 242, name => "GB" }, # United Kingdom
{ id => 244, name => "US" }, # United States
{ id => 245, name => "BF" }, # Burkina Faso
{ id => 246, name => "UY" }, # Uruguay
{ id => 247, name => "UZ" }, # Uzbekistan
{ id => 248, name => "VC" }, # St. Vincent and the Grenadines
{ id => 249, name => "VE" }, # Bolivarian Republic of Venezuela
{ id => 251, name => "VN" }, # Vietnam
{ id => 252, name => "VI" }, # Virgin Islands
{ id => 253, name => "VA" }, # Vatican City
{ id => 254, name => "NA" }, # Namibia
{ id => 257, name => "EH" }, # Western Sahara (disputed)
{ id => 258, parent => "UM" }, # Wake Island
{ id => 259, name => "WS" }, # Samoa
{ id => 260, name => "SZ" }, # Swaziland
{ id => 261, name => "YE" }, # Yemen
{ id => 263, name => "ZM" }, # Zambia
{ id => 264, name => "ZW" }, # Zimbabwe
{ id => 269, name => "CS" }, # Serbia and Montenegro (Former)
{ id => 270, name => "ME" }, # Montenegro
{ id => 271, name => "RS" }, # Serbia
{ id => 273, name => "CW" }, # Curaçao
{ id => 276, name => "SS" }, # South Sudan
{ id => 300, name => "AI" }, # Anguilla
{ id => 301, name => "AQ" }, # Antarctica
{ id => 302, name => "AW" }, # Aruba
{ id => 303, parent => "SH" }, # Ascension Island
{ id => 304, parent => "053" }, # Ashmore and Cartier Islands
{ id => 305, parent => "UM" }, # Baker Island
{ id => 306, name => "BV" }, # Bouvet Island
{ id => 307, name => "KY" }, # Cayman Islands
{ id => 308, name => "830", parent => "155" }, # Channel Islands
{ id => 309, name => "CX" }, # Christmas Island
{ id => 310, parent => "009" }, # Clipperton Island
{ id => 311, name => "CC" }, # Cocos (Keeling) Islands
{ id => 312, name => "CK" }, # Cook Islands
{ id => 313, parent => "053" }, # Coral Sea Islands
{ id => 314, parent => "IO" }, # Diego Garcia
{ id => 315, name => "FK" }, # Falkland Islands (Islas Malvinas)
{ id => 317, name => "GF" }, # French Guiana
{ id => 318, name => "PF" }, # French Polynesia
{ id => 319, name => "TF" }, # French Southern and Antarctic Lands
{ id => 321, name => "GP" }, # Guadeloupe
{ id => 322, name => "GU" }, # Guam
{ id => 323 }, # Guantanamo Bay
{ id => 324, name => "GG" }, # Guernsey
{ id => 325, name => "HM" }, # Heard Island and McDonald Islands
{ id => 326, parent => "UM" }, # Howland Island
{ id => 327, parent => "UM" }, # Jarvis Island
{ id => 328, name => "JE" }, # Jersey
{ id => 329, parent => "UM" }, # Kingman Reef
{ id => 330, name => "MQ" }, # Martinique
{ id => 331, name => "YT" }, # Mayotte
{ id => 332, name => "MS" }, # Montserrat
{ id => 333, name => "AN", region => 1 }, # Netherlands Antilles (Former)
{ id => 334, name => "NC" }, # New Caledonia
{ id => 335, name => "NU" }, # Niue
{ id => 336, name => "NF" }, # Norfolk Island
{ id => 337, name => "MP" }, # Northern Mariana Islands
{ id => 338, parent => "UM" }, # Palmyra Atoll
{ id => 339, name => "PN" }, # Pitcairn Islands
{ id => 340, parent => "MP" }, # Rota Island
{ id => 341, parent => "MP" }, # Saipan
{ id => 342, name => "GS" }, # South Georgia and the South Sandwich Islands
{ id => 343, name => "SH" }, # St. Helena
{ id => 346, parent => "MP" }, # Tinian Island
{ id => 347, name => "TK" }, # Tokelau
{ id => 348, parent => "SH" }, # Tristan da Cunha
{ id => 349, name => "TC" }, # Turks and Caicos Islands
{ id => 351, name => "VG" }, # Virgin Islands, British
{ id => 352, name => "WF" }, # Wallis and Futuna
{ id => 742, name => "002" }, # Africa
{ id => 2129, name => "142" }, # Asia
{ id => 10541, name => "150" }, # Europe
{ id => 15126, name => "IM" }, # Man, Isle of
{ id => 19618, name => "MK" }, # Macedonia, Former Yugoslav Republic of
{ id => 20900, name => "054" }, # Melanesia
{ id => 21206, name => "057" }, # Micronesia
{ id => 21242, parent => "UM" }, # Midway Islands
{ id => 23581, name => "021" }, # Northern America
{ id => 26286, name => "061" }, # Polynesia
{ id => 27082, name => "013" }, # Central America
{ id => 27114, name => "009" }, # Oceania
{ id => 30967, name => "SX" }, # Sint Maarten (Dutch part)
{ id => 31396, name => "005" }, # South America
{ id => 31706, name => "MF" }, # Saint Martin (French part)
{ id => 39070, name => "001" }, # World
{ id => 42483, name => "011" }, # Western Africa
{ id => 42484, name => "017" }, # Middle Africa
{ id => 42487, name => "015" }, # Northern Africa
{ id => 47590, name => "143" }, # Central Asia
{ id => 47599, name => "035" }, # South-Eastern Asia
{ id => 47600, name => "030" }, # Eastern Asia
{ id => 47603, name => "014" }, # Eastern Africa
{ id => 47609, name => "151" }, # Eastern Europe
{ id => 47610, name => "039" }, # Southern Europe
{ id => 47611, name => "145" }, # Middle East
{ id => 47614, name => "034" }, # Southern Asia
{ id => 7299303, name => "TL" }, # Democratic Republic of Timor-Leste
{ id => 9914689, name => "XK" }, # Kosovo
{ id => 10026358, name => "019" }, # Americas
{ id => 10028789, name => "AX" }, # Åland Islands
{ id => 10039880, name => "029", sintlsymbol => "XCD" }, # Caribbean
{ id => 10039882, name => "154" }, # Northern Europe
{ id => 10039883, name => "018" }, # Southern Africa
{ id => 10210824, name => "155" }, # Western Europe
{ id => 10210825, name => "053" }, # Australia and New Zealand
{ id => 161832015, name => "BL" }, # Saint Barthélemy
{ id => 161832256, name => "UM" }, # U.S. Minor Outlying Islands
{ id => 161832257, name => "419", parent => "019" }, # Latin America and the Caribbean
{ id => 161832258, name => "BQ" }, # Bonaire, Sint Eustatius and Saba
);
# tables of the code page being generated: code page -> Unicode, glyph
# overrides, list of lead bytes and Unicode -> code page; they are reset
# by the dump_*_codepage functions
my (@cp2uni, @glyph2uni, @lead_bytes, @uni2cp);

# per-character tables, indexed by code point and filled by load_data()
my (@tolower_table, @toupper_table, @digitmap_table);
my (@halfwidth_table, @fullwidth_table, @cjk_compat_table);
my (@chinese_traditional_table, @chinese_simplified_table);
my (@category_table, @initial_joining_table, @direction_table);
my (@decomp_table, @combining_class_table, @decomp_compat_table, @comp_exclusions);
my (@idna_decomp_table, @idna_disallowed);

# registry keys collected by add_registry_key() and add_registry_value()
my %registry_keys;

# default characters of the code page being generated
my ($default_char, $default_wchar);

# presentation forms, indexed by the character they decompose to; filled by
# load_data() from the <isolated>, <final>, <initial> and <medial> decompositions
my %joining_forms = map { $_ => [] } qw(isolated final initial medial);
# convert a list of Unicode code points to a list of UTF-16 code units;
# code points from 0x10000 up are replaced by a surrogate pair
sub to_utf16(@)
{
    my @units = map
    {
        $_ < 0x10000
            ? $_
            : (0xd800 | (($_ - 0x10000) >> 10), 0xdc00 | (($_ - 0x10000) & 0x3ff));
    } @_;
    return @units;
}
################################################################
# fetch a unicode.org file and open it
#
# $base is either the URL of a .zip archive that contains $name, or the URL
# of the directory that $name is downloaded from.  Downloads are kept in
# $XDG_CACHE_HOME/wine (or $HOME/.cache/wine); when $base contains
# $UNIVERSION the version is appended to the name of the cached file.
# Returns a glob holding a handle opened for reading; dies on failure.
sub open_data_file($$)
{
    my ($base, $name) = @_;
    my $cache = ($ENV{XDG_CACHE_HOME} || "$ENV{HOME}/.cache") . "/wine";
    (my $dir = "$cache/$name") =~ s/\/[^\/]+$//;
    my $suffix = ($base =~ /\/\Q$UNIVERSION\E/) ? "-$UNIVERSION" : "";
    local *FILE;

    # download $url to $dest, creating the directory $destdir first
    my $fetch = sub
    {
        my ($url, $dest, $destdir) = @_;
        system( "mkdir", "-p", $destdir ) == 0 or die "cannot create $destdir";
        print "Fetching $url...\n";
        return if system( "wget", "-q", "-O", $dest, $url ) == 0;
        # wget -O creates the output file even when the download fails;
        # remove it so that it is not taken for a cached copy on the next run
        unlink $dest;
        die "cannot fetch $url";
    };

    if ($base =~ /.*\/([^\/]+)\.zip$/)
    {
        # the file is a member of a zip archive, read it through unzip
        my $zip = "$1$suffix.zip";
        $fetch->( $base, "$cache/$zip", $cache ) unless -f "$cache/$zip";
        open FILE, "-|", "unzip", "-p", "$cache/$zip", $name or die "cannot extract $name from $zip";
    }
    else
    {
        # plain file, the version suffix goes in front of the extension
        (my $dest = "$cache/$name") =~ s/(.*)(\.[^\/.]+)$/$1$suffix$2/;
        $fetch->( "$base/$name", $dest, $dir ) unless -f $dest;
        open FILE, "<", $dest or die "cannot open $dest: $!";
    }
    return *FILE;
}
################################################################
# load a unicode.org file as XML data
#
# Takes the same arguments as open_data_file() and returns the document
# produced by XML::LibXML->load_xml for the contents of the file.
sub load_xml_data_file($$)
{
    my ($base, $name) = @_;
    my $FILE = open_data_file( $base, $name );
    my $xml = XML::LibXML->load_xml( IO => $FILE );
    # close the handle that was returned to us; the bareword FILE glob is
    # only localized inside open_data_file() and is not open here
    close $FILE;
    return $xml;
}
################################################################
# recursively get the decomposition for a character
#
# $table is a reference to an array that maps a character to a reference
# to the list of characters it decomposes to.  A character that has no
# entry in the table is returned unchanged.
sub get_decomposition($$);
sub get_decomposition($$)
{
    my ($char, $table) = @_;
    my $mapping = $table->[$char];
    return $char unless defined $mapping;
    # expand every character of the mapping in turn, keeping their order
    my @expanded = map { get_decomposition( $_, $table ) } @{$mapping};
    return @expanded;
}
################################################################
# get the composition that results in a given character
#
# Returns the characters of the canonical decomposition of $ch when they
# can be composed back into $ch, and an empty list otherwise.
# $compat == 1 also takes the compatibility decompositions into account,
# $compat == 2 the IDNA ones.
sub get_composition($$)
{
    my ($ch, $compat) = @_;
    my $decomp = $decomp_table[$ch];
    return () unless defined $decomp;             # no decomposition
    my @seq = @{$decomp};
    return () if @seq < 2;                        # singleton decomposition
    return () if $comp_exclusions[$ch];           # composition exclusion
    return () if $combining_class_table[$ch];     # non-starter
    my ($first, $second) = @seq;
    return () if $combining_class_table[$first];  # first char is non-starter
    if ($compat == 1)
    {
        # first char has compat decomposition
        return () if !defined $decomp_table[$first] && defined $decomp_compat_table[$first];
    }
    elsif ($compat == 2)
    {
        my $idna = $idna_decomp_table[$first];
        # first char has IDNA decomposition
        return () if !defined $decomp_table[$first] && defined $idna;
        # first char's decomposition has IDNA decomposition
        return () if defined $idna && defined $idna_decomp_table[$idna->[0]];
        # second char has IDNA decomposition
        return () if defined $idna_decomp_table[$second];
    }
    return @seq;
}
################################################################
# recursively build decompositions
#
# Takes a table of decompositions indexed by character and returns a table
# with the same defined entries, each of them fully decomposed and
# converted to UTF-16.
sub build_decompositions(@)
{
    my @src = @_;
    my @dst;
    foreach my $ch (0 .. $#src)
    {
        next unless defined $src[$ch];
        $dst[$ch] = [ to_utf16( get_decomposition( $ch, \@src )) ];
    }
    return @dst;
}
################################################################
# compose Hangul sequences
#
# Takes a list of characters and returns it with every leading consonant +
# vowel pair replaced by the LV syllable, and every LV syllable + trailing
# consonant pair replaced by the LVT syllable.
sub compose_hangul(@)
{
    my ($SBASE, $LBASE, $VBASE, $TBASE) = (0xac00, 0x1100, 0x1161, 0x11a7);
    my ($LCOUNT, $VCOUNT, $TCOUNT) = (19, 21, 28);
    my $SCOUNT = $LCOUNT * $VCOUNT * $TCOUNT;
    my @seq = @_;
    my @ret;
    my $pos = 0;

    while ($pos < @seq)
    {
        # $pos always points to the char that follows $ch
        my $ch = $seq[$pos++];

        # leading consonant followed by a vowel
        if ($pos < @seq && $ch >= $LBASE && $ch < $LBASE + $LCOUNT)
        {
            my $vowel = $seq[$pos];
            if ($vowel >= $VBASE && $vowel < $VBASE + $VCOUNT)
            {
                $ch = $SBASE + (($ch - $LBASE) * $VCOUNT + ($vowel - $VBASE)) * $TCOUNT;
                $pos++;
            }
        }

        # syllable without trailing consonant followed by a trailing consonant
        if ($pos < @seq && $ch >= $SBASE && $ch < $SBASE + $SCOUNT && ($ch - $SBASE) % $TCOUNT == 0)
        {
            my $trail = $seq[$pos];
            if ($trail > $TBASE && $trail < $TBASE + $TCOUNT)
            {
                $ch += $trail - $TBASE;
                $pos++;
            }
        }
        push @ret, $ch;
    }
    return @ret;
}
################################################################
# remove linguistic-only mappings from the case table
#
# $upper and $lower are references to the uppercase and lowercase tables.
# Both tables are modified in place: a mapping is reset to undef unless
# the other table maps its result back to the original character.
sub remove_linguistic_mappings($$)
{
    my ($upper, $lower) = @_;

    # remove case mappings that don't round-trip; the uppercase table is
    # cleaned first, the lowercase table is then checked against the result
    foreach my $pass ([ $upper, $lower ], [ $lower, $upper ])
    {
        my ($table, $reverse) = @{$pass};
        foreach my $i (0 .. $#{$table})
        {
            my $ch = $table->[$i];
            next unless defined $ch;
            my $back = $reverse->[$ch];
            $table->[$i] = undef unless defined $back && $back == $i;
        }
    }
}
################################################################
# read in the Unicode database files
#
# Fills the global per-character tables (case mappings, categories,
# directions, combining classes, decompositions, width and joining forms)
# from UnicodeData.txt, then loads the composition exclusions, the IDNA
# mapping table and the Unihan simplified/traditional variants.
# Takes no arguments; dies on an unknown category or directionality.
sub load_data()
{
# first code point of the current First/Last range
my $start;
# now build mappings from the decomposition field of the Unicode database
my $UNICODE_DATA = open_data_file( $UNIDATA, "UnicodeData.txt" );
while (<$UNICODE_DATA>)
{
# Decode the fields ...
my ($code, $name, $cat, $comb, $bidi,
$decomp, $dec, $dig, $num, $mirror,
$oldname, $comment, $upper, $lower, $title) = split /;/;
my $src = hex $code;
die "unknown category $cat" unless defined $categories{$cat};
die "unknown directionality $bidi" unless defined $directions{$bidi};
$category_table[$src] = $categories{$cat};
# the direction is stored by name, not as a numeric value
$direction_table[$src] = $bidi;
# initial joining type: "T" for the Mn, Me and Cf categories, "U" otherwise
if ($cat eq "Mn" || $cat eq "Me" || $cat eq "Cf")
{
$initial_joining_table[$src] = $joining_types{"T"};
}
else
{
$initial_joining_table[$src] = $joining_types{"U"};
}
if ($lower ne "")
{
$tolower_table[$src] = hex $lower;
}
if ($upper ne "")
{
$toupper_table[$src] = hex $upper;
}
if ($dec ne "")
{
$category_table[$src] |= $ctype{"digit"};
}
if ($dig ne "")
{
# stored as the character code of the digit, not as its numeric value
$digitmap_table[$src] = ord $dig;
}
$combining_class_table[$src] = ($cat ne "Co") ? $comb : 0x100; # Private Use
# the remaining category flags are derived from the bidi class and the names
$category_table[$src] |= $ctype{"nonspacing"} if $bidi eq "NSM";
$category_table[$src] |= $ctype{"diacritic"} if $name =~ /^(COMBINING)|(MODIFIER LETTER)\W/;
$category_table[$src] |= $ctype{"vowelmark"} if $name =~ /\sVOWEL/ || $oldname =~ /\sVOWEL/;
$category_table[$src] |= $ctype{"halfwidth"} if $name =~ /^HALFWIDTH\s/;
$category_table[$src] |= $ctype{"fullwidth"} if $name =~ /^FULLWIDTH\s/;
$category_table[$src] |= $ctype{"hiragana"} if $name =~ /(HIRAGANA)|(\WKANA\W)/;
$category_table[$src] |= $ctype{"katakana"} if $name =~ /(KATAKANA)|(\WKANA\W)/;
$category_table[$src] |= $ctype{"ideograph"} if $name =~ /^<CJK Ideograph/;
$category_table[$src] |= $ctype{"ideograph"} if $name =~ /^CJK COMPATIBILITY IDEOGRAPH/;
$category_table[$src] |= $ctype{"ideograph"} if $name =~ /^HANGZHOU/;
$category_table[$src] |= $ctype{"highsurrogate"} if $name =~ /High Surrogate/;
$category_table[$src] |= $ctype{"lowsurrogate"} if $name =~ /Low Surrogate/;
# copy the category and direction for everything between First/Last pairs
if ($name =~ /, First>/) { $start = $src; }
if ($name =~ /, Last>/)
{
# the values of the Last entry have just been stored at $src
while ($start < $src)
{
$category_table[$start] = $category_table[$src];
$direction_table[$start] = $direction_table[$src];
$combining_class_table[$start] = $combining_class_table[$src];
$start++;
}
}
next if $decomp eq ""; # no decomposition, skip it
# tagged decomposition: keep the characters that follow the tag
if ($decomp =~ /^<([a-zA-Z]+)>\s+([0-9a-fA-F]+)/)
{
my @seq = map { hex $_; } (split /\s+/, (split /\s+/, $decomp, 2)[1]);
$decomp_compat_table[$src] = \@seq;
}
# single char <narrow>/<wide> decompositions define the width mappings
if ($decomp =~ /^<narrow>\s+([0-9a-fA-F]+)$/)
{
$halfwidth_table[hex $1] = $src;
$fullwidth_table[$src] = hex $1;
}
elsif ($decomp =~ /^<wide>\s+([0-9a-fA-F]+)$/)
{
next if hex $1 == 0x5c; # don't remap backslash
$fullwidth_table[hex $1] = $src;
$halfwidth_table[$src] = hex $1;
}
elsif ($decomp =~ /^<([a-zA-Z]+)>\s+([0-9a-fA-F]+)$/)
{
# decomposition of the form "<foo> 1234" -> use char if type is known
if ($1 eq "isolated" || $1 eq "final" || $1 eq "initial" || $1 eq "medial")
{
# index the form by the char it decomposes to
${joining_forms{$1}}[hex $2] = $src;
}
}
elsif ($decomp =~ /^<compat>\s+0020\s+([0-9a-fA-F]+)/)
{
# decomposition "<compat> 0020 1234" -> combining accent
}
elsif ($decomp =~ /^([0-9a-fA-F]+)/)
{
# store decomposition
# untagged decompositions go into both the canonical and the compat table
if ($decomp =~ /^([0-9a-fA-F]+)\s+([0-9a-fA-F]+)$/)
{
$decomp_table[$src] = $decomp_compat_table[$src] = [ hex $1, hex $2 ];
}
elsif ($decomp =~ /^([0-9a-fA-F]+)$/)
{
my $dst = hex $1;
# Single char decomposition
$decomp_table[$src] = $decomp_compat_table[$src] = [ $dst ];
$cjk_compat_table[$src] = $dst if $name =~ /^CJK COMPATIBILITY IDEOGRAPH/;
}
}
}
close $UNICODE_DATA;
# patch the category of some special characters
# a char with a canonical decomposition also gets the flags of the first
# char of that decomposition
for (my $i = 0; $i < @decomp_table; $i++)
{
next unless defined $decomp_table[$i];
$category_table[$i] |= $category_table[$decomp_table[$i]->[0]];
}
# add the flags listed explicitly in %special_categories
foreach my $cat (keys %special_categories)
{
my $flag = $ctype{$cat};
foreach my $i (@{$special_categories{$cat}}) { $category_table[$i] |= $flag; }
}
# a char with a two-char decomposition gets the diacritic flag of the second char
for (my $i = 0; $i < @decomp_compat_table; $i++)
{
next unless defined $decomp_compat_table[$i];
next unless @{$decomp_compat_table[$i]} == 2;
$category_table[$i] |= $category_table[$decomp_compat_table[$i]->[1]] & $ctype{"diacritic"};
}
# load the composition exclusions
my $EXCL = open_data_file( $UNIDATA, "CompositionExclusions.txt" );
while (<$EXCL>)
{
s/\#.*//; # remove comments
# a line is either a range "XXXX..YYYY" or a single code point
if (/^([0-9a-fA-F]+)\.\.([0-9a-fA-F]+)\s*$/)
{
foreach my $i (hex $1 .. hex $2) { $comp_exclusions[$i] = 1; }
}
elsif (/^([0-9a-fA-F]+)\s*$/)
{
$comp_exclusions[hex $1] = 1;
}
}
close $EXCL;
# load the IDNA mappings
# the IDNA table starts as a copy of the compatibility decompositions
@idna_decomp_table = @decomp_compat_table;
my $IDNA = open_data_file( $IDNADATA, "IdnaMappingTable.txt" );
while (<$IDNA>)
{
s/\#.*//; # remove comments
next if /^\s*$/;
my ($char, $type, $mapping) = split /;/;
# $ch1 .. $ch2 is the range of code points that the line applies to
my ($ch1, $ch2);
if ($char =~ /([0-9a-fA-F]+)\.\.([0-9a-fA-F]+)/)
{
$ch1 = hex $1;
$ch2 = hex $2;
}
elsif ($char =~ /([0-9a-fA-F]+)/)
{
$ch1 = $ch2 = hex $1;
}
if ($type =~ /mapped/ || $type =~ /deviation/)
{
$mapping =~ s/^\s*(([0-9a-fA-F]+\s+)+)\s*$/$1/;
my @seq = map { hex $_; } split /\s+/, $mapping;
# an empty mapping is stored as a single 0
foreach my $i ($ch1 .. $ch2) { $idna_decomp_table[$i] = @seq ? \@seq : [ 0 ]; }
}
elsif ($type =~ /valid/)
{
# valid chars keep the entry copied from the compatibility table
}
elsif ($type =~ /ignored/)
{
foreach my $i ($ch1 .. $ch2) { $idna_decomp_table[$i] = [ 0 ]; }
}
elsif ($type =~ /disallowed/)
{
# disallowed chars lose their decomposition and are flagged
foreach my $i ($ch1 .. $ch2)
{
$idna_decomp_table[$i] = undef;
$idna_disallowed[$i] = 1;
}
}
}
close $IDNA;
# load the Unihan mappings
# the new lexical shadows the file-level $UNIHAN; the argument on the
# right-hand side still refers to the file-level URL
my $UNIHAN = open_data_file( $UNIHAN, "Unihan_Variants.txt" );
while (<$UNIHAN>)
{
s/\#.*//; # remove comments
next if /^\s*$/;
# only the first variant listed on a line is used
if (/^U\+([0-9a-fA-F]+)\s+kTraditionalVariant\s+U\+([0-9a-fA-F]+)/)
{
$chinese_traditional_table[hex $1] = hex $2;
}
elsif (/^U\+([0-9a-fA-F]+)\s+kSimplifiedVariant\s+U\+([0-9a-fA-F]+)/)
{
$chinese_simplified_table[hex $1] = hex $2;
}
}
close $UNIHAN;
}
################################################################
# add a new registry key
#
# The key is created with $defval as its first entry; a key that already
# exists is left untouched.
sub add_registry_key($$)
{
    my ($key, $defval) = @_;
    if (!defined $registry_keys{$key})
    {
        $registry_keys{$key} = [ $defval ];
    }
}
################################################################
# add a new registry value
#
# Appends a string value named $name to the key $key, creating the key
# without a default value when it does not exist yet.
sub add_registry_value($$$)
{
    my ($key, $name, $value) = @_;
    add_registry_key( $key, undef );
    my $entries = $registry_keys{$key};
    push @{$entries}, "'$name' = s '$value'";
}
################################################################
# define a new lead byte
#
# Does nothing when the byte already has an entry in @cp2uni, whether it
# is a lead byte or a mapped char.
sub add_lead_byte($)
{
    my ($byte) = @_;
    return if defined $cp2uni[$byte];
    push @lead_bytes, $byte;
    # lead bytes map to 0 in the code page -> Unicode table
    $cp2uni[$byte] = 0;
}
################################################################
# define a new char mapping
#
# Maps the code page char $cp to the Unicode char $uni and back.  The first
# mapping defined for a char wins, in each direction independently.
sub add_mapping($$)
{
    my ($cp, $uni) = @_;
    $cp2uni[$cp] = $uni if !defined $cp2uni[$cp];
    $uni2cp[$uni] = $cp if !defined $uni2cp[$uni];
    # the high byte of a double-byte char is a lead byte
    add_lead_byte( $cp >> 8 ) if $cp > 0xff;
}
################################################################
# get a mapping including glyph chars for MB_USEGLYPHCHARS
#
# Returns a copy of the table passed as arguments in which every entry
# that is defined in @glyph2uni has been replaced by the glyph char.
sub get_glyphs_mapping(@)
{
    my @table = @_;
    foreach my $i (0 .. $#glyph2uni)
    {
        my $glyph = $glyph2uni[$i];
        $table[$i] = $glyph if defined $glyph;
    }
    return @table;
}
################################################################
# build EUC-JP table from the JIS 0208/0212 files
#
# Resets the global code page tables, fills them from the fixed mappings
# and the two JIS files, and outputs the result as code page 20932.
sub dump_eucjp_codepage()
{
    @cp2uni = ();
    @glyph2uni = ();
    @lead_bytes = ();
    @uni2cp = ();
    $default_char = $DEF_CHAR;
    $default_wchar = 0x30fb;

    # ASCII chars
    add_mapping( $_, $_ ) for 0x00 .. 0x7f;

    # lead bytes
    add_lead_byte( $_ ) for 0x8e, 0xa1 .. 0xfe;

    # JIS X 0201 right plane
    add_mapping( 0x8e00 + $_, 0xfec0 + $_ ) for 0xa1 .. 0xdf;

    # undefined chars
    $cp2uni[$_] = $_ for 0x80 .. 0x8d, 0x8f .. 0x9f;
    $cp2uni[0xa0] = 0xf8f0;
    $cp2uni[0xff] = 0xf8f3;

    # Fix backslash conversion
    add_mapping( 0xa1c0, 0xff3c );

    # Add private mappings for rows undefined in JIS 0208/0212:
    # all the rows with trail bytes a1..fe first, then with trail bytes 21..7e
    my $private = 0xe000;
    foreach my $trail ([ 0xa1, 0xfe ], [ 0x21, 0x7e ])
    {
        my ($first, $last) = @{$trail};
        foreach my $hi (0xf5 .. 0xfe)
        {
            add_mapping( ($hi << 8) + $_, $private++ ) for $first .. $last;
        }
    }

    # for each file: name, pattern that captures the JIS code and the Unicode
    # char, and offset that is added to the JIS code
    my @files =
    (
        [ "JIS0208.TXT", qr/^0x[0-9a-fA-F]+\s+0x([0-9a-fA-F]+)\s+0x([0-9a-fA-F]+)\s+(\#.*)?/, 0x8080 ],
        [ "JIS0212.TXT", qr/^0x([0-9a-fA-F]+)\s+0x([0-9a-fA-F]+)\s+(\#.*)?/, 0x8000 ],
    );
    foreach my $file (@files)
    {
        my ($name, $pattern, $offset) = @{$file};
        my $INPUT = open_data_file( $JISDATA, $name );
        while (<$INPUT>)
        {
            next if /^\#/;  # skip comments
            next if /^$/;  # skip empty lines
            next if /\x1a/;  # skip ^Z
            die "Unrecognized line $_\n" unless $_ =~ $pattern;
            add_mapping( $offset + hex $1, hex $2 );
        }
        close $INPUT;
    }

    output_codepage_file( 20932 );
}
################################################################
# build Korean Wansung table from the KSX1001 file
#
# The arguments are the Unicode -> code page table of code page 949, which
# provides some extra mappings.  Resets the global code page tables and
# outputs the result as code page 20949.
sub dump_krwansung_codepage(@)
{
    my @cp949 = @_;
    @cp2uni = ();
    @glyph2uni = ();
    @lead_bytes = ();
    @uni2cp = ();
    $default_char = 0x3f;
    $default_wchar = 0x003f;

    # ASCII and undefined chars
    add_mapping( $_, $_ ) for 0x00 .. 0x9f;
    # the remaining undefined bytes map to consecutive chars from 0xf8e6 up
    my $undefined = 0xf8e6;
    add_mapping( $_, $undefined++ ) for 0xa0, 0xad, 0xae, 0xaf, 0xfe, 0xff;

    my $INPUT = open_data_file( $KSCDATA, "KSX1001.TXT" );
    while (<$INPUT>)
    {
        next if /^\#/;  # skip comments
        next if /^$/;  # skip empty lines
        next if /\x1a/;  # skip ^Z
        die "Unrecognized line $_\n" unless /^0x([0-9a-fA-F]+)\s+0x([0-9a-fA-F]+)\s+(\#.*)?/;
        add_mapping( 0x8080 + hex $1, hex $2 );
    }
    close $INPUT;

    # get some extra mappings from cp 949
    my %defined_lb = map { $_ => 1 } @lead_bytes;
    foreach my $uni (0x0000 .. 0xffff)
    {
        next if $uni >= 0x1100 && $uni <= 0x11ff;  # range not used in 20949
        my $cp = $cp949[$uni];
        next unless defined $cp;
        if ($cp >= 0xff)
        {
            # only add chars for lead bytes that exist in 20949
            my ($hi, $lo) = ($cp >> 8, $cp & 0xff);
            next unless $defined_lb{$hi};
            next unless $lo >= 0xa1 && $lo <= 0xfe;
        }
        add_mapping( $cp, $uni );
    }
    output_codepage_file( 20949 );
}
################################################################
# build the sort keys table
#
# Reads the collation keys of the single BMP chars from the $SORTKEYS file,
# compresses every key to 32 bits and writes the resulting table to
# $filename through "$filename.new".  Dies on a line that is not recognized,
# when the compressed weights overflow, or when the output cannot be created.
sub dump_sortkeys($)
{
    my $filename = shift;
    my @sortkeys = ();
    my $INPUT = open_data_file( $REPORTS, $SORTKEYS );
    while (<$INPUT>)
    {
        next if /^\#/;  # skip comments
        next if /^$/;  # skip empty lines
        next if /\x1a/;  # skip ^Z
        next if /^\@version/;  # skip @version header
        if (/^([0-9a-fA-F]+)\s+;\s+\[([*.])([0-9a-fA-F]{4})\.([0-9a-fA-F]{4})\.([0-9a-fA-F]{4})\.([0-9a-fA-F]+)\]/)
        {
            # single char: remember the char and its four weights
            my $uni = hex $1;
            next if $uni > 65535;
            $sortkeys[$uni] = [ $uni, hex $3, hex $4, hex $5, hex $6 ];
            next;
        }
        if (/^([0-9a-fA-F]+\s+)+;\s+\[[*.]([0-9a-fA-F]{4})\.([0-9a-fA-F]{4})\.([0-9a-fA-F]{4})\.([0-9a-fA-F]+)\]/)
        {
            # multiple character sequence, ignored for now
            next;
        }
        die "$SORTKEYS: Unrecognized line $_\n";
    }
    close $INPUT;

    # compress the keys to 32 bit:
    # key 1 to 16 bits, key 2 to 8 bits, key 3 to 4 bits, key 4 to 1 bit

    # NOTE(review): @sortkeys is indexed by char and can contain undefined
    # holes when it is sorted -- confirm that the comparison copes with them
    @sortkeys = sort { ${$a}[1] <=> ${$b}[1] or
                       ${$a}[2] <=> ${$b}[2] or
                       ${$a}[3] <=> ${$b}[3] or
                       ${$a}[4] <=> ${$b}[4] or
                       $a cmp $b; } @sortkeys;

    # $n2 and $n3 count the distinct second and third weights seen so far
    # for the current first weight; @keys holds the weights of the last entry
    my ($n2, $n3) = (1, 1);
    my @keys = (-1, -1, -1, -1, -1 );
    my @flatkeys = ();

    for (my $i = 0; $i < @sortkeys; $i++)
    {
        next unless defined $sortkeys[$i];
        my @current = @{$sortkeys[$i]};
        if ($current[1] == $keys[1])
        {
            if ($current[2] == $keys[2])
            {
                if ($current[3] == $keys[3])
                {
                    # nothing
                }
                else
                {
                    $keys[3] = $current[3];
                    $n3++;
                    die if ($n3 >= 16);
                }
            }
            else
            {
                $keys[2] = $current[2];
                $keys[3] = $current[3];
                $n2++;
                $n3 = 1;
                die if ($n2 >= 256);
            }
        }
        else
        {
            $keys[1] = $current[1];
            $keys[2] = $current[2];
            $keys[3] = $current[3];
            $n2 = 1;
            $n3 = 1;
        }

        # non-zero weights are replaced by their rank, zero weights stay zero
        if ($current[2]) { $current[2] = $n2; }
        if ($current[3]) { $current[3] = $n3; }
        if ($current[4]) { $current[4] = 1; }

        $flatkeys[$current[0]] = ($current[1] << 16) | ($current[2] << 8) | ($current[3] << 4) | $current[4];
    }

    # three-argument open, so that the file name cannot be taken for a mode;
    # the OUTPUT handle is kept because the dump functions print to it
    open OUTPUT, ">", "$filename.new" or die "Cannot create $filename.new: $!";
    # plain print, the file name must not be used as a format string
    print "Building $filename\n";
    print OUTPUT "/* Unicode collation element table */\n";
    printf OUTPUT "/* generated from %s */\n", "$REPORTS/$SORTKEYS";
    print OUTPUT "/* DO NOT EDIT!! */\n\n";
    print OUTPUT "#include \"windef.h\"\n\n";

    dump_two_level_mapping( "collation_table", 0xffffffff, 32, @flatkeys );

    close OUTPUT;
    save_file($filename);
}
################################################################
# dump an array of integers
# Format a list of integers as comma-separated hex constants, eight per line.
# Parameters: $bit_width - width of each value in bits (sets the digit count)
#             $default   - value printed in place of undefined entries
#             @array     - the values
# Returns the formatted text without a trailing comma or newline.
sub dump_array($$@)
{
    my ($bit_width, $default, @array) = @_;
    my $format = sprintf "0x%%0%ux", $bit_width / 4;

    # an empty list still produces a single (default) entry
    my $last = @array ? $#array : 0;
    my $text = " ";
    foreach my $idx (0 .. $last)
    {
        my $value = $array[$idx];
        $value = $default unless defined $value;
        $text .= sprintf( $format, $value );
        next if $idx == $last;
        # break the line after every eighth value
        $text .= ($idx % 8 == 7) ? ",\n " : ", ";
    }
    return $text;
}
################################################################
# dump an SBCS mapping table in binary format
# Write the single-byte codepage currently held in the global tables
# (@cp2uni, @uni2cp, @glyph2uni, $default_char, $default_wchar) to OUTPUT.
# Parameter: $codepage - codepage number stored in the header.
sub dump_binary_sbcs_table($)
{
    my ($codepage) = @_;
    my $have_glyphs = @glyph2uni ? 1 : 0;
    my @header = ( 13, $codepage, 1, $default_char, $default_wchar, $cp2uni[$default_char], $uni2cp[$default_wchar] );

    # the wide-char table follows the 256-entry byte table (plus three
    # extra words) and, when present, the 256-entry glyph table
    my $wc_offset = 256 + 3;
    $wc_offset += 256 if $have_glyphs;

    print OUTPUT pack( "S<*", @header );
    print OUTPUT pack( "C12", (0) x 12 );   # no lead byte ranges
    print OUTPUT pack( "S<*", $wc_offset, map { $_ || 0; } @cp2uni[0 .. 255] );

    # glyph table: a count word followed by the table, or just a zero count
    my @glyph_section = $have_glyphs ? (256, get_glyphs_mapping(@cp2uni[0 .. 255])) : (0);
    print OUTPUT pack( "S<*", @glyph_section );

    print OUTPUT pack( "S<*", 0, 0 );
    # Unicode -> byte table, one byte per BMP character
    print OUTPUT pack( "C*", map { defined $_ ? $_ : $default_char; } @uni2cp[0 .. 65535] );
}
################################################################
# dump a DBCS mapping table in binary format
# Write the double-byte codepage currently held in the global tables
# (@cp2uni, @uni2cp, @lead_bytes, $default_char, $default_wchar) to OUTPUT.
# Parameter: $codepage - codepage number stored in the header.
# Side effect: the @cp2uni entry of every lead byte is reset to 0.
sub dump_binary_dbcs_table($)
{
    my $codepage = shift;
    my @lb_ranges = get_lb_ranges();
    my @header = ( 13, $codepage, 2, $default_char, $default_wchar, $cp2uni[$default_char], $uni2cp[$default_wchar] );

    # each lead byte gets its own 256-entry trail byte table; offsets are
    # assigned in @lead_bytes order, starting at 256
    my @offsets = (0) x 256;
    my $pos = 0;
    foreach my $i (@lead_bytes)
    {
        $offsets[$i] = ($pos += 256);
        $cp2uni[$i] = 0;
    }

    my $wc_offset = 256 + 3 + 256 * (1 + scalar @lead_bytes);

    print OUTPUT pack "S<*", @header;
    # lead byte ranges, zero-padded to 12 bytes ("C12" ignores the surplus);
    # a list of zeros is needed here, not the string repetition "0 x 12"
    print OUTPUT pack "C12", @lb_ranges, (0) x 12;
    print OUTPUT pack "S<*", $wc_offset, map { $_ || 0; } @cp2uni[0 .. 255];
    print OUTPUT pack "S<*", 0, scalar @lb_ranges / 2, @offsets;

    # one trail byte table per lead byte
    foreach my $i (@lead_bytes)
    {
        my $base = $i << 8;
        print OUTPUT pack "S<*", map { defined $_ ? $_ : $default_wchar; } @cp2uni[$base .. $base + 255];
    }

    print OUTPUT pack "S<", 4;
    # Unicode -> multibyte table, one 16-bit word per BMP character
    print OUTPUT pack "S<*", map { defined $_ ? $_ : $default_char; } @uni2cp[0 .. 65535];
}
################################################################
# get the list of defined lead byte ranges
# Collapse the global @lead_bytes list into inclusive ranges.
# Returns a flat list ( first1, last1, first2, last2, ... ) in ascending order.
sub get_lb_ranges()
{
    my @is_lead;
    $is_lead[$_] = 1 foreach @lead_bytes;

    my @ranges;
    my $inside = 0;
    foreach my $byte (0 .. 255)
    {
        my $lead = $is_lead[$byte] ? 1 : 0;
        next if $lead == $inside;
        # entering a range records its first byte, leaving records the last
        push @ranges, $lead ? $byte : $byte - 1;
        $inside = $lead;
    }
    # a range still open at the end extends to 0xff
    push @ranges, 0xff if $inside;
    return @ranges;
}
################################################################
# dump the Indic Syllabic Category table
# Build the combined Indic table: the low byte of each entry comes from
# IndicSyllabicCategory.txt, the positional (matra) category from
# IndicPositionalCategory.txt is or'ed in shifted left by 8.
# Parameter: $filename - output file, written as "$filename.new" then saved.
# Dies on a malformed line or an unknown category name.
sub dump_indic($)
{
    my ($filename) = @_;
    my @indic_table;

    my $INPUT = open_data_file( $UNIDATA, "IndicSyllabicCategory.txt" );
    while (<$INPUT>)
    {
        # skip comments, empty lines and ^Z
        next if /^\#/ || /^\s*$/ || /\x1a/;

        my ($first, $last, $type);
        if (/^\s*([0-9a-fA-F]+)\s*;\s*([a-zA-Z_]+)\s*#/)
        {
            ($first, $last, $type) = (hex($1), hex($1), $2);
        }
        elsif (/^\s*([0-9a-fA-F]+)\.\.\s*([0-9a-fA-F]+)\s*;\s*([A-Za-z_]+)\s*#/)
        {
            ($first, $last, $type) = (hex($1), hex($2), $3);
        }
        else
        {
            die "malformed line $_";
        }
        die "unknown indic $type" unless defined $indic_types{$type};

        # entries reaching beyond the BMP are dropped entirely
        next unless ($first < 65536 && $last < 65536);
        foreach my $char ($first .. $last)
        {
            $indic_table[$char] = $indic_types{$type};
        }
    }
    close $INPUT;

    $INPUT = open_data_file( $UNIDATA, "IndicPositionalCategory.txt" );
    while (<$INPUT>)
    {
        # skip comments, empty lines and ^Z
        next if /^\#/ || /^\s*$/ || /\x1a/;

        my ($first, $last, $type);
        if (/^\s*([0-9a-fA-F]+)\s*;\s*([a-zA-Z_]+)\s*#/)
        {
            ($first, $last, $type) = (hex($1), hex($1), $2);
        }
        elsif (/^\s*([0-9a-fA-F]+)\.\.\s*([0-9a-fA-F]+)\s*;\s*([A-Za-z_]+)\s*#/)
        {
            ($first, $last, $type) = (hex($1), hex($2), $3);
        }
        else
        {
            die "malformed line $_";
        }
        die "unknown matra $type" unless defined $matra_types{$type};

        my $matra_bits = $matra_types{$type} << 8;
        foreach my $char ($first .. $last)
        {
            $indic_table[$char] |= $matra_bits;
        }
    }
    close $INPUT;

    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode Indic Syllabic Category */\n"
               . "/* generated from $UNIDATA:IndicSyllabicCategory.txt */\n"
               . "/* and from $UNIDATA:IndicPositionalCategory.txt */\n"
               . "/* DO NOT EDIT!! */\n\n";
    print OUTPUT "#include \"windef.h\"\n\n";

    dump_two_level_mapping( "indic_syllabic_table", $indic_types{'Other'}, 16, @indic_table );

    close OUTPUT;
    save_file($filename);
}
################################################################
# dump the Line Break Properties table
# Build the line break class table from LineBreak.txt.
# Parameter: $filename - output file, written as "$filename.new" then saved.
# Dies on a malformed line or an unknown break class.
sub dump_linebreak($)
{
    my ($filename) = @_;
    my @break_table;

    my $INPUT = open_data_file( $UNIDATA, "LineBreak.txt" );
    while (<$INPUT>)
    {
        # skip comments, empty lines and ^Z
        next if /^\#/ || /^\s*$/ || /\x1a/;

        # three-character classes are tried before two-character ones,
        # single code points before ranges
        my ($first, $last, $type);
        if (/^\s*([0-9a-fA-F]+)\s*;\s*([0-9A-Z][0-9A-Z][0-9A-Z])+\s*/)
        {
            ($first, $last, $type) = (hex($1), hex($1), $2);
        }
        elsif (/^\s*([0-9a-fA-F]+)\.\.\s*([0-9a-fA-F]+)\s*;\s*([0-9A-Z][0-9A-Z][0-9A-Z])+\s*/)
        {
            ($first, $last, $type) = (hex($1), hex($2), $3);
        }
        elsif (/^\s*([0-9a-fA-F]+)\s*;\s*([0-9A-Z][0-9A-Z])+\s*/)
        {
            ($first, $last, $type) = (hex($1), hex($1), $2);
        }
        elsif (/^\s*([0-9a-fA-F]+)\.\.\s*([0-9a-fA-F]+)\s*;\s*([0-9A-Z][0-9A-Z])+\s*/)
        {
            ($first, $last, $type) = (hex($1), hex($2), $3);
        }
        else
        {
            die "malformed line $_";
        }

        die "unknown breaktype $type" unless defined $break_types{$type};
        foreach my $char ($first .. $last)
        {
            $break_table[$char] = $break_types{$type};
        }
    }
    close $INPUT;

    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode Line Break Properties */\n"
               . "/* generated from $UNIDATA:LineBreak.txt */\n"
               . "/* DO NOT EDIT!! */\n\n";
    print OUTPUT "#include \"windef.h\"\n\n";

    dump_two_level_mapping( "wine_linebreak_table", $break_types{'XX'}, 16, @break_table );

    close OUTPUT;
    save_file($filename);
}
# Script name => numeric script id.
# The names are matched against the script field of Scripts.txt by
# dump_scripts(), which also emits every entry as "Script_<name> = <id>" in
# enum unicode_script_id. Script_LastId is computed there as the number of
# entries minus one, so the ids must stay contiguous starting at 0.
# "Unknown" is 0 and doubles as the default for characters with no entry.
my %scripts =
(
"Unknown" => 0,
"Common" => 1,
"Inherited" => 2,
"Arabic" => 3,
"Armenian" => 4,
"Avestan" => 5,
"Balinese" => 6,
"Bamum" => 7,
"Batak" => 8,
"Bengali" => 9,
"Bopomofo" => 10,
"Brahmi" => 11,
"Braille" => 12,
"Buginese" => 13,
"Buhid" => 14,
"Canadian_Aboriginal" => 15,
"Carian" => 16,
"Cham" => 17,
"Cherokee" => 18,
"Coptic" => 19,
"Cuneiform" => 20,
"Cypriot" => 21,
"Cyrillic" => 22,
"Deseret" => 23,
"Devanagari" => 24,
"Egyptian_Hieroglyphs" => 25,
"Ethiopic" => 26,
"Georgian" => 27,
"Glagolitic" => 28,
"Gothic" => 29,
"Greek" => 30,
"Gujarati" => 31,
"Gurmukhi" => 32,
"Han" => 33,
"Hangul" => 34,
"Hanunoo" => 35,
"Hebrew" => 36,
"Hiragana" => 37,
"Imperial_Aramaic" => 38,
"Inscriptional_Pahlavi" => 39,
"Inscriptional_Parthian" => 40,
"Javanese" => 41,
"Kaithi" => 42,
"Kannada" => 43,
"Katakana" => 44,
"Kayah_Li" => 45,
"Kharoshthi" => 46,
"Khmer" => 47,
"Lao" => 48,
"Latin" => 49,
"Lepcha" => 50,
"Limbu" => 51,
"Linear_B" => 52,
"Lisu" => 53,
"Lycian" => 54,
"Lydian" => 55,
"Malayalam" => 56,
"Mandaic" => 57,
"Meetei_Mayek" => 58,
"Mongolian" => 59,
"Myanmar" => 60,
"New_Tai_Lue" => 61,
"Nko" => 62,
"Ogham" => 63,
"Ol_Chiki" => 64,
"Old_Italic" => 65,
"Old_Persian" => 66,
"Old_South_Arabian" => 67,
"Old_Turkic" => 68,
"Oriya" => 69,
"Osmanya" => 70,
"Phags_Pa" => 71,
"Phoenician" => 72,
"Rejang" => 73,
"Runic" => 74,
"Samaritan" => 75,
"Saurashtra" => 76,
"Shavian" => 77,
"Sinhala" => 78,
"Sundanese" => 79,
"Syloti_Nagri" => 80,
"Syriac" => 81,
"Tagalog" => 82,
"Tagbanwa" => 83,
"Tai_Le" => 84,
"Tai_Tham" => 85,
"Tai_Viet" => 86,
"Tamil" => 87,
"Telugu" => 88,
"Thaana" => 89,
"Thai" => 90,
"Tibetan" => 91,
"Tifinagh" => 92,
"Ugaritic" => 93,
"Vai" => 94,
"Yi" => 95,
# Win8/Win8.1
"Chakma" => 96,
"Meroitic_Cursive" => 97,
"Meroitic_Hieroglyphs" => 98,
"Miao" => 99,
"Sharada" => 100,
"Sora_Sompeng" => 101,
"Takri" => 102,
# Win10
"Bassa_Vah" => 103,
"Caucasian_Albanian" => 104,
"Duployan" => 105,
"Elbasan" => 106,
"Grantha" => 107,
"Khojki" => 108,
"Khudawadi" => 109,
"Linear_A" => 110,
"Mahajani" => 111,
"Manichaean" => 112,
"Mende_Kikakui" => 113,
"Modi" => 114,
"Mro" => 115,
"Nabataean" => 116,
"Old_North_Arabian" => 117,
"Old_Permic" => 118,
"Pahawh_Hmong" => 119,
"Palmyrene" => 120,
"Pau_Cin_Hau" => 121,
"Psalter_Pahlavi" => 122,
"Siddham" => 123,
"Tirhuta" => 124,
"Warang_Citi" => 125,
# Win10 RS1
"Adlam" => 126,
"Ahom" => 127,
"Anatolian_Hieroglyphs" => 128,
"Bhaiksuki" => 129,
"Hatran" => 130,
"Marchen" => 131,
"Multani" => 132,
"Newa" => 133,
"Old_Hungarian" => 134,
"Osage" => 135,
"SignWriting" => 136,
"Tangut" => 137,
# Win10 RS4
"Masaram_Gondi" => 138,
"Nushu" => 139,
"Soyombo" => 140,
"Zanabazar_Square" => 141,
# Win10 1903
"Dogra" => 142,
"Gunjala_Gondi" => 143,
"Hanifi_Rohingya" => 144,
"Makasar" => 145,
"Medefaidrin" => 146,
"Old_Sogdian" => 147,
"Sogdian" => 148,
# Win10 2004
"Elymaic" => 149,
"Nyiakeng_Puachue_Hmong" => 150,
"Nandinagari" => 151,
"Wancho" => 152,
# Win11
"Chorasmian" => 153,
"Dives_Akuru" => 154,
"Khitan_Small_Script" => 155,
"Yezidi" => 156,
);
################################################################
# dump Script IDs table
# Generate the script id enum ("$filename.h") and the per-character script
# table ("$filename.c") from Scripts.txt and the %scripts map.
# Parameter: $filename - output path without extension.
# Scripts that are not listed in %scripts, and lines matching neither
# pattern, are silently skipped.
sub dump_scripts($)
{
    my $filename = shift;
    my @scripts_table;

    my $INPUT = open_data_file( $UNIDATA, "Scripts.txt" );
    # Fill the table
    # Unknown script id is always 0, so undefined scripts are automatically treated as such
    while (<$INPUT>)
    {
        next if /^\#/; # skip comments
        next if /^\s*$/; # skip empty lines
        next if /\x1a/; # skip ^Z
        if (/^\s*([0-9a-fA-F]+)\s*;\s*([a-zA-Z_]+)\s*/)
        {
            my $type = $2;
            if (defined $scripts{$type})
            {
                $scripts_table[hex $1] = $scripts{$type};
            }
            next;
        }
        elsif (/^\s*([0-9a-fA-F]+)\.\.\s*([0-9a-fA-F]+)\s*;\s*([a-zA-Z_]+)\s*/)
        {
            my $type = $3;
            if (defined $scripts{$type})
            {
                foreach my $i (hex $1 .. hex $2)
                {
                    $scripts_table[$i] = $scripts{$type};
                }
            }
            next;
        }
    }
    close $INPUT;

    # header file: one enum value per known script, ordered by id
    my $header = "$filename.h";
    open OUTPUT,">$header.new" or die "Cannot create $header";
    print "Building $header\n";
    print OUTPUT "/* Unicode Script IDs */\n";
    print OUTPUT "/* generated from $UNIDATA:Scripts.txt */\n";
    print OUTPUT "/* DO NOT EDIT!! */\n\n";
    print OUTPUT "enum unicode_script_id {\n";
    foreach my $script (sort { $scripts{$a} <=> $scripts{$b} } keys %scripts)
    {
        print OUTPUT " Script_$script = $scripts{$script},\n";
    }
    print OUTPUT " Script_LastId = ", (scalar keys %scripts) - 1, "\n";
    print OUTPUT "};\n";
    close OUTPUT;
    save_file($header);

    # source file: two-level lookup table, 0 (Unknown) by default
    $filename = "$filename.c";
    # report the file actually being created (previously named the header)
    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode Script IDs */\n";
    print OUTPUT "/* generated from $UNIDATA:Scripts.txt */\n";
    print OUTPUT "/* DO NOT EDIT!! */\n\n";
    print OUTPUT "#include \"windef.h\"\n\n";

    dump_two_level_mapping( "wine_scripts_table", 0, 16, @scripts_table );

    close OUTPUT;
    save_file($filename);
}
################################################################
# dump the BiDi mirroring table
# Build the character -> mirrored character table from BidiMirroring.txt.
# Parameter: $filename - output file, written as "$filename.new" then saved.
# Dies on a malformed line.
sub dump_mirroring($)
{
    my ($filename) = @_;
    my @mirror_table = ();

    my $INPUT = open_data_file( $UNIDATA, "BidiMirroring.txt" );
    while (<$INPUT>)
    {
        # skip comments, empty lines and ^Z
        next if /^\#/ || /^$/ || /\x1a/;
        die "malformed line $_" unless /^\s*([0-9a-fA-F]+)\s*;\s*([0-9a-fA-F]+)/;
        $mirror_table[hex $1] = hex $2;
    }
    close $INPUT;

    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode BiDi mirroring */\n"
               . "/* generated from $UNIDATA:BidiMirroring.txt */\n"
               . "/* DO NOT EDIT!! */\n\n";
    print OUTPUT "#include \"windef.h\"\n\n";

    dump_two_level_mapping( "wine_mirror_map", 0, 16, @mirror_table );

    close OUTPUT;
    save_file($filename);
}
################################################################
# dump the Bidi Brackets
# Build the bidi paired bracket table from BidiBrackets.txt.
# Each entry holds the bracket type (from %bracket_types) in the high byte
# and, in the low byte, the distance to the paired bracket as a signed 8-bit
# (two's complement) value.
# Parameter: $filename - output file, written as "$filename.new" then saved.
# Dies on a malformed line, an unknown bracket type, or a pair whose
# distance does not fit in a signed byte.
sub dump_bracket($)
{
    my $filename = shift;
    my @bracket_table;

    my $INPUT = open_data_file( $UNIDATA, "BidiBrackets.txt" );
    while (<$INPUT>)
    {
        next if /^\#/; # skip comments
        next if /^\s*$/; # skip empty lines
        next if /\x1a/; # skip ^Z
        if (/^\s*([0-9a-fA-F]+)\s*;\s*([0-9a-fA-F]+);\s*([con])/)
        {
            my $type = $3;
            die "unknown bracket $type" unless defined $bracket_types{$type};
            die "characters too distant $1 and $2" if abs(hex($2) - hex($1)) >= 128;
            # "& 0xff" yields the two's complement byte (-1 => 0xff); the
            # former "% 255" turned -1 into 254, i.e. -2 when read back as a
            # signed byte
            $bracket_table[hex $1] = (hex($2) - hex($1)) & 0xff;
            $bracket_table[hex $1] += $bracket_types{$type} << 8;
            next;
        }
        die "malformed line $_";
    }
    close $INPUT;

    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode Bidirectional Bracket table */\n";
    print OUTPUT "/* generated from $UNIDATA:BidiBrackets.txt */\n";
    print OUTPUT "/* DO NOT EDIT!! */\n\n";
    print OUTPUT "#include \"windef.h\"\n\n";

    dump_two_level_mapping( "bidi_bracket_table", 0, 16, @bracket_table );

    close OUTPUT;
    save_file($filename);
}
################################################################
# dump the Arabic shaping table
# Build the joining type table from ArabicShaping.txt (starting from
# @initial_joining_table), followed by the wine_shaping_forms array giving
# the isolated/final/initial/medial form of each character in U+0600..U+06FF.
# Parameter: $filename - output file, written as "$filename.new" then saved.
# Dies on a malformed line.
sub dump_shaping($)
{
    my ($filename) = @_;
    my @joining_table = @initial_joining_table;

    my $INPUT = open_data_file( $UNIDATA, "ArabicShaping.txt" );
    while (<$INPUT>)
    {
        # skip comments, empty lines and ^Z
        next if /^\#/ || /^\s*$/ || /\x1a/;
        die "malformed line $_" unless /^\s*([0-9a-fA-F]+)\s*;.*;\s*([RLDCUT])\s*;\s*(\w+)/;
        $joining_table[hex $1] = $joining_types{$2};
    }
    close $INPUT;

    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode Arabic shaping */\n"
               . "/* generated from $UNIDATA:ArabicShaping.txt */\n"
               . "/* DO NOT EDIT!! */\n\n";
    print OUTPUT "#include \"windef.h\"\n\n";

    dump_two_level_mapping( "wine_shaping_table", 0, 16, @joining_table );

    print OUTPUT "\nconst unsigned short DECLSPEC_HIDDEN wine_shaping_forms[256][4] =\n{\n";
    foreach my $i (0x600 .. 0x6ff)
    {
        # a character without a given presentation form maps to itself
        printf OUTPUT " { 0x%04x, 0x%04x, 0x%04x, 0x%04x },\n",
            ${joining_forms{"isolated"}}[$i] || $i,
            ${joining_forms{"final"}}[$i] || $i,
            ${joining_forms{"initial"}}[$i] || $i,
            ${joining_forms{"medial"}}[$i] || $i;
    }
    print OUTPUT "};\n";

    close OUTPUT;
    save_file($filename);
}
################################################################
# dump the Arabic shaping table with separate types for the ALAPH and DALATH RISH joining groups
# Build the joining type table from ArabicShaping.txt (starting from
# @initial_joining_table). Characters in the ALAPH and DALATH RISH joining
# groups get the type registered under the group name in %joining_types;
# every other character gets the type of its joining type letter.
# Parameter: $filename - output file, written as "$filename.new" then saved.
# Dies on a malformed line.
sub dump_arabic_shaping($)
{
    my $filename = shift;
    my @joining_table = @initial_joining_table;

    my $INPUT = open_data_file( $UNIDATA, "ArabicShaping.txt" );
    while (<$INPUT>)
    {
        next if /^\#/; # skip comments
        next if /^\s*$/; # skip empty lines
        next if /\x1a/; # skip ^Z
        # the joining group may consist of several words ("DALATH RISH"),
        # so capture all of them rather than only the first one
        if (/^\s*([0-9a-fA-F]+)\s*;.*;\s*([RLDCUT])\s*;\s*(\w+(?:\s+\w+)*)/)
        {
            my $type = $2;
            my $group = $3;
            if ($group eq "ALAPH" || $group eq "DALATH RISH")
            {
                $joining_table[hex $1] = $joining_types{$group};
            }
            else
            {
                $joining_table[hex $1] = $joining_types{$type};
            }
            next;
        }
        die "malformed line $_";
    }
    close $INPUT;

    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode Arabic shaping */\n";
    print OUTPUT "/* generated from $UNIDATA:ArabicShaping.txt */\n";
    print OUTPUT "/* DO NOT EDIT!! */\n\n";
    print OUTPUT "#include \"windef.h\"\n\n";

    dump_two_level_mapping( "arabic_shaping_table", 0, 16, @joining_table );

    close OUTPUT;
    save_file($filename);
}
################################################################
# dump the Vertical Orientation table
# Build the vertical orientation table from VerticalOrientation.txt.
# Parameters: $filename - output file, written as "$filename.new" then saved
#             $unix     - when true, emit a "#pragma makedep unix" block
# Dies on a malformed line or an unknown orientation type.
sub dump_vertical($$)
{
    my ($filename, $unix) = @_;
    my @vertical_table;

    my $INPUT = open_data_file( $UNIDATA, "VerticalOrientation.txt" );
    while (<$INPUT>)
    {
        # skip comments, empty lines and ^Z
        next if /^\#/ || /^\s*$/ || /\x1a/;

        if (/^\s*([0-9a-fA-F]+)\s*;\s*([a-zA-Z_]+)\s*/)
        {
            my ($char, $type) = (hex($1), $2);
            die "unknown vertical $type" unless defined $vertical_types{$type};
            # single code points outside the BMP are not stored
            $vertical_table[$char] = $vertical_types{$type} if $char < 65536;
        }
        elsif (/^\s*([0-9a-fA-F]+)\.\.\s*([0-9a-fA-F]+)\s*;\s*([A-Za-z_]+)\s*/)
        {
            my ($first, $last, $type) = (hex($1), hex($2), $3);
            die "unknown vertical $type" unless defined $vertical_types{$type};
            foreach my $char ($first .. $last)
            {
                $vertical_table[$char] = $vertical_types{$type};
            }
        }
        else
        {
            die "malformed line $_";
        }
    }
    close $INPUT;

    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode Vertical Orientation */\n"
               . "/* generated from $UNIDATA:VerticalOrientation.txt */\n"
               . "/* DO NOT EDIT!! */\n\n";
    if ($unix)
    {
        print OUTPUT "#if 0\n"
                   . "#pragma makedep unix\n"
                   . "#endif\n\n";
    }
    print OUTPUT "#include \"windef.h\"\n\n";

    dump_two_level_mapping( "vertical_orientation_table", $vertical_types{'R'}, 16, @vertical_table );

    close OUTPUT;
    save_file($filename);
}
################################################################
# compress a mapping table by removing identical rows
# Parameters: $rows  - number of equally sized rows the table is split into
#             $def   - value substituted for undefined table entries
#             @table - the values to compress
# Returns a flat list: $rows offsets (one per row) followed by the merged
# row data. Each offset is an index into that same returned list, which is
# why $rows is added to the position inside the data.
sub compress_array($$@)
{
my $rows = shift;
my $def = shift;
my @table = @_;
my $len = @table / $rows;
my @array;
# merged row data, kept as a string (one character per value) so that
# index()/substr() can be used to look for existing rows
my $data = "";
# try to merge table rows
for (my $row = 0; $row < $rows; $row++)
{
my $rowtxt = pack "U*", map { defined($_) ? $_ : $def; } @table[($row * $len)..(($row + 1) * $len - 1)];
# reuse the row if it already occurs anywhere in the data
my $pos = index $data, $rowtxt;
if ($pos == -1)
{
# check if the tail of the data can match the start of the new row
my $first = substr( $rowtxt, 0, 1 );
for (my $i = length($data) - 1; $i > 0; $i--)
{
# look for the row's first character within the last $i characters
$pos = index( substr( $data, -$i ), $first );
last if $pos == -1;
# shorten the candidate tail so that it starts at that character
$i -= $pos;
next unless substr( $data, -$i ) eq substr( $rowtxt, 0, $i );
# the tail equals the start of the row: drop it, it is appended
# again below as part of the full row
substr( $data, -$i ) = "";
last;
}
$pos = length $data;
$data .= $rowtxt;
}
$array[$row] = $rows + $pos;
}
return @array, unpack "U*", $data;
}
################################################################
# dump a char -> 16-bit or 32-bit value mapping table using two-level tables
# Write a C array holding a two-level lookup table for the BMP to OUTPUT.
# Parameters: $name - name of the generated C array
#             $def  - value used for undefined characters
#             $size - element width in bits; 16 selects "unsigned short",
#                     anything else "unsigned int"
#             rest  - the values, indexed by character
sub dump_two_level_mapping($$@)
{
    my ($name, $def, $size, @values) = @_;
    my $type = "unsigned int";
    $type = "unsigned short" if $size == 16;

    # 4096 rows of 16 values, then 256 rows of 16 row offsets
    my @row_array = compress_array( 4096, $def, @values[0..65535] );
    my @array = compress_array( 256, 0, @row_array[0..4095] );

    # the 4096 row offsets of @row_array are not written out as such, so
    # rebase the level 2 entries onto the final layout
    my $bias = @array - 4096;
    foreach my $entry (@array[256 .. $#array]) { $entry += $bias; }

    my $total = @array + @row_array - 4096;
    printf OUTPUT "const %s DECLSPEC_HIDDEN %s[%d] =\n{\n", $type, $name, $total;
    printf OUTPUT " /* level 1 offsets */\n%s,\n", dump_array( $size, 0, @array[0..255] );
    printf OUTPUT " /* level 2 offsets */\n%s,\n", dump_array( $size, 0, @array[256..$#array] );
    printf OUTPUT " /* values */\n%s\n};\n", dump_array( $size, 0, @row_array[4096..$#row_array] );
}
################################################################
# dump a char -> value mapping table using three-level tables
# Write a C array holding a three-level lookup table covering 0..$MAX_CHAR
# to OUTPUT.
# Parameters: $name - name of the generated C array
#             $def  - value used for undefined characters
#             $size - element width in bits; 16 selects "unsigned short",
#                     anything else "unsigned int"
#             rest  - the values, indexed by character
sub dump_three_level_mapping($$@)
{
    my ($name, $def, $size, @values) = @_;
    my $type = "unsigned int";
    $type = "unsigned short" if $size == 16;

    # every level splits its input into rows of 16 entries
    my $level3 = ($MAX_CHAR + 1) / 16;
    my $level2 = $level3 / 16;
    my $level1 = $level2 / 16;
    my @array3 = compress_array( $level3, $def, @values[0..$MAX_CHAR] );
    my @array2 = compress_array( $level2, 0, @array3[0..$level3-1] );
    my @array1 = compress_array( $level1, 0, @array2[0..$level2-1] );

    # rebase the offsets onto the final layout, in which the offset parts
    # of @array2 and @array3 are not written out as such
    my $bias2 = @array1 + @array2 - $level2 - $level3;
    my $bias1 = @array1 - $level2;
    foreach my $entry (@array2[$level2 .. $#array2]) { $entry += $bias2; }
    foreach my $entry (@array1[$level1 .. $#array1]) { $entry += $bias1; }

    my $total = @array1 + (@array2 - $level2) + (@array3 - $level3);
    printf OUTPUT "const %s DECLSPEC_HIDDEN %s[%u] =\n{\n", $type, $name, $total;
    printf OUTPUT " /* level 1 offsets */\n%s,\n", dump_array( $size, 0, @array1[0..$level1-1] );
    printf OUTPUT " /* level 2 offsets */\n%s,\n", dump_array( $size, 0, @array1[$level1..$#array1] );
    printf OUTPUT " /* level 3 offsets */\n%s,\n", dump_array( $size, 0, @array2[$level2..$#array2] );
    printf OUTPUT " /* values */\n%s\n};\n", dump_array( $size, 0, @array3[$level3..$#array3] );
}
################################################################
# dump a binary case mapping table in l_intl.nls format
# Convert a case mapping table (indexed by character) into a packed
# two-level table of 16-bit little-endian words.
# Returns the packed string: a size word, the offset tables, then the
# 16-bit differences between each mapped character and the original.
sub dump_binary_case_table(@)
{
    my @table = @_;
    my $max_char = 0x10000;
    my $level1 = $max_char / 16;
    my $level2 = $level1 / 16;

    # store mappings as differences modulo 0x10000; unmapped chars stay undef
    my @difftable;
    foreach my $ch (grep { defined $table[$_] } 0 .. $#table)
    {
        $difftable[$ch] = ($table[$ch] - $ch) & 0xffff;
    }

    my @row_array = compress_array( $level1, 0, @difftable[0..$max_char-1] );
    my @array = compress_array( $level2, 0, @row_array[0..$level1-1] );

    # rebase the second level offsets: the first $level1 entries of
    # @row_array are replaced by the compressed @array in the output
    my $offset = @array - $level1;
    foreach my $entry (@array[$level2 .. $#array]) { $entry += $offset; }

    my $size = 1 + $offset + @row_array;
    return pack "S<*", $size, @array, @row_array[$level1..$#row_array];
}
################################################################
# dump case mappings for l_intl.nls
# Write the upper and lower case tables to a binary file: a version word
# followed by both tables as returned by dump_binary_case_table().
# Linguistic mappings are removed from copies of the global tables first.
# Parameter: $filename - output file, written as "$filename.new" then saved.
sub dump_intl_nls($)
{
    my $filename = shift;

    # work on copies so that the global case tables are left untouched
    my @upper_table = @toupper_table;
    my @lower_table = @tolower_table;
    remove_linguistic_mappings( \@upper_table, \@lower_table );

    my $upper = dump_binary_case_table( @upper_table );
    my $lower = dump_binary_case_table( @lower_table );

    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    # plain print: the file name must not be interpreted as a printf format
    print "Building $filename\n";

    binmode OUTPUT;
    print OUTPUT pack "S<", 1; # version
    print OUTPUT $upper;
    print OUTPUT $lower;
    close OUTPUT;
    save_file($filename);
}
################################################################
# dump the bidi direction table
# Write the bidi direction table for the BMP, translating the class names
# in @direction_table through %bidi_types. Characters without a direction
# default to the value of "L".
# Parameter: $filename - output file, written as "$filename.new" then saved.
sub dump_bidi_dir_table($)
{
    my $filename = shift;

    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    # plain print: the file name must not be interpreted as a printf format
    print "Building $filename\n";
    print OUTPUT "/* Unicode BiDi direction table */\n";
    print OUTPUT "/* Automatically generated; DO NOT EDIT!! */\n\n";
    print OUTPUT "#include \"windef.h\"\n\n";

    my @table;
    for (my $i = 0; $i < 65536; $i++)
    {
        $table[$i] = $bidi_types{$direction_table[$i]} if defined $direction_table[$i];
    }

    dump_two_level_mapping( "bidi_direction_table", $bidi_types{"L"}, 16, @table );

    close OUTPUT;
    save_file($filename);
}
# Rotate an 8-bit value left.
# Parameters: $byte  - value to rotate
#             $count - number of bit positions
# Returns the rotated value, masked to 8 bits.
sub rol($$)
{
    my ($byte, $count) = @_;
    my $shifted_out = $byte >> (8 - $count);   # bits wrapping around to the low end
    my $shifted_up = $byte << $count;
    return ($shifted_up | $shifted_out) & 0xff;
}
################################################################
# compress the character properties table
# Compress the character properties table by sharing identical rows.
# Parameters: $rows - number of equally sized rows
#             rest  - the property values, undefined entries count as 0x7f
# Returns a flat list: one row id per row, followed by the contents of every
# newly stored row in id order (ids start at 1).
sub compress_char_props_table($@)
{
    my ($rows, @table) = @_;
    my $row_len = @table / $rows;
    my $last_id = 0;
    my @result = (0) x $rows;
    my %row_ids;

    # add some predefined sequences: a row made up entirely of rol(id,5)
    # is represented by that id alone and its data is never stored
    foreach my $id (0, 0xfb .. 0xff)
    {
        my $key = pack "L*", (rol($id,5)) x $row_len;
        $row_ids{$key} = $id;
    }

    # try to merge table rows
    foreach my $row (0 .. $rows - 1)
    {
        my $start = $row * $row_len;
        my @values = map { defined $_ ? $_ : 0x7f; } @table[$start .. $start + $row_len - 1];
        my $key = pack "L*", @values;
        unless (defined $row_ids{$key})
        {
            # first occurrence: allocate a new id and store the row data
            $row_ids{$key} = ++$last_id;
            push @result, @values;
        }
        $result[$row] = $row_ids{$key};
    }
    return @result;
}
################################################################
# dump a normalization table in binary format
# Write one normalization data file. The form (nfc, nfd, nfkc, nfkd or idna)
# is derived from the "norm<form>.nls" part of the file name.
# Parameter: $filename - output file, written as "$filename.new" then saved;
#            a registry value is added for it under "Normalization".
# The file consists of a 16-word name, a header, eight table offsets and the
# tables themselves. Offsets are counted in 16-bit words, which is why the
# sizes of the byte tables are divided by two.
sub dump_norm_table($)
{
    my $filename = shift;

    my %forms = ( "nfc" => 1, "nfd" => 2, "nfkc" => 5, "nfkd" => 6, "idna" => 13 );
    my %decomp = ( "nfc" => \@decomp_table,
                   "nfd" => \@decomp_table,
                   "nfkc" => \@decomp_compat_table,
                   "nfkd" => \@decomp_compat_table ,
                   "idna" => \@idna_decomp_table );

    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    # the file holds packed binary data only (same as dump_intl_nls)
    binmode OUTPUT;
    print "Building $filename\n";

    my $type = $filename;
    $type =~ s!.*/norm(\w+)\.nls!$1!;
    # bit 0 of the form number selects composition, bit 2 compatibility
    my $compose = $forms{$type} & 1;
    my $compat = !!($forms{$type} & 4) + ($type eq "idna");

    my @version = split /\./, $UNIVERSION;

    # combining classes
    # @classes maps a combining class value to its index in @class_values
    my @classes;
    my @class_values;
    foreach my $c (grep defined, @combining_class_table)
    {
        $classes[$c] = 1 if $c < 0x100;
    }
    for (my $i = 0; $i < @classes; $i++)
    {
        next unless defined $classes[$i];
        $classes[$i] = @class_values;
        push @class_values, $i;
    }
    # keep the byte table an even length so that it fills whole words
    push @class_values, 0 if (@class_values % 2);
    die "too many classes" if @class_values >= 0x40;

    # character properties
    my @char_props;
    my @decomposed;
    my @comp_hash_table;
    my $comp_hash_size = $compose ? 254 : 0;

    for (my $i = 0; $i <= $MAX_CHAR; $i++)
    {
        next unless defined $combining_class_table[$i];
        if (defined $decomp{$type}->[$i])
        {
            my @dec = get_decomposition( $i, $decomp{$type} );
            if ($compose && (my @comp = get_composition( $i, $compat )))
            {
                # composable pair: file it in the composition hash table and
                # use the class index of the first decomposed character that
                # has a non-zero combining class (index of class 0 otherwise)
                my $hash = ($comp[0] + 95 * $comp[1]) % $comp_hash_size;
                push @{$comp_hash_table[$hash]}, to_utf16( @comp, $i );
                my $val = 0;
                foreach my $d (@dec)
                {
                    $val = $combining_class_table[$d];
                    last if $val;
                }
                $char_props[$i] = $classes[$val];
            }
            else
            {
                $char_props[$i] = 0xbf;
            }
            @dec = compose_hangul( @dec ) if $compose;
            @dec = to_utf16( @dec );
            # sequences of 7 or more words get a terminating 0, since the
            # stored length is capped at 7 below
            push @dec, 0 if @dec >= 7;
            $decomposed[$i] = \@dec;
        }
        else
        {
            if ($combining_class_table[$i] == 0x100)
            {
                $char_props[$i] = 0x7f;
            }
            elsif ($combining_class_table[$i])
            {
                $char_props[$i] = $classes[$combining_class_table[$i]] | 0x80;
            }
            elsif ($type eq "idna" && defined $idna_disallowed[$i])
            {
                $char_props[$i] = 0xff;
            }
            else
            {
                $char_props[$i] = 0;
            }
        }
    }

    if ($compose)
    {
        # flag both members of every composable pair; the flags differ
        # depending on whether the second member has a combining class
        for (my $i = 0; $i <= $MAX_CHAR; $i++)
        {
            my @comp = get_composition( $i, $compat );
            next unless @comp;
            if ($combining_class_table[$comp[1]])
            {
                $char_props[$comp[0]] |= 0x40 unless $char_props[$comp[0]] & 0x80;
                $char_props[$comp[1]] |= 0x40;
            }
            else
            {
                $char_props[$comp[0]] = ($char_props[$comp[0]] & ~0x40) | 0x80;
                $char_props[$comp[1]] |= 0xc0;
            }
        }
    }

    # surrogates
    foreach my $i (0xd800..0xdbff) { $char_props[$i] = 0xdf; }
    foreach my $i (0xdc00..0xdfff) { $char_props[$i] = 0x9f; }

    # Hangul
    if ($type eq "nfc") { foreach my $i (0x1100..0x117f) { $char_props[$i] = 0xff; } }
    elsif ($compose) { foreach my $i (0x1100..0x11ff) { $char_props[$i] = 0xff; } }
    foreach my $i (0xac00..0xd7ff) { $char_props[$i] = 0xff; }

    # invalid chars
    if ($type eq "idna") { foreach my $i (0x00..0x1f, 0x7f) { $char_props[$i] = 0xff; } }
    foreach my $i (0xfdd0..0xfdef) { $char_props[$i] = 0xff; }
    foreach my $i (0x00..0x10)
    {
        $char_props[($i << 16) | 0xfffe] = 0xff;
        $char_props[($i << 16) | 0xffff] = 0xff;
    }

    # decomposition hash table
    my @decomp_hash_table;
    my @decomp_hash_index;
    my @decomp_hash_data;
    my $decomp_hash_size = 944;

    # build string of character data, reusing substrings when possible
    # (longest sequences first, so that shorter ones can be found inside)
    my $decomp_char_data = "";
    foreach my $i (sort { @{$b} <=> @{$a} } grep defined, @decomposed)
    {
        my $str = pack "U*", @{$i};
        $decomp_char_data .= $str if index( $decomp_char_data, $str) == -1;
    }
    for (my $i = 0; $i < @decomposed; $i++)
    {
        next unless defined $decomposed[$i];
        my $pos = index( $decomp_char_data, pack( "U*", @{$decomposed[$i]} ));
        die "sequence not found" if $pos == -1;
        my $len = @{$decomposed[$i]};
        $len = 7 if $len > 7;
        # each entry: [ char, length in the top 3 bits | position in data ]
        my $hash = $i % $decomp_hash_size;
        push @{$decomp_hash_table[$hash]}, [ $i, ($len << 13) | $pos ];
    }
    for (my $i = 0; $i < $decomp_hash_size; $i++)
    {
        $decomp_hash_index[$i] = @decomp_hash_data / 2;
        next unless defined $decomp_hash_table[$i];
        if (@{$decomp_hash_table[$i]} == 1)
        {
            # a bucket holding a single char with property 0xbf stores the
            # length/position word directly in the index
            my $entry = $decomp_hash_table[$i]->[0];
            if ($char_props[$entry->[0]] == 0xbf)
            {
                $decomp_hash_index[$i] = $entry->[1];
                next;
            }
        }
        foreach my $entry (@{$decomp_hash_table[$i]})
        {
            push @decomp_hash_data, $entry->[0] & 0xffff, $entry->[1];
        }
    }
    push @decomp_hash_data, 0, 0;

    # composition hash table
    my @comp_hash_index;
    my @comp_hash_data;
    if (@comp_hash_table)
    {
        for (my $i = 0; $i < $comp_hash_size; $i++)
        {
            $comp_hash_index[$i] = @comp_hash_data;
            push @comp_hash_data, @{$comp_hash_table[$i]} if defined $comp_hash_table[$i];
        }
        $comp_hash_index[$comp_hash_size] = @comp_hash_data;
        push @comp_hash_data, 0, 0, 0;
    }

    # character properties, in rows of 128 characters
    my $level1 = ($MAX_CHAR + 1) / 128;
    my @rows = compress_char_props_table( $level1, @char_props[0..$MAX_CHAR] );

    my @header = ( $version[0], $version[1], $version[2], 0, $forms{$type}, $compat ? 18 : 3,
                   0, $decomp_hash_size, $comp_hash_size, 0 );
    # start offset of each table, in the order in which they are written
    my @tables = (0) x 8;

    $tables[0] = 16 + @header + @tables;
    $tables[1] = $tables[0] + @class_values / 2;
    $tables[2] = $tables[1] + $level1 / 2;
    $tables[3] = $tables[2] + (@rows - $level1) / 2;
    $tables[4] = $tables[3] + @decomp_hash_index;
    $tables[5] = $tables[4] + @decomp_hash_data;
    $tables[6] = $tables[5] + length $decomp_char_data;
    $tables[7] = $tables[6] + @comp_hash_index;

    print OUTPUT pack "S<16", unpack "U*", "norm$type.nlp";
    print OUTPUT pack "S<*", @header;
    print OUTPUT pack "S<*", @tables;
    print OUTPUT pack "C*", @class_values;

    print OUTPUT pack "C*", @rows[0..$level1-1];
    print OUTPUT pack "C*", @rows[$level1..$#rows];
    print OUTPUT pack "S<*", @decomp_hash_index;
    print OUTPUT pack "S<*", @decomp_hash_data;
    print OUTPUT pack "S<*", unpack "U*", $decomp_char_data;
    print OUTPUT pack "S<*", @comp_hash_index;
    print OUTPUT pack "S<*", @comp_hash_data;

    close OUTPUT;
    save_file($filename);

    add_registry_value( "Normalization", sprintf( "%x", $forms{$type} ), "norm$type.nls" );
}
################################################################
# output a codepage definition file from the global tables
# Write the codepage currently held in the global tables to
# "nls/c_<codepage>.nls" and register it under "Codepage".
# The single-byte or double-byte layout is chosen depending on whether any
# lead bytes are defined.
# Parameter: $codepage - codepage number.
sub output_codepage_file($)
{
    my $codepage = shift;

    my $output = sprintf "nls/c_%03d.nls", $codepage;
    open OUTPUT,">$output.new" or die "Cannot create $output";
    # the file holds packed binary data only (same as dump_intl_nls)
    binmode OUTPUT;

    printf "Building %s\n", $output;
    if (!@lead_bytes) { dump_binary_sbcs_table( $codepage ); }
    else { dump_binary_dbcs_table( $codepage ); }

    close OUTPUT;
    save_file($output);

    add_registry_value( "Codepage", sprintf( "%d", $codepage ), sprintf( "c_%03d.nls", $codepage ));
}
################################################################
# output a codepage table from a Microsoft-style mapping file
# Parse a Microsoft-style codepage description and write the matching
# codepage file. The global tables (@cp2uni, @glyph2uni, @lead_bytes,
# @uni2cp) and default characters are reset and then filled from the file.
# Parameter: $filename - name of the mapping file below $MSCODEPAGES.
# Dies when the file cannot be opened or contains an unrecognized line.
# For codepage 949 the KR Wansung codepage is generated as well.
sub dump_msdata_codepage($)
{
    my $filename = shift;

    my $state = "";
    my ($codepage, $width, $count);
    my ($lb_cur, $lb_end);

    @cp2uni = ();
    @glyph2uni = ();
    @lead_bytes = ();
    @uni2cp = ();
    $default_char = $DEF_CHAR;
    $default_wchar = $DEF_CHAR;

    my $INPUT = open_data_file( $MSCODEPAGES, $filename ) or die "Cannot open $filename";

    while (<$INPUT>)
    {
        next if /^;/; # skip comments
        next if /^\s*$/; # skip empty lines
        next if /\x1a/; # skip ^Z
        last if /^ENDCODEPAGE/;

        if (/^CODEPAGE\s+(\d+)/)
        {
            $codepage = $1;
            next;
        }
        # both default characters are hex numbers (the first character class
        # used to read "A-f", which also accepted non-hex characters)
        if (/^CPINFO\s+(\d+)\s+0x([0-9a-fA-F]+)\s+0x([0-9a-fA-F]+)/)
        {
            $width = $1;
            $default_char = hex $2;
            $default_wchar = hex $3;
            next;
        }
        # section header: selects how the following value pairs are stored
        if (/^(MBTABLE|GLYPHTABLE|WCTABLE|DBCSRANGE|DBCSTABLE)\s+(\d+)/)
        {
            $state = $1;
            $count = $2;
            next;
        }
        # value pair; in every table the first definition of an entry wins
        if (/^0x([0-9a-fA-F]+)\s+0x([0-9a-fA-F]+)/)
        {
            if ($state eq "MBTABLE")
            {
                my $cp = hex $1;
                my $uni = hex $2;
                $cp2uni[$cp] = $uni unless defined($cp2uni[$cp]);
                next;
            }
            if ($state eq "GLYPHTABLE")
            {
                my $cp = hex $1;
                my $uni = hex $2;
                $glyph2uni[$cp] = $uni unless defined($glyph2uni[$cp]);
                next;
            }
            if ($state eq "WCTABLE")
            {
                my $uni = hex $1;
                my $cp = hex $2;
                $uni2cp[$uni] = $cp unless defined($uni2cp[$uni]);
                next;
            }
            if ($state eq "DBCSRANGE")
            {
                # a range of lead bytes; the DBCSTABLE sections that follow
                # describe them one by one, starting with $start
                my $start = hex $1;
                my $end = hex $2;
                for (my $i = $start; $i <= $end; $i++) { add_lead_byte( $i ); }
                $lb_cur = $start;
                $lb_end = $end;
                next;
            }
            if ($state eq "DBCSTABLE")
            {
                my $mb = hex $1;
                my $uni = hex $2;
                my $cp = ($lb_cur << 8) | $mb;
                $cp2uni[$cp] = $uni unless defined($cp2uni[$cp]);
                # after the last entry of this table move on to the next
                # lead byte, or expect a new range once the current range
                # is exhausted
                if (!--$count)
                {
                    if (++$lb_cur > $lb_end) { $state = "DBCSRANGE"; }
                }
                next;
            }
        }
        die "$filename: Unrecognized line $_\n";
    }
    close $INPUT;

    output_codepage_file( $codepage );

    if ($codepage == 949) { dump_krwansung_codepage( @uni2cp ); }
}
################################################################
# pad a string with zero bytes up to a multiple of $align
#
# Returns the (possibly extended) copy; a string whose length is
# already a multiple of $align is returned unchanged.
sub align_string($$)
{
    my ($align, $str) = @_;
    my $remainder = length($str) % $align;
    return $str unless $remainder;
    return $str . ("\0" x ($align - $remainder));
}
################################################################
# extend a string with zero bytes to a minimum length of $pad
#
# Strings that are already $pad bytes or longer are returned as is
# (they are never truncated).
sub pad_string($$)
{
    my ($pad, $str) = @_;
    my $missing = $pad - length($str);
    $str .= "\0" x $missing if $missing > 0;
    return $str;
}
################################################################
# pack a GUID string
#
# Takes a GUID in the textual form xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
# and returns its 16-byte binary form: one little-endian 32-bit value,
# two little-endian 16-bit values, then eight single bytes.
# Dies if the string does not contain a GUID, instead of packing
# whatever the capture variables happened to hold.
# The argument is kept in a lexical so that the caller's $_ is untouched.
sub pack_guid($)
{
    my $guid = shift;
    my @fields = $guid =~ /([0-9A-Fa-f]{8})-([0-9A-Fa-f]{4})-([0-9A-Fa-f]{4})-([0-9A-Fa-f]{2})([0-9A-Fa-f]{2})-([0-9A-Fa-f]{2})([0-9A-Fa-f]{2})([0-9A-Fa-f]{2})([0-9A-Fa-f]{2})([0-9A-Fa-f]{2})([0-9A-Fa-f]{2})/
        or die "Invalid GUID '$guid'\n";
    return pack "L<S<2C8", map { hex $_ } @fields;
}
################################################################
# comparison function for compression sort
#
# Sort callback (uses $a and $b): rows are array references.  Rows
# with fewer elements sort first; rows of equal size are ordered
# numerically by elements 4 through 12, compared in that order.
sub cmp_compression
{
    my $order = @{$a} <=> @{$b};
    return $order if $order;
    foreach my $i (4 .. 12)
    {
        $order = $a->[$i] <=> $b->[$i];
        return $order if $order;
    }
    return $order;
}
################################################################
# build a binary sort keys table
#
# $filename is the output file; $download names the input data, which
# is opened through open_data_file( $MSDATA, ... ).
# The input is parsed section by section (SORTKEY / SORTTABLES parts),
# then four pieces are written after a header of four 32-bit offsets:
# the sortkey table (default keys plus per-GUID exception tables), the
# case mapping tables, the char type table and the sort tables
# (guids, expansions, compressions, multiple weights, jamo sort).
# The locale -> sort GUID assignments are added below the
# "Sorting\Ids" registry key.
# Returns the char type table string.
# Dies on an input line that does not fit the current section.
sub dump_sortkey_table($$)
{
    my ($filename, $download) = @_;

    my @keys;
    my ($part, $section, $subsection, $guid, $version, $ling_flag);
    my @multiple_weights;
    my @expansions;
    my @compressions;
    my %exceptions;
    my %guids;
    my %compr_flags;
    my %locales;
    my $default_guid = "00000001-57ee-1e5c-00b4-d0000bb1e11e";
    my $jamostr = "";

    my $re_hex = '0x[0-9A-Fa-f]+';
    my $re_key = '(\d+\s+\d+\s+\d+\s+\d+)';

    $guids{$default_guid} = { };

    my %flags = ( "HAS_3_BYTE_WEIGHTS" => 0x01, "REVERSEDIACRITICS" => 0x10, "DOUBLECOMPRESSION" => 0x20, "INVERSECASING" => 0x40 );

    my $KEYS = open_data_file( $MSDATA, $download );

    # plain print: the file name must not be interpreted as a format string
    print "Building $filename\n";

    while (<$KEYS>)
    {
        s/\s*;.*$//;  # strip trailing comments
        next if /^\s*$/;  # skip empty lines
        if (/^\s*(SORTKEY|SORTTABLES)/)
        {
            $part = $1;
            next;
        }
        if (/^\s*(ENDSORTKEY|ENDSORTTABLES)/)
        {
            $part = $section = "";
            next;
        }
        # a section keyword starts a new section and forgets the current guid
        if (/^\s*(DEFAULT|RELEASE|REVERSEDIACRITICS|DOUBLECOMPRESSION|INVERSECASING|MULTIPLEWEIGHTS|EXPANSION|COMPATIBILITY|COMPRESSION|EXCEPTION|JAMOSORT)\s+/)
        {
            $section = $1;
            $guid = undef;
            next;
        }
        next unless $part;
        if ("$part.$section" eq "SORTKEY.DEFAULT")
        {
            # character code followed by the four values of its default key
            if (/^\s*($re_hex)\s+$re_key/)
            {
                $keys[hex $1] = [ split(/\s+/,$2) ];
                next;
            }
        }
        elsif ("$part.$section" eq "SORTTABLES.RELEASE")
        {
            if (/^\s*NLSVERSION\s+0x([0-9A-Fa-f]+)/)
            {
                $version = hex $1;
                next;
            }
            if (/^\s*DEFINEDVERSION\s+0x([0-9A-Fa-f]+)/)
            {
                # ignore for now
                next;
            }
        }
        elsif ("$part.$section" eq "SORTTABLES.REVERSEDIACRITICS" ||
               "$part.$section" eq "SORTTABLES.DOUBLECOMPRESSION" ||
               "$part.$section" eq "SORTTABLES.INVERSECASING")
        {
            # the section name itself is the flag to set on each listed guid
            if (/^\s*SORTGUID\s+([-0-9A-Fa-f]+)/)
            {
                $guid = lc $1;
                $guids{$guid} = { } unless defined $guids{$guid};
                $guids{$guid}->{flags} |= $flags{$section};
                next;
            }
            if (/^\s*LOCALENAME\s+([A-Za-z0-9-_]+)/)
            {
                $locales{$1} = $guid;
                next;
            }
        }
        elsif ("$part.$section" eq "SORTTABLES.MULTIPLEWEIGHTS")
        {
            if (/^\s*(\d+)\s+(\d+)/)
            {
                push @multiple_weights, $1, $2;
                next;
            }
        }
        elsif ("$part.$section" eq "SORTTABLES.EXPANSION")
        {
            if (/^\s*0x([0-9A-Fa-f]+)\s+0x([0-9A-Fa-f]+)\s+0x([0-9A-Fa-f]+)/)
            {
                # the key of an expanded character stores the index of its
                # pair in the expansions table (low byte, then high byte)
                my $pos = scalar @expansions / 2;
                $keys[hex $1] = [ 2, 0, $pos & 0xff, $pos >> 8 ] unless defined $keys[hex $1];
                push @expansions, hex $2, hex $3;
                next;
            }
        }
        elsif ("$part.$section" eq "SORTTABLES.COMPATIBILITY")
        {
            # the first character shares the key of the second one
            if (/^\s*0x([0-9A-Fa-f]+)\s+0x([0-9A-Fa-f]+)/)
            {
                $keys[hex $1] = $keys[hex $2];
                next;
            }
        }
        elsif ("$part.$section" eq "SORTTABLES.COMPRESSION")
        {
            if (/^\s*SORTGUID\s+([-0-9A-Fa-f]+)\s+\d*\s*([A-Z0-9_]+)?/)
            {
                if ($subsection || !$guid)  # start a new one
                {
                    $guid = lc $1;
                    $subsection = "";
                    $guids{$guid} = { } unless defined $guids{$guid};
                    $guids{$guid}->{flags} |= $flags{$2} if $2;
                    $guids{$guid}->{compr} = @compressions;
                    $exceptions{"$guid-"} = [ ] unless defined $exceptions{"$guid-"};
                    $compr_flags{$guid} = [ ] unless defined $compr_flags{$guid};
                    push @compressions, [ ];
                }
                else  # merge with current one
                {
                    $guids{lc $1} = { } unless defined $guids{lc $1};
                    $guids{lc $1}->{flags} |= $flags{$2} if $2;
                    $guids{lc $1}->{compr} = $guids{$guid}->{compr};
                    $compr_flags{lc $1} = $compr_flags{$guid};
                }
                next;
            }
            if (/^\s*LOCALENAME\s+([A-Za-z0-9-_]+)/)
            {
                $locales{$1} = $guid;
                next;
            }
            if (/^\s*(TWO|THREE|FOUR|FIVE|SIX|SEVEN|EIGHT)/)
            {
                $subsection = $1;
                next;
            }
            if ($subsection && /^\s*(($re_hex\s+){2,8})$re_key/)
            {
                # a row is stored as the four key values followed by the
                # two to eight characters of the sequence
                my @comp = map { hex $_; } split(/\s+/,$1);
                push @{$compressions[$#compressions]}, [ split(/\s+/,$3), @comp ];
                # add compression flags
                $compr_flags{$guid}->[$comp[0]] |= @comp >= 6 ? 0xc0 : @comp >= 4 ? 0x80 : 0x40;
                next;
            }
        }
        elsif ("$part.$section" eq "SORTTABLES.EXCEPTION")
        {
            if (/^\s*SORTGUID\s+([-0-9A-Fa-f]+)\s+\d*\s*(LINGUISTIC_CASING)?/)
            {
                $guid = lc $1;
                $guids{$guid} = { } unless defined $guids{lc $1};
                # exception tables are keyed by guid plus a "+" (linguistic
                # casing) or "-" suffix
                $ling_flag = ($2 ? "+" : "-");
                $exceptions{"$guid$ling_flag"} = [ ] unless defined $exceptions{"$guid$ling_flag"};
                next;
            }
            if (/^\s*LOCALENAME\s+([A-Za-z0-9-_]+)/)
            {
                $locales{$1} = $guid;
                next;
            }
            if (/^\s*($re_hex)\s+$re_key/)
            {
                $exceptions{"$guid$ling_flag"}->[hex $1] = [ split(/\s+/,$2) ];
                next;
            }
        }
        elsif ("$part.$section" eq "SORTTABLES.JAMOSORT")
        {
            # the leading character code is skipped; the five values after
            # it are packed with a "C8" template
            if (/^\s*$re_hex\s+(($re_hex\s*){5})/)
            {
                $jamostr .= pack "C8", map { hex $_; } split /\s+/, $1;
                next;
            }
        }
        die "$download: $part.$section: unrecognized line $_\n";
    }
    close $KEYS;

    # Sortkey table

    # default keys: four bytes per character, with the first two key
    # values swapped
    my $table;
    for (my $i = 0; $i < 0x10000; $i++)
    {
        my @k = defined $keys[$i] ? @{$keys[$i]} : (0) x 4;
        $table .= pack "C4", $k[1], $k[0], $k[2], $k[3];
    }

    # exception tables: for each guid, an index of 256 entries followed by
    # a 256-key block for every range that has an exception or compression flag
    foreach my $id (sort keys %exceptions)
    {
        my $pos = length($table) / 4;
        my @exc = @{$exceptions{$id}};
        my @filled;
        my $key = (substr( $id, -1 ) eq "+" ? "ling_except" : "except");
        my $guid = substr( $id, 0, -1 );
        $guids{$guid}->{$key} = $pos;
        $pos += 0x100;
        # always run the declaration: "my ... if ..." has undefined behaviour
        my @flags = defined $compr_flags{$guid} ? @{$compr_flags{$guid}} : ();
        for (my $j = 0; $j < 0x10000; $j++)
        {
            next unless defined $exc[$j] || defined $flags[$j];
            $filled[$j >> 8] = 1;
            $j |= 0xff;  # the range is known to be needed, skip to the next one
        }
        # index: filled ranges point to their own block, the others to the
        # matching range of the default keys
        for (my $j = 0; $j < 0x100; $j++)
        {
            $table .= pack "L<", $filled[$j] ? $pos : $j * 0x100;
            $pos += 0x100 if $filled[$j];
        }
        for (my $j = 0; $j < 0x10000; $j++)
        {
            next unless $filled[$j >> 8];
            my @k = defined $exc[$j] ? @{$exc[$j]} : defined $keys[$j] ? @{$keys[$j]} : (0) x 4;
            $k[3] |= $flags[$j] || 0;
            $table .= pack "C4", $k[1], $k[0], $k[2], $k[3];
        }
    }

    # Case mapping tables

    # standard table
    my @casemaps;
    my @upper = @toupper_table;
    my @lower = @tolower_table;
    remove_linguistic_mappings( \@upper, \@lower );
    $casemaps[0] = pack( "S<*", 1) . dump_binary_case_table( @upper ) . dump_binary_case_table( @lower );

    # linguistic table
    $casemaps[1] = pack( "S<*", 1) . dump_binary_case_table( @toupper_table ) . dump_binary_case_table( @tolower_table );

    # Turkish table
    @upper = @toupper_table;
    @lower = @tolower_table;
    $upper[ord 'i'] = 0x130;  # LATIN CAPITAL LETTER I WITH DOT ABOVE
    $lower[ord 'I'] = 0x131;  # LATIN SMALL LETTER DOTLESS I
    $casemaps[2] = pack( "S<*", 1) . dump_binary_case_table( @upper ) . dump_binary_case_table( @lower );
    my $casemaps = align_string( 8, $casemaps[0] . $casemaps[1] . $casemaps[2] );

    # Char type table

    # every distinct triple of 16-bit values is stored once in $types;
    # @table maps each character to the index of its triple
    my @table;
    my $types = "";
    my %typestr;
    for (my $i = 0; $i < 0x10000; $i++)
    {
        my $str = pack "S<3",
            ($category_table[$i] || 0) & 0xffff,
            defined($direction_table[$i]) ? $c2_types{$direction_table[$i]} : 0,
            ($category_table[$i] || 0) >> 16;

        if (!defined($typestr{$str}))
        {
            $typestr{$str} = length($types) / 6;
            $types .= $str;
        }
        $table[$i] = $typestr{$str};
    }

    my @rows = compress_array( 4096, 0, @table[0..65535] );
    my @array = compress_array( 256, 0, @rows[0..4095] );
    for (my $i = 0; $i < 256; $i++) { $array[$i] *= 2; }  # we need byte offsets
    for (my $i = 256; $i < @array; $i++) { $array[$i] += 2 * @array - 4096; }

    my $arraystr = pack("S<*", @array) . pack("C*", @rows[4096..$#rows]);
    my $chartypes = pack "S<2", 4 + length($types) + length($arraystr), 2 + length($types);
    $chartypes = align_string( 8, $chartypes . $types . $arraystr );

    # Sort tables

    # guids
    # count the keys explicitly: a hash in scalar context is only its
    # number of keys starting with perl 5.26
    my $sorttables = pack "L<2", $version, scalar keys %guids;
    foreach my $id (sort keys %guids)
    {
        my %guid = %{$guids{$id}};
        my $flags = $guid{flags} || 0;
        # offset of the case mapping table, past the standard table, and
        # also past the linguistic one for guids with a linguistic
        # exception table
        my $map = length($casemaps[0]) + (defined $guid{ling_except} ? length($casemaps[1]) : 0);
        $sorttables .= pack_guid($id) . pack "L<5",
                                              $flags,
                                              defined($guid{compr}) ? $guid{compr} : 0xffffffff,
                                              $guid{except} || 0,
                                              $guid{ling_except} || 0,
                                              $map / 2;
    }

    # expansions
    $sorttables .= pack "L<S<*", scalar @expansions / 2, @expansions;

    # compressions
    $sorttables .= pack "L<", scalar @compressions;
    my $rowstr = "";
    foreach my $c (@compressions)
    {
        my $pos = length($rowstr) / 2;
        my $min = 0xffff;
        my $max = 0;
        my @lengths = (0) x 8;
        foreach my $r (sort cmp_compression @{$c})
        {
            my @row = @{$r};
            # a row holds 4 key values plus 2 to 8 characters, so this
            # counts the rows per sequence length
            $lengths[scalar @row - 6]++;
            foreach my $val (@row[4..$#row])
            {
                $min = $val if $min > $val;
                $max = $val if $max < $val;
            }
            $rowstr .= align_string( 4, pack "S<*", @row[4..$#row] );
            $rowstr .= pack "C4", $row[1], $row[0], $row[2], $row[3];
        }
        $sorttables .= pack "L<S<10", $pos, $min, $max, @lengths;
    }
    $sorttables .= $rowstr;

    # multiple weights
    $sorttables .= align_string( 4, pack "L<C*", scalar @multiple_weights / 2, @multiple_weights );

    # jamo sort
    $sorttables .= pack("L<", length($jamostr) / 8) . $jamostr;

    # Locales

    add_registry_key( "Sorting\\Ids", "{$default_guid}" );
    foreach my $loc (sort keys %locales)
    {
        # skip specific locales that match more general ones
        my @parts = split /[-_]/, $loc;
        next if @parts > 1 && defined($locales{$parts[0]}) && $locales{$parts[0]} eq $locales{$loc};
        next if @parts > 2 && defined($locales{"$parts[0]-$parts[1]"}) && $locales{"$parts[0]-$parts[1]"} eq $locales{$loc};
        add_registry_value( "Sorting\\Ids", $loc, "\{$locales{$loc}\}" );
    }

    # File header

    # four 32-bit offsets: sortkey table, case maps, char types, sort tables
    my @header;
    $header[0] = 16;
    $header[1] = $header[0] + length $table;
    $header[2] = $header[1] + length $casemaps;
    $header[3] = $header[2] + length $chartypes;

    open OUTPUT, ">$filename.new" or die "Cannot create $filename";
    print OUTPUT pack "L<*", @header;
    print OUTPUT $table, $casemaps, $chartypes, $sorttables;
    close OUTPUT;
    save_file($filename);
    return $chartypes;
}
# locale entries indexed by locale name (filled in build_locale_data)
my %lcnames;

# return the name of the parent of a locale
#
# The "sparent" field of a known locale takes precedence over its
# "parent" field; otherwise the parent is the name with its last
# "-xxx" component removed, or "" when there is none.
# Returns undef for an empty or undefined name.
sub locale_parent($)
{
    my $name = shift;
    return undef unless $name;

    if (defined $lcnames{$name})
    {
        my $entry = $lcnames{$name};
        foreach my $field (qw(sparent parent))
        {
            return $entry->{$field} if defined $entry->{$field};
        }
    }
    return $1 if $name =~ /(.*)-[0-9A-Za-z]+/;
    return "";
}
# sort callback (uses $a and $b) comparing locale names as strings,
# after mapping A-Z to lower case and "_" to "-"
sub compare_locales
{
    my ($left, $right) = ($a, $b);
    tr/A-Z_/a-z-/ for $left, $right;
    return $left cmp $right;
}
# query an xml key
#
# Runs $query through the find() method of $xml and returns the text
# content of the first result, or undef when find() returns a false
# value.  A note is printed to STDERR when there are several results.
sub xml_query($$)
{
    my ($xml, $query) = @_;
    my $nodes = $xml->find( $query );
    if ($nodes)
    {
        my $count = @{$nodes};
        printf STDERR "multiple entries for %s\n", $query if $count > 1;
        return $nodes->[0]->textContent;
    }
    return undef;
}
# query an xml key for a locale, with fallback to the parents
#
# $loc is a locale entry; starting from its name, the chain given by
# locale_parent() is followed until the xml data of a known locale has
# a result for $query that is not the "↑↑↑" marker.
# Returns the text content of that result, or undef if the chain ends
# without one.
sub loc_query($$)
{
    my ($loc, $query) = @_;

    $loc = $lcnames{"en-US"} unless $loc->{name};  # fallback to "en-US" for root locale

    my $cur = $loc->{name};
    while (defined $cur)
    {
        if (defined $lcnames{$cur})
        {
            my $found = $lcnames{$cur}->{xml}->find( $query );
            if ($found)
            {
                printf STDERR "%s: multiple entries for %s\n", $cur, $query if @{$found} > 1;
                my $text = $found->[0]->textContent;
                # "↑↑↑" means the value has to be looked up further up
                return $text unless $text eq "\x{2191}\x{2191}\x{2191}";
            }
        }
        $cur = locale_parent( $cur );
    }
    return undef;
}
# retrieve a locale field entry by going up the parents tree
#
# Lookup order: the locale itself, "en-US" when the locale has no name,
# the chain of aliases, then the parents.  A parent is the "sparent"
# field of a known locale if set, otherwise the name with its last
# "-xxx" component removed.  $def is returned when nothing is found.
sub locale_entry($$$)
{
    my ($loc, $field, $def) = @_;

    my $value = $loc->{$field};
    return $value if defined $value;

    if (!$loc->{name})  # fallback to "en-US" for root locale
    {
        $loc = $lcnames{"en-US"};
        return $loc->{$field} if defined $loc->{$field};
    }

    # resolve aliases
    while (defined( my $target = $loc->{alias} ))
    {
        $loc = $lcnames{$target};
        return $loc->{$field} if defined $loc->{$field};
    }

    for (my $cur = $loc->{name}; $cur; )
    {
        my $entry = $lcnames{$cur};
        if (defined $entry && defined $entry->{sparent})
        {
            $cur = $entry->{sparent};
        }
        elsif ($cur =~ /(.*)-[0-9A-Za-z]+/)
        {
            $cur = $1;
        }
        else
        {
            last;  # no parent left
        }
        $entry = $lcnames{$cur};
        return $entry->{$field} if defined $entry && defined $entry->{$field};
    }
    return $def;
}
# accumulated string data; offsets into it are counted in 16-bit units
my $string_data;

# add a blob to the string data, reusing an identical existing blob
#
# Returns the offset of the blob in 16-bit units.
# Only matches at an even byte offset are reused: a match that
# straddles two 16-bit units would give a fractional offset, which
# would be truncated to point at the wrong data once packed.
sub add_str_data($)
{
    my $txt = shift;
    $string_data = "" unless defined $string_data;
    my $ret = index( $string_data, $txt );
    # skip misaligned matches and look for the next occurrence
    $ret = index( $string_data, $txt, $ret + 1 ) while $ret >= 0 && $ret % 2;
    if ($ret == -1)
    {
        $ret = length($string_data);
        $string_data .= $txt;
    }
    return $ret / 2;
}
# add a string to the string data and return its offset
#
# The string is stored as UTF-16LE, preceded by its length in 16-bit
# units and followed by a 16-bit zero.  Offset 0 is returned for an
# undefined or empty string.
sub add_string($)
{
    my $str = shift;
    return 0 if !defined($str) || $str eq "";

    my $utf16 = encode( "UTF16LE", $str );
    my $length = pack "S<", length($utf16) / 2;
    my $terminator = pack "S", 0;
    return add_str_data( $length . $utf16 . $terminator );
}
# add a list of 32-bit values to the string data and return its offset
#
# The values are stored little-endian, preceded by a 16-bit count that
# is twice the number of values.
sub add_fontsig(@)
{
    my @values = @_;
    my $count = 2 * @values;
    my $blob = pack "S<L<*", $count, @values;
    return add_str_data( $blob );
}
# add an array of strings to the string data and return its offset
#
# Each string is added with add_string(); the array itself is stored
# as a 16-bit count followed by the 32-bit offsets of the strings.
# Offset 0 is returned for an empty list.
sub add_strarray(@)
{
    my @strings = @_;
    return 0 unless @strings;

    my @offsets;
    foreach my $str (@strings)
    {
        push @offsets, add_string( $str );
    }
    my $count = @strings;
    return add_str_data( pack "S<L<*", $count, @offsets );
}
# convert a number format pattern to a grouping string
#
# Each group size becomes one character whose code is the size.  For
# a pattern with two groups after "#," (e.g. "#,##,##0") the size of
# the last group comes first, followed by the size of the group before
# it.  Patterns without a recognized group yield a single group of 3.
sub format_to_grouping($)
{
    my $format = shift;

    if (my ($before_last, $last) = $format =~ /#,(#+),(#+0)/)
    {
        return chr(length($last)) . chr(length($before_last));
    }
    if (my ($group) = $format =~ /#,(#+0)/)
    {
        return chr(length($group));
    }
    # printf STDERR "unknown format %s\n", $format;
    return chr(3);
}
# convert a currency format pattern to positive/negative format indexes
#
# $name is the locale name (only useful for diagnostics); the second
# argument is the pattern, optionally made of a positive and a
# negative part separated by ";".  Each part is matched against a list
# of regular expressions and the index of the first match is used,
# or 0 when nothing matches.  Without a negative part, the negative
# index is derived from the positive one.
# Returns the list (positive index, negative index).
sub parse_currency_format($$)
{
    my ($name, $format) = @_;
    my ($posfmt, $negfmt) = split /;/, $format;

    my @pospatterns = ( "\xa4[^\xa0]*#",         # $1.1
                        "00[^\xa0]*\xa4",        # 1.1$
                        "\xa4.*\xa0.*#",         # $ 1.1
                        "00.*\xa0.*\xa4" );      # 1.1 $
    my @negpatterns = ( "\\(\xa4[^\xa0]*#",      # ($1.1)
                        "-\xa4[^\xa0]*#",        # -$1.1
                        "\xa4[^\xa0]*-#",        # $-1.1
                        "\xa4[^\xa0]*#.*00-",    # $1.1-
                        "00[^\xa0]*\xa4\\)",     # (1.1$)
                        "-#.*00[^\xa0]*\xa4",    # -1.1$
                        "00-[^\xa0]*\xa4",       # 1.1-$
                        "00[^\xa0]*\xa4-",       # 1.1$-
                        "-#.*00.*\xa0.*\xa4",    # -1.1 $
                        "-\xa4.*\xa0.*#",        # -$ 1.1
                        "00.*\xa0.*\xa4-",       # 1.1 $-
                        "\xa4.*\xa0.*#.*00-",    # $ 1.1-
                        "\xa4.*\xa0.*-#",        # $ -1.1
                        "00-.*\xa0.*\xa4",       # 1.1- $
                        "\\(\xa4.*\xa0.*#",      # ($ 1.1)
                        "00.*\xa0.*\xa4\\)");    # (1.1 $)

    # index of the first matching pattern, 0 for an unknown format
    my ($pos) = grep { $posfmt =~ /$pospatterns[$_]/ } 0 .. $#pospatterns;
    $pos = 0 unless defined $pos;

    my $neg;
    if (defined $negfmt)
    {
        ($neg) = grep { $negfmt =~ /$negpatterns[$_]/ } 0 .. $#negpatterns;
        $neg = 0 unless defined $neg;
    }
    else
    {
        # default negative format for each of the four positive formats
        $neg = (1, 5, 9, 8)[$pos];
    }
    return ($pos, $neg);
}
# convert a percent format pattern to positive/negative format indexes
#
# The pattern is matched against a list of regular expressions and the
# index of the first match is the positive index.  The negative index
# is the same value, except that 3 is mapped to 7.  When nothing
# matches, a note is printed to STDERR and the number of patterns is
# used as index.
# Returns the list (positive index, negative index).
sub parse_percent_format($)
{
    my $fmt = shift;
    my @patterns = ( "0.+%",     # 1 %
                     "0%",       # 1%
                     "%#",       # %1
                     "%.+#" );   # % 1

    my ($pos) = grep { $fmt =~ /$patterns[$_]/ } 0 .. $#patterns;
    if (!defined $pos)
    {
        printf STDERR "unknown format '%s'\n", $fmt;
        $pos = @patterns;
    }
    return ($pos, $pos == 3 ? 7 : $pos);
}
# rewrite the fields of a date pattern
#
# Each substitution is applied once, in this order: the first run of
# "G" becomes "gg", "LLLL"/"LLL" become "MMMM"/"MMM", the first run of
# "E" or of three or more "c" becomes "dddd", and a single "y" that is
# not next to another "y" (or preceded by "g") becomes "yyyy".
sub convert_date_format($)
{
    my $fmt = shift;
    for ($fmt)
    {
        s/G+/gg/;
        s/LLLL/MMMM/;
        s/LLL/MMM/;
        s/E+/dddd/;
        s/ccc+/dddd/;
        # single "y" in the middle, at the start, or at the end
        s/([^gy])y([^y])/$1yyyy$2/;
        s/^y([^y])/yyyy$1/;
        s/([^gy])y$/$1yyyy/;
    }
    return $fmt;
}
# rewrite the fields of a time pattern: the first run of "a" and the
# first run of "B" are each replaced by "tt"
sub convert_time_format($)
{
    my $fmt = shift;
    foreach my $field (qr/a+/, qr/B+/)
    {
        $fmt =~ s/$field/tt/;
    }
    return $fmt;
}
# load the ISO 639-3 code table
#
# Reads iso-639-3.tab through open_data_file() and returns a hash that
# maps the two-letter code found in the fourth column to the
# three-letter code of the third column.  Lines without a two-letter
# code in that position are ignored.
sub load_iso639()
{
    my %codes;
    my $fh = open_data_file( $ISO639, "iso-639-3_Code_Tables_$ISO639VERSION/iso-639-3.tab" );
    while (my $line = <$fh>)
    {
        next unless $line =~ /^\s*[a-z]{3}\s+[a-z]{3}\s+([a-z]{3})\s+([a-z]{2})\s/;
        $codes{$2} = $1;
    }
    close $fh;
    return %codes;
}
################################################################
# build the locale table for locale.nls
sub build_locale_data()
{
my $base = "cldr-release-$CLDRVERSION";
my $suppl = load_xml_data_file( $CLDRDATA, "$base/common/supplemental/supplementalData.xml" );
my $subtags = load_xml_data_file( $CLDRDATA, "$base/common/supplemental/likelySubtags.xml" );
my $numbers = load_xml_data_file( $CLDRDATA, "$base/common/supplemental/numberingSystems.xml" );
# obsolete phone data from CLDR version 33
my $phone = load_xml_data_file( $CLDR33DATA, "common/supplemental/telephoneCodeData.xml" );
my %iso639 = load_iso639();
$string_data = pack "S2", 0, 0; # offset 0 == empty string
%lcnames = map { $_->{name} => $_ } @locales;
my %lcids;
foreach my $loc (@locales) { $lcids{$loc->{lcid}} = $loc if defined $loc->{lcid}; }
my %days = ( "sun" => 0, "mon" => 1, "tue" => 2, "wed" => 3, "thu" => 4, "fri" => 5, "sat" => 6 );
# assign locale parents
foreach my $loc (@locales)
{
next if $loc->{name} eq "";
next if defined $loc->{parent};
(my $unix_name = $loc->{name}) =~ s/-/_/g;
my $parent = xml_query( $suppl, "/supplementalData/parentLocales/parentLocale[contains(concat(' ',\@locales,' '),' $unix_name ')]/\@parent" );
if ($parent)
{
$parent =~ s/_/-/g;
$parent = "" if $parent eq "root";
}
elsif ($loc->{name} =~ /(.*)-[0-9A-Za-z]+/) { $parent = $1; }
$loc->{parent} = $parent || "";
}
# load per-locale XML files
foreach my $loc (@locales)
{
next if defined $loc->{alias};
(my $file = $loc->{file} || $loc->{name}) =~ s/-/_/g;
$file = "$base/" . ($loc->{dir} || "common") . "/main/$file.xml";
my $xml = load_xml_data_file( $CLDRDATA, $file );
$loc->{xml} = $xml;
$loc->{language} ||= xml_query( $xml, "/ldml/identity/language/\@type" );
$loc->{territory} ||= xml_query( $xml, "/ldml/identity/territory/\@type" );
$loc->{script} = xml_query( $xml, "/ldml/identity/script/\@type" );
if (!defined($loc->{territory}) && $loc->{name} =~ /-([A-Z]{2}|[0-9]{3})$/) { $loc->{territory} = $1; }
if (!defined($loc->{script}) && $loc->{name} =~ /-([A-Z][a-z]{3})(-[A-Z]{2})?$/) { $loc->{script} = $1; }
}
# assign a default territory and sort locale
foreach my $loc (@locales)
{
next if defined $loc->{alias};
next if defined $loc->{territory};
my $id = $loc->{sortlocale};
if (defined $id && ($id =~ /[-_]([A-Z0-9]+)$/))
{
$loc->{territory} = $1;
next;
}
my @children = grep /^$loc->{name}-[A-Z0-9]+$/ && !defined $lcnames{$_}->{alias}, keys %lcnames;
if (@children == 1)
{
$id = $children[0];
}
else
{
my $name = $loc->{file} || $loc->{name};
$name =~ s/-(Arab|Cyrl|Deva|Guru|Hans|Hant|Latn|Tfng|Vaii)$//;
$name =~ s/-/_/g;
$id = xml_query( $subtags, "/supplementalData/likelySubtags/likelySubtag[\@from='$name']/\@to" );
$id =~ s/_/-/g if $id;
}
if ($id =~ /[-_]([A-Z0-9]+)$/)
{
$loc->{territory} = $1;
next if defined $loc->{sortlocale};
next unless $id =~ /^$loc->{name}/;
while (defined $lcnames{$id} && defined $lcnames{$id}->{alias}) { $id = $lcnames{$id}->{alias}; }
$loc->{sortlocale} = $id if defined $lcnames{$id};
next;
}
print STDERR "no territory found for $loc->{name}\n";
}
# fill geoid table
my %geotable;
foreach my $geo (@geoids)
{
my $name = $geo->{name};
next unless defined $name;
$geo->{alias} = $geotable{$name} if defined $geotable{$name};
$geotable{$name} ||= $geo;
}
foreach my $loc (@locales)
{
next if defined $loc->{alias};
my $territory = $loc->{territory};
$geotable{$territory} ||= { name => $territory };
}
foreach my $name (keys %geotable)
{
my $geo = $geotable{$name};
$geo->{dialcode} = xml_query( $phone, "(/supplementalData/telephoneCodeData/codesByTerritory[\@territory='$name']/telephoneCountryCode)[1]/\@code" );
if ($name =~ /\d+/)
{
$geo->{uncode} = $name;
next;
}
$geo->{iso2} = $name;
$geo->{iso3} = xml_query( $suppl, "/supplementalData/codeMappings/territoryCodes[\@type='$name']/\@alpha3");
$geo->{uncode} = xml_query( $suppl, "/supplementalData/codeMappings/territoryCodes[\@type='$name']/\@numeric");
$geo->{sintlsymbol} ||= xml_query( $suppl, "(/supplementalData/currencyData/region[\@iso3166='$name']/currency[not(\@to)])[1]/\@iso4217") || "XXX";
$geo->{sintlsymbol} =~ s/XXX/XDR/;
}
foreach my $geo (@geoids)
{
$geo->{parentid} = $geotable{$geo->{parent}}->{id} if defined $geo->{parent};
next if defined $geo->{iso2};
next if defined $geo->{alias};
next unless defined $geo->{uncode};
my @contains;
my $list = xml_query( $suppl, "/supplementalData/territoryContainment/group[\@type='$geo->{uncode}' and not(\@status)]/\@contains");
push @contains, split /\s+/, $list if defined $list;
$list = xml_query( $suppl, "/supplementalData/territoryContainment/group[\@type='$geo->{uncode}' and \@status='deprecated']/\@contains");
push @contains, split /\s+/, $list if defined $list;
while (@contains)
{
my $territory = pop @contains;
if (defined $geotable{$territory})
{
$geotable{$territory}->{parentid} ||= $geo->{id};
}
elsif ($territory =~ /\d+/)
{
# expand region recursively
$list = xml_query( $suppl, "/supplementalData/territoryContainment/group[\@type='$territory' and not(\@status)]/\@contains" );
push @contains, split /\s+/, $list if defined $list;
}
}
}
# assign calendars to their locale
foreach my $cal (@calendars)
{
next unless defined $cal->{locale};
my $loc = $lcnames{$cal->{locale}};
$loc->{calendar} = [ ] unless defined $loc->{calendar};
push @{$loc->{calendar}}, $cal;
}
# assign default lcid to aliases
foreach my $loc (@locales)
{
next unless defined $loc->{alias};
next if defined $loc->{lcid};
my $alias = $loc->{alias};
my $lcid = $lcnames{$alias}->{lcid} || 0x1000;
$loc->{lcid} = $lcid | 0x80000000;
}
# assign sort aliases to parent locale
foreach my $loc (@locales)
{
next unless $loc->{name} =~ /_/;
next unless defined $loc->{alias};
my $alias = $loc->{alias};
my $parent = $lcnames{$alias};
my $basename = $parent->{name};
while (1)
{
@{$parent->{sortnames}}[($loc->{lcid} >> 16) - 1] = $loc->{name};
$alias = locale_parent( $alias );
last unless $alias && defined $lcnames{$alias};
$parent = $lcnames{$alias};
last if defined $parent->{sortbase} && $parent->{sortbase} ne $basename;
$parent->{sortbase} = $basename;
}
}
# assign an array index to all locales
my $idx = 0;
foreach my $loc (@locales)
{
next if defined $loc->{alias};
$loc->{idx} = $idx++;
}
foreach my $loc (@locales)
{
my $alias = $loc->{alias};
next unless defined $alias;
while (defined $lcnames{$alias}->{alias}) { $alias = $lcnames{$alias}->{alias}; }
$loc->{idx} = $lcnames{$alias}->{idx};
}
# output lcids table
my $lcid_data = "";
foreach my $id (sort { $a <=> $b } keys %lcids)
{
my $loc = $lcids{$id};
$lcid_data .= pack "L<S<2", $id, $loc->{idx}, add_string($loc->{name});
}
# output lcnames table
my $lcname_data = "";
foreach my $name (sort compare_locales keys %lcnames)
{
my $loc = $lcnames{$name};
$lcname_data .= pack "S<2L<", add_string($name), $loc->{idx}, $loc->{lcid} || 0x1000;
}
# output locales array
my $locale_data = "";
my $default_lcid = 0x8001;
foreach my $loc (@locales)
{
next if defined $loc->{alias};
my $sname = $loc->{name};
my $language = $loc->{language};
my $territory = $loc->{territory};
my $script = $loc->{script};
my $neutral = ($sname && $sname !~ /-$territory/);
my $sparent = $loc->{sparent} || (($sname =~ /(.*)-[0-9A-Za-z]+/) ? $1 : $loc->{parent});
my $unique_lcid = $loc->{lcid};
unless (defined $unique_lcid) { $unique_lcid = $default_lcid++; }
my $geo = $geotable{$territory};
my $territory_match = "contains(concat(' ',normalize-space(\@territories),' '),' $territory ')";
# languages and scripts
my $ssortlocale = $loc->{sortlocale} || ($neutral ? "$sname-$territory" : $sname);
my $idefaultlanguage = defined $lcnames{$ssortlocale} ? $lcnames{$ssortlocale}->{lcid} : undef;
$idefaultlanguage = $lcnames{"en-US"}->{lcid} unless $ssortlocale;
(my $siso639langname = $sname) =~ s/-.*$//;
my $siso639langname2 = $iso639{$siso639langname} || $siso639langname;
my $sopentypelang = sprintf "%-4s", locale_entry( $loc, "sopentypelang", uc $siso639langname2 );
my $sabbrevlangname = defined $loc->{lcid} ? locale_entry( $loc, "sabbrevlangname", uc $siso639langname2 ) : "ZZZ";
my $siso3166ctryname2 = $geo->{iso3} || $geo->{uncode};
my $senglanguage = loc_query( $lcnames{en}, "/ldml/localeDisplayNames/languages/language[\@type='$language' and not(\@alt)]" ) || "";
my $sengcountry = loc_query( $lcnames{en}, "/ldml/localeDisplayNames/territories/territory[\@type='$territory' and not(\@alt)]" ) || "";
my $snativelangname = loc_query( $loc, "/ldml/localeDisplayNames/languages/language[\@type='$language' and not(\@alt)]" );
my $snativectryname = loc_query( $loc, "/ldml/localeDisplayNames/territories/territory[\@type='$territory' and not(\@alt)]" );
$sengcountry =~ s/South Korea/Korea/;
$snativelangname ||= $senglanguage;
$snativectryname ||= $sengcountry;
if ($script)
{
my $engscript = loc_query( $lcnames{en}, "/ldml/localeDisplayNames/scripts/script[\@type='$script' and not(\@alt)]" );
my $nativescript = loc_query( $loc, "/ldml/localeDisplayNames/scripts/script[\@type='$script' and not(\@alt)]" );
$senglanguage .= " ($engscript)" if $engscript;
$snativelangname .= " ($nativescript)" if $nativescript;
}
my $sengdisplayname = $neutral ? $senglanguage : "$senglanguage ($sengcountry)";
my $snativedisplayname = $neutral ? $snativelangname : "$snativelangname ($snativectryname)";
$sengdisplayname =~ s/\) \(/, /;
$snativedisplayname =~ s/\) \(/, /;
my $sscripts = locale_entry( $loc, "sscripts", $script ) || xml_query( $suppl, "/supplementalData/languageData/language[\@type='$language' and not(\@alt)]/\@scripts" );
$sscripts = (join ";", (sort split / /, ($sscripts || "Latn"))) . ";";
my $ireadinglayout = locale_entry( $loc, "ireadinglayout", 0 );
my $charlayout = loc_query( $loc, "/ldml/layout/orientation/characterOrder" );
if ($charlayout eq "right-to-left")
{
$ireadinglayout = 1;
}
elsif ($charlayout eq "top-to-bottom")
{
my $linelayout = loc_query( $loc, "/ldml/layout/orientation/lineOrder" );
$ireadinglayout = $linelayout eq "right-to-left" ? 2 : 3;
}
my $igeoid = $geo->{id} || 0;
# numbers
my $sdecimal = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/decimal" );
my $slist = locale_entry( $loc, "slist", ";" );
my $smondecimalsep = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/currencyDecimal" ) || $sdecimal;
my $sthousand = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/group" );
$sthousand =~ s/\x{202f}/\x{00a0}/;
my $smonthousandsep = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/currencyGroup" ) || $sthousand;
my $spositivesign = "";
my $snegativesign = "-";
my $spercent = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/percentSign" );
my $snan = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/nan" );
my $sposinfinity = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/infinity" );
my $sneginfinity = $sposinfinity ? "-$sposinfinity" : "";
my $sgrouping = format_to_grouping( loc_query( $loc, "/ldml/numbers/decimalFormats[\@numberSystem='latn']/decimalFormatLength[not(\@type)]/decimalFormat/pattern" ));
my $percentformat = loc_query( $loc, "/ldml/numbers/percentFormats[\@numberSystem='latn']/percentFormatLength[not(\@type)]/percentFormat/pattern" );
my $currencyformat = loc_query( $loc, "/ldml/numbers/currencyFormats[\@numberSystem='latn']/currencyFormatLength[not(\@type)]/currencyFormat[\@type='accounting']/pattern" ) ||
loc_query( $loc, "/ldml/numbers/currencyFormats[\@numberSystem='latn']/currencyFormatLength[not(\@type)]/currencyFormat[\@type='standard']/pattern" );
my $smongrouping = format_to_grouping( $currencyformat );
my ($icurrency, $inegcurr) = parse_currency_format( $sname, $currencyformat );
my ($ipospercent, $inegpercent) = parse_percent_format( $percentformat );
my $native_numbering = loc_query( $loc, "/ldml/numbers/otherNumberingSystems/native" );
my @snativedigits = split //, xml_query( $numbers, "/supplementalData/numberingSystems/numberingSystem[\@id='$native_numbering']/\@digits" );
my $digitsubstitution = !(ord($snativedigits[0]) >= 0x600 && ord($snativedigits[0]) <= 0x6ff);
my $measure = defined xml_query( $suppl, "/supplementalData/measurementData/measurementSystem[\@type='US' and $territory_match]" );
my $papersize = defined xml_query( $suppl, "/supplementalData/measurementData/paperSize[\@type='US-Letter' and $territory_match]" );
# currencies
my $sintlsymbol = $geo->{sintlsymbol} || "XDR";
my $scurrency = $geo->{scurrency} || loc_query( $loc, "/ldml/numbers/currencies/currency[\@type='$sintlsymbol']/symbol[\@alt='narrow']" );
$scurrency ||= loc_query( $loc, "/ldml/numbers/currencies/currency[\@type='$sintlsymbol']/symbol[not(\@alt)]" );
$geo->{scurrency} = $scurrency if $scurrency;
$scurrency ||= $sintlsymbol;
my $sengcurrname = $loc->{sengcurrname} || loc_query( $lcnames{en}, "/ldml/numbers/currencies/currency[\@type='$sintlsymbol']/displayName[not(\@count)]" );
my $snativecurrname = $loc->{sengcurrname} || loc_query( $loc, "/ldml/numbers/currencies/currency[\@type='$sintlsymbol']/displayName[not(\@count)]" ) || $sengcurrname;
my $icurrdigits = xml_query( $suppl, "/supplementalData/currencyData/fractions/info[\@iso4217='$sintlsymbol']/\@digits" );
$icurrdigits = 2 unless defined $icurrdigits;
# calendars
my $firstday = xml_query( $suppl, "/supplementalData/weekData/firstDay[not(\@alt) and $territory_match]/\@day" );
my $ifirstdayofweek = $firstday ? $days{$firstday} : 1;
my $firstweekofyear = (xml_query( $suppl, "/supplementalData/weekData/minDays[$territory_match]/\@count" ) || 0) == 4 ? 2 : 0;
my $serastring = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/eras/eraAbbr/era[\@type='1' and not(\@alt)]" );
my (@sdayname, @sabbrevdayname, @sshortestdayname);
foreach my $d (sort { $days{$a} <=> $days{$b} } keys %days)
{
my $n = $days{$d};
my %name;
foreach my $type (qw(wide abbreviated short))
{
$name{$type} = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/days/dayContext[\@type='format']/dayWidth[\@type='$type']/day[\@type='$d' and not(\@alt)]" );
}
push @sdayname, $name{wide};
push @sabbrevdayname, $name{abbreviated} || $name{wide};
push @sshortestdayname, $name{short} || $name{abbreviated} || $name{wide};
}
my (@smonthname, @sabbrevmonthname, @sgenitivemonth, @sabbrevgenitivemonth);
foreach my $n (1..13)
{
my $name = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/months/monthContext[\@type='stand-alone']/monthWidth[\@type='wide']/month[\@type='$n']" );
my $abbrev = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/months/monthContext[\@type='stand-alone']/monthWidth[\@type='abbreviated']/month[\@type='$n']" );
my $genitive = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/months/monthContext[\@type='format']/monthWidth[\@type='wide']/month[\@type='$n']" );
my $abbrevgen = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/months/monthContext[\@type='format']/monthWidth[\@type='abbreviated']/month[\@type='$n']" );
push @smonthname, $name || $genitive || "";
push @sabbrevmonthname, $abbrev || $abbrevgen || $name || $genitive || "";
push @sgenitivemonth, $genitive || "";
push @sabbrevgenitivemonth, $abbrevgen || $genitive || "";
}
@sgenitivemonth = () if join("|",@smonthname) eq join("|",@sgenitivemonth);
@sabbrevgenitivemonth = () if join("|",@sabbrevmonthname) eq join("|",@sabbrevgenitivemonth);
my %caltypes = ( "gregorian" => 1, "japanese" => 3, "chinese" => 4, "dangi" => 5, "islamic" => 6, "buddhist" => 7, "hebrew" => 8,
"persian" => 22, "islamic-civil" => 23, "islamic-umalqura" => 23 );
my $calpref = xml_query( $suppl, "/supplementalData/calendarPreferenceData/calendarPreference[$territory_match]/\@ordering" ) || "gregorian";
my $icalendartype;
my @scalnames;
foreach my $c (split /\s+/, $calpref)
{
next unless defined $caltypes{$c};
$icalendartype .= chr($caltypes{$c});
$scalnames[$caltypes{$c} - 1] = loc_query( $loc, "/ldml/localeDisplayNames/types/type[\@key='calendar' and \@type='$c']" );
}
# date/time formats
my $s1159 = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dayPeriods/dayPeriodContext[\@type='format']/dayPeriodWidth[\@type='abbreviated']/dayPeriod[\@type='am' and not(\@alt)]" );
my $s2359 = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dayPeriods/dayPeriodContext[\@type='format']/dayPeriodWidth[\@type='abbreviated']/dayPeriod[\@type='pm' and not (\@alt)]" );
my $sshortestam = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dayPeriods/dayPeriodContext[\@type='format']/dayPeriodWidth[\@type='narrow']/dayPeriod[\@type='am' and not(\@alt)]" );
my $sshortestpm = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dayPeriods/dayPeriodContext[\@type='format']/dayPeriodWidth[\@type='narrow']/dayPeriod[\@type='pm' and not (\@alt)]" );
my @stimeformat = (loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/timeFormats/timeFormatLength[\@type='medium']/timeFormat/pattern[not(\@alt)]" ));
push @stimeformat, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='Hms' and not(\@alt)]" );
pop @stimeformat if $stimeformat[0] eq $stimeformat[1];
@stimeformat = map convert_time_format($_), @stimeformat;
my @sshorttime = (loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/timeFormats/timeFormatLength[\@type='short']/timeFormat/pattern[not(\@alt)]" ));
push @sshorttime, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='Hm' and not(\@alt)]" );
pop @sshorttime if $sshorttime[0] eq $sshorttime[1];
@sshorttime = map convert_time_format($_), @sshorttime;
my @sshortdate = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yMd' and not(\@alt)]" );
push @sshortdate, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yMMMd' and not(\@alt)]" );
@sshortdate = map convert_date_format($_), @sshortdate;
my @slongdate = (loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateFormats/dateFormatLength[\@type='full']/dateFormat/pattern[not(\@alt)]" ));
push @slongdate, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateFormats/dateFormatLength[\@type='long']/dateFormat/pattern[not(\@alt)]" );
@slongdate = map convert_date_format($_), @slongdate;
my @smonthday = (loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='MMMMd' and not(\@alt)]" ));
push @smonthday, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='Md' and not(\@alt)]" );
push @smonthday, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='MMMd' and not(\@alt)]" );
@smonthday = map convert_date_format($_), @smonthday;
my @syearmonth = map convert_date_format($_), loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yMMMM' and not(\@alt)]" );
my @sduration = map convert_time_format( lc $_ ), loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='Hms' and not(\@alt)]" );
my $srelativelongdate = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='MMMMEd' and not(\@alt)]" ) ||
loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='MMMEd' and not(\@alt)]" );
$srelativelongdate = convert_date_format( $srelativelongdate );
if (defined $loc->{calendar})
{
foreach my $cal (@{$loc->{calendar}})
{
$cal->{sshortdate} = \@sshortdate;
$cal->{syearmonth} = \@syearmonth;
$cal->{slongdate} = \@slongdate;
$cal->{serastring} = [ $serastring ];
$cal->{sdayname} = \@sdayname;
$cal->{sabbrevdayname} = \@sabbrevdayname;
$cal->{smonthname} = \@smonthname;
$cal->{sabbrevmonthname} = \@sabbrevmonthname;
$cal->{scalname} = $scalnames[$cal->{id}];
$cal->{smonthday} = \@smonthday;
$cal->{sshortestdayname} = \@sshortestdayname;
$cal->{sabbreverastring} = [ $serastring ];
$cal->{sshortestdayname} = \@sshortestdayname;
$cal->{srelativelongdate} = $srelativelongdate;
}
}
# codepages
my %ansicpmap = ( 437 => 1252, 720 => 1256, 737 => 1253, 775 => 1257, 850 => 1252,
852 => 1250, 855 => 1251, 866 => 1251, 857 => 1254, 862 => 1255 );
my %maccpmap = ( 437 => 10000, 720 => 10004, 737 => 10006, 775 => 10029, 850 => 10000,
852 => 10029, 855 => 10007, 857 => 10081, 862 => 10005, 866 => 10007,
874 => 10021, 932 => 10001, 936 => 10008, 949 => 10003, 950 => 10002,
1258 => 10000 );
my %ebcdiccpmap = ( 437 => 37, 720 => 20420, 737 => 20273, 866 => 20880, 932 => 20290 );
my %codepagemasks = ( 874 => [ 0x01000000, 0x00000000, 0x00000000, 0, 0x00010000, 0x00000000, 0x00010000, 0x00000000 ],
932 => [ 0x00000000, 0x28c70000, 0x00000010, 0, 0x00020000, 0x00000000, 0x00020000, 0x00000000 ],
936 => [ 0x00000000, 0x28010000, 0x00000002, 0, 0x00040000, 0x00000000, 0x00040000, 0x00000000 ],
949 => [ 0x00000000, 0x00000000, 0x00000000, 0, 0x00080000, 0x00000000, 0x00080000, 0x00000000 ],
950 => [ 0x00000000, 0x28c10000, 0x00000012, 0, 0x00100000, 0x00000000, 0x00100000, 0x00000000 ],
1258 => [ 0x2000000f, 0x00000000, 0x00000000, 0, 0x00000100, 0x00008000, 0x00000100, 0x00008000 ],
866 => [ 0x00000200, 0x00000000, 0x00000000, 0, 0x00000004, 0x00020000, 0x00000004, 0x02020000 ],
862 => [ 0x00000800, 0x40000000, 0x00000000, 0, 0x00000020, 0x00200000, 0x00000020, 0x00200000 ],
857 => [ 0x0000001f, 0x00000000, 0x00000000, 0, 0x00000010, 0x01000000, 0x00000010, 0x01000000 ],
855 => [ 0x00000200, 0x00000000, 0x00000000, 0, 0x00000004, 0x02000000, 0x00000004, 0x02000000 ],
852 => [ 0x00000027, 0x00000000, 0x00000000, 0, 0x00000002, 0x04000000, 0x00000002, 0x04000000 ],
775 => [ 0x00000007, 0x00000000, 0x00000000, 0, 0x00000080, 0x08000000, 0x00000080, 0x08000000 ],
737 => [ 0x00000080, 0x00000000, 0x00000000, 0, 0x00000008, 0x10000000, 0x00000008, 0x10010000 ],
720 => [ 0x00002000, 0x00000000, 0x00000000, 0, 0x00000040, 0x20000000, 0x00000040, 0x20080000 ],
850 => [ 0x00000003, 0x00000000, 0x00000000, 0, 0x00000001, 0x40000000, 0x0000019f, 0xdfd70000 ],
437 => [ 0x00000003, 0x00000000, 0x00000000, 0, 0x00000001, 0x80000000, 0x0000019f, 0xdfd70000 ],
65001 => [ 0x00000000, 0x00000000, 0x00000000, 0, 0x00000000, 0x00000000, 0x0000019f, 0xdfd70000 ] );
my $oemcp = locale_entry( $loc, "oemcp", 65001 );
my $maccp = locale_entry( $loc, "maccp", undef ) || $maccpmap{$oemcp} || 65001;
my $ebcdiccp = locale_entry( $loc, "ebcdiccp", undef ) || $ebcdiccpmap{$oemcp} || 500;
$ebcdiccp = 500 if (defined $loc->{oemcp} && $loc->{oemcp} == 65001) || (defined $loc->{maccp} && $loc->{maccp} == 65001);
my $ansicp = $ansicpmap{$oemcp} || $oemcp;
my @fontsig = (0) x 8;
my $sig = locale_entry( $loc, "fontsig", [] );
foreach my $i (0..7) { $fontsig[$i] |= $codepagemasks{$oemcp}->[$i]; }
foreach my $i (0..$#{$sig}) { $fontsig[$i] |= $sig->[$i]; }
$fontsig[3] |= 1 << 31;
$fontsig[3] |= 1 << 27 if $ireadinglayout == 1;
$fontsig[3] |= 1 << 28 if $ireadinglayout == 3;
# special cases for invariant locale
unless ($loc->{name})
{
$siso639langname = "iv";
$siso639langname2 = "ivl";
$senglanguage = $snativelangname = "Invariant Language";
$sengcountry = $snativectryname = "Invariant Country";
$sengdisplayname = "Invariant Language (Invariant Country)";
$snativedisplayname = "Invariant Language (Invariant Region)";
$sengcurrname = $snativecurrname = "International Monetary Fund";
$scurrency = "\x{00a4}";
$ifirstdayofweek = 0;
$igeoid = $geotable{"US"}->{id};
@stimeformat = ("HH:mm:ss");
@sshortdate = ("MM/dd/yyyy", "yyyy-MM-dd");
@slongdate = ("dddd, dd MMMM yyyy");
@syearmonth = ("yyyy MMMM");
@smonthday = ("MMMM dd", "MMMM d", "M/d", "MMM d");
@sshorttime = ("HH:mm", "hh:mm tt", "H:mm", "h:mm tt");
$srelativelongdate = "dddd, MMMM dd";
$sposinfinity = "Infinity";
$sneginfinity = "-Infinity";
$spositivesign = "+";
$ipospercent = $inegpercent = 0;
}
# output data
$locale_data .= pack "L<2",
add_string( $sname ), # name
add_string( $sopentypelang ); # LOCALE_SOPENTYPELANGUAGETAG
$locale_data .= pack "S<14",
$loc->{lcid} || 0x1000, # LOCALE_ILANGUAGE
$unique_lcid, # unique_lcid
locale_entry( $loc, "idigits", 2 ), # LOCALE_IDIGITS
locale_entry( $loc, "inegnumber", 1 ), # LOCALE_INEGNUMBER
$icurrdigits, # LOCALE_ICURRDIGITS
$icurrency, # LOCALE_ICURRENCY
$inegcurr, # LOCALE_INEGCURR
locale_entry( $loc, "ilzero", 1 ), # LOCALE_ILZERO
!$neutral, # LOCALE_INEUTRAL
$ifirstdayofweek, # LOCALE_IFIRSTDAYOFWEEK
$firstweekofyear, # LOCALE_IFIRSTWEEKOFYEAR
$geo->{dialcode} || 1 , # LOCALE_ICOUNTRY,
$measure, # LOCALE_IMEASURE
$digitsubstitution; # LOCALE_IDIGITSUBSTITUTION
$locale_data .= pack "L<18",
add_string( $sgrouping ), # LOCALE_SGROUPING
add_string( $smongrouping ), # LOCALE_SMONGROUPING
add_string( $slist ), # LOCALE_SLIST
add_string( $sdecimal ), # LOCALE_SDECIMAL
add_string( $sthousand ), # LOCALE_STHOUSAND
add_string( $scurrency ), # LOCALE_SCURRENCY
add_string( $smondecimalsep ), # LOCALE_SMONDECIMALSEP
add_string( $smonthousandsep ), # LOCALE_SMONTHOUSANDSEP
add_string( $spositivesign ), # LOCALE_SPOSITIVESIGN
add_string( $snegativesign ), # LOCALE_SNEGATIVESIGN
add_string( $s1159 ), # LOCALE_S1159
add_string( $s2359 ), # LOCALE_S2359
add_strarray( @snativedigits ), # LOCALE_SNATIVEDIGITS
add_strarray( @stimeformat ), # LOCALE_STIMEFORMAT
add_strarray( @sshortdate ), # LOCALE_SSHORTDATE
add_strarray( @slongdate ), # LOCALE_SLONGDATE
add_strarray( @syearmonth ), # LOCALE_SYEARMONTH
add_strarray( @sduration ); # LOCALE_SDURATION
$locale_data .= pack "S<8",
$idefaultlanguage || 0x1000, # LOCALE_IDEFAULTLANGUAGE
$ansicp, # LOCALE_IDEFAULTANSICODEPAGE
$oemcp, # LOCALE_IDEFAULTCODEPAGE
$maccp, # LOCALE_IDEFAULTMACCODEPAGE
$ebcdiccp, # LOCALE_IDEFAULTEBCDICCODEPAGE
$igeoid < 65536 ? $igeoid : 39070, # old_geoid
$papersize ? 1 : 9, # LOCALE_IPAPERSIZE
0; # FIXME # islamic_cal
$locale_data .= pack "L<24",
add_string( $icalendartype ), # LOCALE_ICALENDARTYPE
add_string( $sabbrevlangname ), # LOCALE_SABBREVLANGNAME
add_string( $siso639langname ), # LOCALE_SISO639LANGNAME
add_string( $senglanguage ), # LOCALE_SENGLANGUAGE
add_string( $snativelangname ), # LOCALE_SNATIVELANGNAME
add_string( $sengcountry ), # LOCALE_SENGCOUNTRY
add_string( $snativectryname ), # LOCALE_SNATIVECTRYNAME
add_string( $siso3166ctryname2 ), # LOCALE_SABBREVCTRYNAME
add_string( $territory ), # LOCALE_SISO3166CTRYNAME
add_string( $sintlsymbol ), # LOCALE_SINTLSYMBOL
add_string( $sengcurrname ), # LOCALE_SENGCURRNAME
add_string( $snativecurrname ), # LOCALE_SNATIVECURRNAME
add_fontsig( @fontsig ), # LOCALE_FONTSIGNATURE
add_string( $siso639langname2 ), # LOCALE_SISO639LANGNAME2
add_string( $siso3166ctryname2 ), # LOCALE_SISO3166CTRYNAME2
add_string( $sparent ), # LOCALE_SPARENT
add_strarray( @sdayname ), # LOCALE_SDAYNAME
add_strarray( @sabbrevdayname ), # LOCALE_SABBREVDAYNAME
add_strarray( @smonthname ), # LOCALE_SMONTHNAME
add_strarray( @sabbrevmonthname ), # LOCALE_SABBREVMONTHNAME
add_strarray( @sgenitivemonth ), # LOCALE_SGENITIVEMONTH
add_strarray( @sabbrevgenitivemonth ), # LOCALE_SABBREVGENITIVEMONTH
add_strarray( @scalnames ), # LOCALE_SCALNAMES
add_strarray( @{$loc->{sortnames}} ); # LOCALE_SSORTNAMES
$locale_data .= pack "S<6",
$inegpercent, # LOCALE_INEGATIVEPERCENT
$ipospercent, # LOCALE_IPOSITIVEPERCENT
0, # unknown
$ireadinglayout, # LOCALE_IREADINGLAYOUT
0x2a, # unknown
0x2a; # unknown
$locale_data .= pack "L<24",
0, # unknown
add_string( $sengdisplayname ), # LOCALE_SENGLISHDISPLAYNAME
add_string( $snativedisplayname ), # LOCALE_SNATIVEDISPLAYNAME
add_string( $spercent ), # LOCALE_SPERCENT
add_string( $snan ), # LOCALE_SNAN
add_string( $sposinfinity ), # LOCALE_SPOSINFINITY
add_string( $sneginfinity ), # LOCALE_SNEGINFINITY
0, # unknown
add_string( $serastring ), # CAL_SERASTRING
add_string( $serastring ), # CAL_SABBREVERASTRING
0, # unknown
add_string( $ssortlocale ), # LOCALE_SCONSOLEFALLBACKNAME
add_strarray( @sshorttime ), # LOCALE_SSHORTTIME
add_strarray( @sshortestdayname ), # CAL_SSHORTESTDAYNAME
0, # unknown
add_string( $ssortlocale ), # LOCALE_SSORTLOCALE
add_string( "0409:00000409" ), # FIXME # LOCALE_SKEYBOARDSTOINSTALL
add_string( $sscripts ), # LOCALE_SSCRIPTS
add_string( $srelativelongdate ), # LOCALE_SRELATIVELONGDATE
$igeoid, # LOCALE_IGEOID
add_string( $sshortestam || "a" ), # LOCALE_SSHORTESTAM
add_string( $sshortestpm || "p" ), # LOCALE_SSHORTESTPM
add_strarray( @smonthday ), # LOCALE_SMONTHDAY
add_string( "k0-windows-us" ) # FIXME # keyboard_layout
}
# output language groups
my %groups;
add_registry_key( "Locale", "00000409" );
foreach my $loc (@locales)
{
next unless defined $loc->{lcid};
next if ($loc->{lcid} & 0x80000000);
next if !defined($loc->{alias}) && $loc->{name} !~ /-$loc->{territory}/; # skip neutral locales
my $group = locale_entry( $loc, "group", 1 );
my $name = sprintf( "%08x", $loc->{lcid} );
my $val = sprintf( "%x", $group );
add_registry_value( "Locale", $name, $val ) unless ($loc->{lcid} & 0x000f0000);
add_registry_value( "Locale\\Alternate Sorts", $name, $val ) if $loc->{name} =~ /_/;
$groups{$val} = 1;
}
foreach my $group (keys %groups) { add_registry_value( "Language Groups", $group, "1" ); }
# output calendar data
my $calendar_data = "";
foreach my $cal (@calendars)
{
my $scalname = $cal->{name};
my $iyearoffsetrange = 0;
my $itwodigityearmax = $cal->{itwodigityearmax};
my @sshortdate;
my @syearmonth;
my @slongdate;
my @serastring;
my @sdayname;
my @sabbrevdayname;
my @smonthname;
my @sabbrevmonthname;
my @smonthday;
my @sabbreverastring;
my @sshortestdayname;
my $type = $cal->{type};
if (defined $cal->{locale} && defined $type)
{
my $loc = $lcnames{$cal->{locale}};
my $fmt = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yMd' and not(\@alt)]" );
push @sshortdate, $fmt if $fmt;
$fmt = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yyyyMd' and not(\@alt)]" );
push @sshortdate, $fmt if $fmt;
$fmt = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yMMMd' and not(\@alt)]" );
push @sshortdate, $fmt if $fmt;
$fmt = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yyyyMMMd' and not(\@alt)]" );
push @sshortdate, $fmt if $fmt;
@sshortdate = map convert_date_format($_), @sshortdate;
$fmt = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/dateFormats/dateFormatLength[\@type='full']/dateFormat/pattern[not(\@alt)]" );
push @slongdate, $fmt if $fmt;
$fmt = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/dateFormats/dateFormatLength[\@type='long']/dateFormat/pattern[not(\@alt)]" );
push @slongdate, $fmt if $fmt;
@slongdate = map convert_date_format($_), @slongdate;
foreach my $n (1..13)
{
my $name = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/months/monthContext[\@type='format']/monthWidth[\@type='wide']/month[\@type='$n' and not(\@yeartype)]" );
my $abbrev = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/months/monthContext[\@type='format']/monthWidth[\@type='abbreviated']/month[\@type='$n' and not(\@yeartype)]" );
push @smonthname, $name || "";
push @sabbrevmonthname, $abbrev || $name || "";
}
$scalname ||= loc_query( $loc, "/ldml/localeDisplayNames/types/type[\@key='calendar' and \@type='$type']" );
if (defined $cal->{eras})
{
my @eras;
my $idx = 1;
foreach my $era (@{$cal->{eras}})
{
my $start = xml_query( $suppl, "/supplementalData/calendarData/calendar[\@type='$type']/eras/era[\@type='$era']/\@start" );
next unless $start =~ /^(-?\d+)-(\d+)-(\d+)/;
my ($year, $mon, $day, $zero, $first) = ($1, $2, $3, $1 - 1, 1);
if ($zero < 0)
{
$first -= $zero;
$year = 1;
$itwodigityearmax = 2049 - $zero;
}
unshift @eras, pack( "S<8", 6, $idx++, $year, $mon, $day, $zero, $first, 0 );
push @serastring, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/eras/eraAbbr/era[\@type='$era']" );
push @sabbreverastring, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/eras/eraNarrow/era[\@type='$era']" );
}
$iyearoffsetrange = add_str_data( pack "S<L<*", scalar @eras, map { add_str_data($_); } @eras );
}
}
@sshortdate = @{$cal->{sshortdate}} if defined $cal->{sshortdate} && !@sshortdate;
@syearmonth = @{$cal->{syearmonth}} if defined $cal->{syearmonth};
@slongdate = @{$cal->{slongdate}} if defined $cal->{slongdate} && !@slongdate;
@serastring = @{$cal->{serastring}} if defined $cal->{serastring} && !@serastring;
@sdayname = @{$cal->{sdayname}} if defined $cal->{sdayname};
@sabbrevdayname = @{$cal->{sabbrevdayname}} if defined $cal->{sabbrevdayname};
@smonthname = @{$cal->{smonthname}} if defined $cal->{smonthname} && !join("",@smonthname);
@sabbrevmonthname = @{$cal->{sabbrevmonthname}} if defined $cal->{sabbrevmonthname} && !join("",@sabbrevmonthname);
@smonthday = @{$cal->{smonthday}} if defined $cal->{smonthday};
@sabbreverastring = @{$cal->{sabbreverastring}} if defined $cal->{sabbreverastring} && !@sabbreverastring;
@sshortestdayname = @{$cal->{sshortestdayname}} if defined $cal->{sshortestdayname};
my $srelativelongdate = $cal->{srelativelongdate};
@serastring = ("A.D.") unless @serastring;
@sabbreverastring = ("AD") unless @sabbreverastring;
if ($cal->{id} != 1) # calendar 1 is a placeholder, information is fetched from locale instead
{
@sshortdate = ("") unless @sshortdate;
@syearmonth = ("") unless @syearmonth;
@slongdate = ("") unless @slongdate;
@sdayname = ("") x 7 unless @sdayname;
@sabbrevdayname = ("") x 7 unless @sabbrevdayname;
@sshortestdayname = ("") x 7 unless @sshortestdayname;
@smonthname = ("") x 13 unless @smonthname;
@sabbrevmonthname = ("") x 13 unless @sabbrevmonthname;
@smonthday = ("") unless @smonthday;
}
$calendar_data .= pack "S<2L<17",
$cal->{id}, # CAL_ICALINTVALUE
$itwodigityearmax || 99, # CAL_ITWODIGITYEARMAX
add_strarray( @sshortdate ), # CAL_SSHORTDATE
add_strarray( @syearmonth ), # CAL_SYEARMONTH
add_strarray( @slongdate ), # CAL_SLONGDATE
add_strarray( @serastring ), # CAL_SERASTRING
$iyearoffsetrange, # CAL_IYEAROFFSETRANGE
add_strarray( @sdayname ), # CAL_SDAYNAME
add_strarray( @sabbrevdayname ), # CAL_SABBREVDAYNAME
add_strarray( @smonthname ), # CAL_SMONTHNAME
add_strarray( @sabbrevmonthname ), # CAL_SABBREVMONTHNAME
add_string( $scalname ), # CAL_SCALNAME
add_strarray( @smonthday ), # CAL_SMONTHDAY
add_strarray( @sabbreverastring ), # CAL_SABBREVERASTRING
add_strarray( @sshortestdayname ), # CAL_SSHORTESTDAYNAME
add_string( $srelativelongdate ); # CAL_SRELATIVELONGDATE
}
# output locale header
my $nb_lcids = scalar keys %lcids;
my $nb_locales = scalar grep { !defined $_->{alias} } @locales;
my $nb_lcnames = scalar keys %lcnames;
my $locale_size = length($locale_data) / $nb_locales;
my $nb_calendars = scalar @calendars;
my $calendar_size = length($calendar_data) / $nb_calendars;
my $lcids_offset = 19 * 4; # size of header
my $lcnames_offset = $lcids_offset + length $lcid_data;
my $locales_offset = $lcnames_offset + length $lcname_data;
my $calendar_offset = $locales_offset + length $locale_data;
my $strings_offset = $calendar_offset + length $calendar_data;
my $locale_header = pack "L<7S<4L<S<2L<3S<2L<4",
8, # offset
0,
7, # version
0x5344534e, # magic
0, 0, 0,
0,
$nb_lcids,
$nb_locales,
$locale_size,
$locales_offset,
$nb_lcnames,
0,
$lcids_offset,
$lcnames_offset,
0,
$nb_calendars,
$calendar_size,
$calendar_offset,
$strings_offset,
0, 0;
return align_string( 4, $locale_header . $lcid_data . $lcname_data . $locale_data . $calendar_data . $string_data );
}
################################################################
# build the charmaps table for locale.nls
sub build_charmaps_data()
{
    # Concatenate the character mapping tables of locale.nls, each one
    # serialized by dump_binary_case_table(), and return the resulting string.
    # Takes no arguments; reads the file-level mapping tables and adds a few
    # fixed entries to @halfwidth_table and @fullwidth_table before dumping them.
    my $charmaps = "";

    # MAP_FOLDDIGITS
    $charmaps .= dump_binary_case_table( @digitmap_table );

    # CJK compatibility map
    $charmaps .= dump_binary_case_table( @cjk_compat_table );

    # LCMAP_HIRAGANA/KATAKANA: the two ranges are 0x60 apart, so each table
    # is simply the inverse of the other
    my $kana_shift = 0x60;
    my @to_hiragana;
    my @to_katakana;
    foreach my $hira (0x3041..0x3096, 0x309d..0x309e)
    {
        my $kata = $hira + $kana_shift;
        $to_hiragana[$kata] = $hira;
        $to_katakana[$hira] = $kata;
    }
    $charmaps .= dump_binary_case_table( @to_hiragana );
    $charmaps .= dump_binary_case_table( @to_katakana );

    # LCMAP_HALFWIDTH/FULLWIDTH: fixed additions on top of the loaded tables
    @halfwidth_table[0x2018, 0x2019, 0x201c, 0x201d, 0x309b, 0x309c] =
        (0x0027, 0x0027, 0x0022, 0x0022, 0xff9e, 0xff9f);
    @fullwidth_table[0x309b, 0x309c] = (0x3099, 0x309a);
    $charmaps .= dump_binary_case_table( @halfwidth_table );
    $charmaps .= dump_binary_case_table( @fullwidth_table );

    # LCMAP_TRADITIONAL/SIMPLIFIED_CHINESE
    $charmaps .= dump_binary_case_table( @chinese_traditional_table );
    $charmaps .= dump_binary_case_table( @chinese_simplified_table );

    # FIXME: some more unknown tables here
    return $charmaps;
}
################################################################
# build the geoids table for locale.nls
sub build_geoids_data()
{
    # Serialize the entries of @geoids into the geoids section of locale.nls
    # and return it as a string.
    #
    # Layout produced:
    #   - a header of 7 little-endian dwords:
    #       [0], [1]  fixed signature (the UTF-16LE characters "geo")
    #       [2]       total size of the section (header + records + index)
    #       [3]       size of the header, i.e. offset of the first record
    #       [4]       number of records
    #       [5]       offset of the name index
    #       [6]       number of name index entries
    #   - one fixed-size record per @geoids entry
    #   - a name index, sorted by name, mapping each name to a record number
    my $data = "";
    my %index;
    my $idx = 0;
    my @geo_header = (0x00650067, 0x0000006f, 0, 4 * 7, scalar @geoids, 0, 0);

    foreach my $entry (@geoids)
    {
        # The record keeps the id of the entry itself, but takes every other
        # field from the alias target when there is one.  The target is
        # resolved into a separate variable: the loop variable is an alias of
        # the array element, so assigning to it would overwrite @geoids.
        my $id = $entry->{id};
        my $geo = defined $entry->{alias} ? $entry->{alias} : $entry;

        # coordinates are not available, constant placeholders are emitted
        my $lat = "0.000";
        my $long = "0.000";
        my $iso2 = $geo->{iso2} || "XX";
        my $iso3 = $geo->{iso3} || "XX";
        # entries flagged as region, or having a UN code but no ISO2 code,
        # are treated as regions
        my $isregion = $geo->{region} || (defined $geo->{uncode} && !defined $geo->{iso2});
        my $sintlsymbol = $geo->{sintlsymbol} || "XDR";
        my $scurrency = $geo->{scurrency} || "\x{00a4}";

        # NOTE(review): pad_string's first argument is presumably the field
        # size in bytes -- confirm against its definition
        $data .= pack( "L<", $id );
        $data .= pad_string( 24, encode( "UTF16LE", $lat ));
        $data .= pad_string( 24, encode( "UTF16LE", $long ));
        # NOTE(review): 14/16 look like the Windows GEOCLASS_REGION and
        # GEOCLASS_NATION values, and 39070 like the id of the world entry
        # used as default parent -- confirm
        $data .= pack( "L<2", $isregion ? 14 : 16, $geo->{parentid} || 39070 );
        $data .= pad_string( 8, encode( "UTF16LE", $iso2 ));
        $data .= pad_string( 8, encode( "UTF16LE", $iso3 ));
        $data .= pack( "S<2", $geo->{uncode} || 0, $geo->{dialcode} || 0 );
        $data .= pad_string( 8, encode( "UTF16LE", $sintlsymbol ));
        $data .= pad_string( 16, encode( "UTF16LE", $scurrency ));

        # a name shared through an alias ends up pointing to the last record
        # that resolves to it
        $index{$geo->{name}} = $idx if $geo->{name};
        $idx++;
    }

    # "XX" is looked up like "001"
    $index{"XX"} = $index{"001"};

    # the name index starts right after the records
    $geo_header[5] = $geo_header[3] + length $data;
    $geo_header[6] = scalar keys %index;
    foreach my $name (sort keys %index)
    {
        $data .= pad_string( 8, encode( "UTF16LE", $name ));
        $data .= pack "L<", $index{$name};
    }
    $geo_header[2] = $geo_header[3] + length $data;
    return pack( "L<7", @geo_header ) . $data;
}
################################################################
# build a binary locale table
sub dump_locales($$)
{
    # Build the complete locale.nls file.
    #
    # Parameters:
    #   $filename   path of the file to generate
    #   $chartypes  already serialized character types section, stored first
    #
    # The file starts with a header of 8 little-endian dwords holding section
    # offsets; entries 1 to 3 are left as zero.  The data is written to
    # "$filename.new" and handed over to save_file(), which only replaces the
    # existing file when its contents changed.  Dies if the temporary file
    # cannot be created or written.
    my ($filename, $chartypes) = @_;

    # the name is passed as an argument so that it is never parsed as a format
    printf "Building %s\n", $filename;

    my $locale_data = build_locale_data();
    my $charmaps_data = build_charmaps_data();
    my $geoids_data = build_geoids_data();
    my $scripts_data = "";  # FIXME

    my @header = ( 0 ) x 8;
    $header[0] = 4 * scalar @header;                    # chartypes offset
    $header[4] = $header[0] + length $chartypes;        # locales offset
    $header[5] = $header[4] + length $locale_data;      # charmaps offset
    $header[6] = $header[5] + length $charmaps_data;    # geoids offset
    $header[7] = $header[6] + length $geoids_data;      # scripts offset

    my $tmpname = "$filename.new";
    open my $output, ">", $tmpname or die "Cannot create $tmpname: $!";
    binmode $output;  # the contents are packed binary data
    print {$output} pack( "L<*", @header );
    print {$output} $chartypes, $locale_data, $charmaps_data, $geoids_data, $scripts_data;
    # buffered write errors are only reported when the handle is closed
    close $output or die "Cannot write $tmpname: $!";
    save_file($filename);
}
################################################################
# build the script to create registry keys
sub dump_registry_script($%)
{
    # Write the registration script that creates the Nls registry keys.
    #
    # Parameters:
    #   $filename  path of the script to generate
    #   %keys      maps a backslash-separated key path, relative to
    #              HKLM\SYSTEM\CurrentControlSet\Control\Nls, to an array
    #              reference holding the default value of the key (may be
    #              false for none) followed by its value lines
    #
    # Keys and value lines are written in sorted order, indented by four
    # spaces per nesting level.  The script is written to "$filename.new" and
    # handed over to save_file().  Dies if the temporary file cannot be
    # created or written.
    my ($filename, %keys) = @_;
    my $indent = 1;

    printf "Building %s\n", $filename;
    my $tmpname = "$filename.new";
    open my $output, ">", $tmpname or die "Cannot create $tmpname: $!";
    print {$output} "HKLM\n{\n";

    # fixed parent keys, which must be left alone on unregistration
    foreach my $k (split /\\/, "SYSTEM\\CurrentControlSet\\Control\\Nls")
    {
        printf {$output} "%*sNoRemove %s\n%*s{\n", 4 * $indent, "", $k, 4 * $indent, "";
        $indent++;
    }

    foreach my $k (sort keys %keys)
    {
        my @subkeys = split /\\/, $k;
        my ($def, @vals) = @{$keys{$k}};

        # open one nesting level per path component; names containing
        # whitespace are quoted, and the default value goes on the last one
        foreach my $i (0..$#subkeys)
        {
            printf {$output} "%*s%s%s\n%*s{\n", 4 * $indent, "",
                $subkeys[$i] =~ /\s/ ? "'$subkeys[$i]'" : $subkeys[$i],
                $i == $#subkeys && $def ? " = s '$def'" : "", 4 * $indent, "";
            $indent++;
        }

        # the value text is passed as an argument: it must be copied
        # verbatim, even if it contains '%' characters
        foreach my $v (sort @vals) { printf {$output} "%*sval %s\n", 4 * $indent, "", $v; }

        # close the levels opened for this key
        foreach my $i (0..$#subkeys) { printf {$output} "%*s}\n", 4 * --$indent, ""; }
    }

    # close the fixed parent keys and the root
    while ($indent) { printf {$output} "%*s}\n", 4 * --$indent, ""; }

    # buffered write errors are only reported when the handle is closed
    close $output or die "Cannot write $tmpname: $!";
    save_file($filename);
}
################################################################
# save a file if modified
sub save_file($)
{
    # Install a freshly generated file.
    #
    # Parameter:
    #   $file  path of the destination; the new contents are expected in
    #          "$file.new"
    #
    # When the destination already exists with the same contents, the new
    # file is discarded so that the destination is left untouched; otherwise
    # the new file is renamed over the destination.  Dies if the rename fails.
    my $file = shift;
    my $new = "$file.new";

    # Compare the files directly instead of running an external command
    # through the shell, so that any file name is handled correctly.
    # compare() returns 0 only for identical contents; a failed comparison
    # is handled like a difference.
    require File::Compare;
    if (-f $file && File::Compare::compare( $file, $new ) == 0)
    {
        unlink $new or warn "Cannot remove $new: $!\n";
    }
    else
    {
        rename $new, $file or die "Cannot rename $new to $file: $!\n";
    }
}
################################################################
# main routine
# When the script itself is found in the current directory, move up one
# level so that the relative output paths below resolve.
if (-f "./make_unicode") { chdir ".."; }

load_data();

# generated C tables; loop variables are named lexicals so that the called
# functions cannot interfere with the lists being iterated
dump_sortkeys( "dlls/kernelbase/collation.c" );
foreach my $target ("dlls/gdi32/uniscribe/direction.c", "dlls/dwrite/direction.c")
{
    dump_bidi_dir_table( $target );
}
foreach my $target ("dlls/gdi32/uniscribe/mirror.c", "dlls/dwrite/mirror.c")
{
    dump_mirroring( $target );
}
foreach my $target ("dlls/gdi32/uniscribe/bracket.c", "dlls/dwrite/bracket.c")
{
    dump_bracket( $target );
}
dump_shaping( "dlls/gdi32/uniscribe/shaping.c" );
dump_arabic_shaping( "dlls/dwrite/shapers/arabic_table.c" );
foreach my $target ("dlls/gdi32/uniscribe/linebreak.c", "dlls/dwrite/linebreak.c")
{
    dump_linebreak( $target );
}
dump_scripts( "dlls/dwrite/scripts" );
dump_indic( "dlls/gdi32/uniscribe/indicsyllable.c" );
foreach my $job ([ "dlls/win32u/vertical.c", 1 ], [ "dlls/wineps.drv/vertical.c", 0 ])
{
    # both arguments are passed explicitly, as scalars
    dump_vertical( $job->[0], $job->[1] );
}

# binary nls files
dump_intl_nls("nls/l_intl.nls");
foreach my $target ("nls/normnfc.nls", "nls/normnfd.nls", "nls/normnfkc.nls",
                    "nls/normnfkd.nls", "nls/normidna.nls")
{
    dump_norm_table( $target );
}

# the value returned for the sort table is the chartypes argument of dump_locales()
my $chartypes = dump_sortkey_table( "nls/sortdefault.nls", "Windows 10 Sorting Weight Table.txt" );
dump_locales( "nls/locale.nls", $chartypes );

# codepage files
foreach my $cpfile (@allfiles)
{
    dump_msdata_codepage( $cpfile );
}
dump_eucjp_codepage();

# registry script, written last from the keys collected in %registry_keys
dump_registry_script( "dlls/kernelbase/kernelbase.rgs", %registry_keys );

exit( 0 );
# Local Variables:
# compile-command: "./make_unicode"
# End: