From 2b9d720ea018c746907b0a7f0543ec9718352109 Mon Sep 17 00:00:00 2001 From: Alexandre Julliard Date: Mon, 17 Feb 2020 11:54:07 +0100 Subject: [PATCH] unicode: Take into account explicit composition exclusions. Signed-off-by: Alexandre Julliard --- dlls/kernelbase/compose.c | 96 ++++++++------------------------------- dlls/ntdll/compose.c | 96 ++++++++------------------------------- tools/make_unicode | 19 ++++++++ 3 files changed, 59 insertions(+), 152 deletions(-) diff --git a/dlls/kernelbase/compose.c b/dlls/kernelbase/compose.c index 5ee1150c850..618a5d6bdad 100644 --- a/dlls/kernelbase/compose.c +++ b/dlls/kernelbase/compose.c @@ -4,30 +4,25 @@ #include "windef.h" -static const unsigned int table[2208] = +static const unsigned int table[2006] = { /* second chars + offsets */ - 0x00300, 0x00053, 0x00301, 0x000a7, 0x00302, 0x0011c, 0x00303, 0x0013c, - 0x00304, 0x00158, 0x00306, 0x00184, 0x00307, 0x001a4, 0x00308, 0x001d2, - 0x00309, 0x00208, 0x0030a, 0x00220, 0x0030b, 0x00226, 0x0030c, 0x0022c, - 0x0030f, 0x00251, 0x00311, 0x0025f, 0x00313, 0x0026b, 0x00314, 0x00279, - 0x0031b, 0x00289, 0x00323, 0x0028d, 0x00324, 0x002b7, 0x00325, 0x002b9, - 0x00326, 0x002bb, 0x00327, 0x002bf, 0x00328, 0x002d5, 0x0032d, 0x002df, - 0x0032e, 0x002eb, 0x00330, 0x002ed, 0x00331, 0x002f3, 0x00338, 0x00304, - 0x00342, 0x00331, 0x00345, 0x0034e, 0x005b4, 0x0038d, 0x005b7, 0x0038e, - 0x005b8, 0x00390, 0x005b9, 0x00391, 0x005bc, 0x00392, 0x005bf, 0x003a8, - 0x005c1, 0x003ab, 0x005c2, 0x003ad, 0x00653, 0x003af, 0x00654, 0x003b0, - 0x00655, 0x003b6, 0x0093c, 0x003b7, 0x009bc, 0x003c2, 0x009be, 0x003c5, - 0x009d7, 0x003c6, 0x00a3c, 0x003c7, 0x00b3c, 0x003cd, 0x00b3e, 0x003cf, - 0x00b56, 0x003d0, 0x00b57, 0x003d1, 0x00bbe, 0x003d2, 0x00bd7, 0x003d4, - 0x00c56, 0x003d6, 0x00cc2, 0x003d7, 0x00cd5, 0x003d8, 0x00cd6, 0x003db, - 0x00d3e, 0x003dc, 0x00d57, 0x003de, 0x00dca, 0x003df, 0x00dcf, 0x003e1, - 0x00ddf, 0x003e2, 0x00f80, 0x003e3, 0x00fb5, 0x003e5, 0x00fb7, 0x003e7, - 0x0102e, 0x003f1, 0x01b35, 0x003f2, 0x03099, 0x003fd, 0x0309a, 0x0042d, - 0x110ba, 0x00437, 0x11127, 0x0043a, 0x1133e, 0x0043c, 0x11357, 0x0043d, - 0x114b0, 0x0043e, 0x114ba, 0x0043f, 0x114bd, 0x00440, 0x115af, 0x00441, - 0x1d165, 0x00443, 0x1d16e, 0x00447, 0x1d16f, 0x0044a, 0x1d170, 0x0044d, - 0x1d171, 0x0044e, 0x1d172, 0x0044f, 0x00000, 0x00450, + 0x00300, 0x0003f, 0x00301, 0x00093, 0x00302, 0x00108, 0x00303, 0x00128, + 0x00304, 0x00144, 0x00306, 0x00170, 0x00307, 0x00190, 0x00308, 0x001be, + 0x00309, 0x001f4, 0x0030a, 0x0020c, 0x0030b, 0x00212, 0x0030c, 0x00218, + 0x0030f, 0x0023d, 0x00311, 0x0024b, 0x00313, 0x00257, 0x00314, 0x00265, + 0x0031b, 0x00275, 0x00323, 0x00279, 0x00324, 0x002a3, 0x00325, 0x002a5, + 0x00326, 0x002a7, 0x00327, 0x002ab, 0x00328, 0x002c1, 0x0032d, 0x002cb, + 0x0032e, 0x002d7, 0x00330, 0x002d9, 0x00331, 0x002df, 0x00338, 0x002f0, + 0x00342, 0x0031c, 0x00345, 0x00339, 0x00653, 0x00378, 0x00654, 0x00379, + 0x00655, 0x0037f, 0x0093c, 0x00380, 0x009be, 0x00383, 0x009d7, 0x00384, + 0x00b3e, 0x00385, 0x00b56, 0x00386, 0x00b57, 0x00387, 0x00bbe, 0x00388, + 0x00bd7, 0x0038a, 0x00c56, 0x0038c, 0x00cc2, 0x0038d, 0x00cd5, 0x0038e, + 0x00cd6, 0x00391, 0x00d3e, 0x00392, 0x00d57, 0x00394, 0x00dca, 0x00395, + 0x00dcf, 0x00397, 0x00ddf, 0x00398, 0x0102e, 0x00399, 0x01b35, 0x0039a, + 0x03099, 0x003a5, 0x0309a, 0x003d5, 0x110ba, 0x003df, 0x11127, 0x003e2, + 0x1133e, 0x003e4, 0x11357, 0x003e5, 0x114b0, 0x003e6, 0x114ba, 0x003e7, + 0x114bd, 0x003e8, 0x115af, 0x003e9, 0x00000, 0x003eb, /* 0x0300 */ 0x00041, 0x000c0, 0x00045, 0x000c8, 0x00049, 0x000cc, 0x0004e, 0x001f8, 0x0004f, 0x000d2, 0x00055, 0x000d9, 0x00057, 0x01e80, 0x00059, 0x01ef2, @@ -248,7 +243,6 @@ static const unsigned int table[2208] = 0x02286, 0x02288, 0x02287, 0x02289, 0x02291, 0x022e2, 0x02292, 0x022e3, 0x022a2, 0x022ac, 0x022a8, 0x022ad, 0x022a9, 0x022ae, 0x022ab, 0x022af, 0x022b2, 0x022ea, 0x022b3, 0x022eb, 0x022b4, 0x022ec, 0x022b5, 0x022ed, - 0x02add, 0x02adc, /* 0x0342 */ 0x000a8, 0x01fc1, 0x003b1, 0x01fb6, 0x003b7, 0x01fc6, 0x003b9, 0x01fd6, 0x003c5, 0x01fe6, 0x003c9, 0x01ff6, 0x003ca, 0x01fd7, 0x003cb, 0x01fe7, @@ -275,27 +269,6 @@ static const unsigned int table[2208] = 0x01f6b, 0x01fab, 0x01f6c, 0x01fac, 0x01f6d, 0x01fad, 0x01f6e, 0x01fae, 0x01f6f, 0x01faf, 0x01f70, 0x01fb2, 0x01f74, 0x01fc2, 0x01f7c, 0x01ff2, 0x01fb6, 0x01fb7, 0x01fc6, 0x01fc7, 0x01ff6, 0x01ff7, - /* 0x05b4 */ - 0x005d9, 0x0fb1d, - /* 0x05b7 */ - 0x005d0, 0x0fb2e, 0x005f2, 0x0fb1f, - /* 0x05b8 */ - 0x005d0, 0x0fb2f, - /* 0x05b9 */ - 0x005d5, 0x0fb4b, - /* 0x05bc */ - 0x005d0, 0x0fb30, 0x005d1, 0x0fb31, 0x005d2, 0x0fb32, 0x005d3, 0x0fb33, - 0x005d4, 0x0fb34, 0x005d5, 0x0fb35, 0x005d6, 0x0fb36, 0x005d8, 0x0fb38, - 0x005d9, 0x0fb39, 0x005da, 0x0fb3a, 0x005db, 0x0fb3b, 0x005dc, 0x0fb3c, - 0x005de, 0x0fb3e, 0x005e0, 0x0fb40, 0x005e1, 0x0fb41, 0x005e3, 0x0fb43, - 0x005e4, 0x0fb44, 0x005e6, 0x0fb46, 0x005e7, 0x0fb47, 0x005e8, 0x0fb48, - 0x005e9, 0x0fb49, 0x005ea, 0x0fb4a, - /* 0x05bf */ - 0x005d1, 0x0fb4c, 0x005db, 0x0fb4d, 0x005e4, 0x0fb4e, - /* 0x05c1 */ - 0x005e9, 0x0fb2a, 0x0fb49, 0x0fb2c, - /* 0x05c2 */ - 0x005e9, 0x0fb2b, 0x0fb49, 0x0fb2d, /* 0x0653 */ 0x00627, 0x00622, /* 0x0654 */ @@ -304,20 +277,11 @@ static const unsigned int table[2208] = /* 0x0655 */ 0x00627, 0x00625, /* 0x093c */ - 0x00915, 0x00958, 0x00916, 0x00959, 0x00917, 0x0095a, 0x0091c, 0x0095b, - 0x00921, 0x0095c, 0x00922, 0x0095d, 0x00928, 0x00929, 0x0092b, 0x0095e, - 0x0092f, 0x0095f, 0x00930, 0x00931, 0x00933, 0x00934, - /* 0x09bc */ - 0x009a1, 0x009dc, 0x009a2, 0x009dd, 0x009af, 0x009df, + 0x00928, 0x00929, 0x00930, 0x00931, 0x00933, 0x00934, /* 0x09be */ 0x009c7, 0x009cb, /* 0x09d7 */ 0x009c7, 0x009cc, - /* 0x0a3c */ - 0x00a16, 0x00a59, 0x00a17, 0x00a5a, 0x00a1c, 0x00a5b, 0x00a2b, 0x00a5e, - 0x00a32, 0x00a33, 0x00a38, 0x00a36, - /* 0x0b3c */ - 0x00b21, 0x00b5c, 0x00b22, 0x00b5d, /* 0x0b3e */ 0x00b47, 0x00b4b, /* 0x0b56 */ @@ -346,14 +310,6 @@ static const unsigned int table[2208] = 0x00dd9, 0x00ddc, /* 0x0ddf */ 0x00dd9, 0x00dde, - /* 0x0f80 */ - 0x00fb2, 0x00f76, 0x00fb3, 0x00f78, - /* 0x0fb5 */ - 0x00f40, 0x00f69, 0x00f90, 0x00fb9, - /* 0x0fb7 */ - 0x00f42, 0x00f43, 0x00f4c, 0x00f4d, 0x00f51, 0x00f52, 0x00f56, 0x00f57, - 0x00f5b, 0x00f5c, 0x00f92, 0x00f93, 0x00f9c, 0x00f9d, 0x00fa1, 0x00fa2, - 0x00fa6, 0x00fa7, 0x00fab, 0x00fac, /* 0x102e */ 0x01025, 0x01026, /* 0x1b35 */ @@ -392,19 +348,7 @@ static const unsigned int table[2208] = /* 0x114bd */ 0x114b9, 0x114be, /* 0x115af */ - 0x115b8, 0x115ba, 0x115b9, 0x115bb, - /* 0x1d165 */ - 0x1d157, 0x1d15e, 0x1d158, 0x1d15f, 0x1d1b9, 0x1d1bb, 0x1d1ba, 0x1d1bc, - /* 0x1d16e */ - 0x1d15f, 0x1d160, 0x1d1bb, 0x1d1bd, 0x1d1bc, 0x1d1be, - /* 0x1d16f */ - 0x1d15f, 0x1d161, 0x1d1bb, 0x1d1bf, 0x1d1bc, 0x1d1c0, - /* 0x1d170 */ - 0x1d15f, 0x1d162, - /* 0x1d171 */ - 0x1d15f, 0x1d163, - /* 0x1d172 */ - 0x1d15f, 0x1d164 + 0x115b8, 0x115ba, 0x115b9, 0x115bb }; static inline int binary_search( unsigned int ch, int low, int high ) @@ -423,7 +367,7 @@ unsigned int DECLSPEC_HIDDEN wine_compose( unsigned int ch1, unsigned int ch2 ) { int pos; - if ((pos = binary_search( ch2, 0, 82 - 1 )) == -1) return 0; + if ((pos = binary_search( ch2, 0, 62 - 1 )) == -1) return 0; if ((pos = binary_search( ch1, table[2 * pos + 1], table[2 * pos + 3] - 1 )) == -1) return 0; return table[2 * pos + 1]; } diff --git a/dlls/ntdll/compose.c b/dlls/ntdll/compose.c index 5ee1150c850..618a5d6bdad 100644 --- a/dlls/ntdll/compose.c +++ b/dlls/ntdll/compose.c @@ -4,30 +4,25 @@ #include "windef.h" -static const unsigned int table[2208] = +static const unsigned int table[2006] = { /* second chars + offsets */ - 0x00300, 0x00053, 0x00301, 0x000a7, 0x00302, 0x0011c, 0x00303, 0x0013c, - 0x00304, 0x00158, 0x00306, 0x00184, 0x00307, 0x001a4, 0x00308, 0x001d2, - 0x00309, 0x00208, 0x0030a, 0x00220, 0x0030b, 0x00226, 0x0030c, 0x0022c, - 0x0030f, 0x00251, 0x00311, 0x0025f, 0x00313, 0x0026b, 0x00314, 0x00279, - 0x0031b, 0x00289, 0x00323, 0x0028d, 0x00324, 0x002b7, 0x00325, 0x002b9, - 0x00326, 0x002bb, 0x00327, 0x002bf, 0x00328, 0x002d5, 0x0032d, 0x002df, - 0x0032e, 0x002eb, 0x00330, 0x002ed, 0x00331, 0x002f3, 0x00338, 0x00304, - 0x00342, 0x00331, 0x00345, 0x0034e, 0x005b4, 0x0038d, 0x005b7, 0x0038e, - 0x005b8, 0x00390, 0x005b9, 0x00391, 0x005bc, 0x00392, 0x005bf, 0x003a8, - 0x005c1, 0x003ab, 0x005c2, 0x003ad, 0x00653, 0x003af, 0x00654, 0x003b0, - 0x00655, 0x003b6, 0x0093c, 0x003b7, 0x009bc, 0x003c2, 0x009be, 0x003c5, - 0x009d7, 0x003c6, 0x00a3c, 0x003c7, 0x00b3c, 0x003cd, 0x00b3e, 0x003cf, - 0x00b56, 0x003d0, 0x00b57, 0x003d1, 0x00bbe, 0x003d2, 0x00bd7, 0x003d4, - 0x00c56, 0x003d6, 0x00cc2, 0x003d7, 0x00cd5, 0x003d8, 0x00cd6, 0x003db, - 0x00d3e, 0x003dc, 0x00d57, 0x003de, 0x00dca, 0x003df, 0x00dcf, 0x003e1, - 0x00ddf, 0x003e2, 0x00f80, 0x003e3, 0x00fb5, 0x003e5, 0x00fb7, 0x003e7, - 0x0102e, 0x003f1, 0x01b35, 0x003f2, 0x03099, 0x003fd, 0x0309a, 0x0042d, - 0x110ba, 0x00437, 0x11127, 0x0043a, 0x1133e, 0x0043c, 0x11357, 0x0043d, - 0x114b0, 0x0043e, 0x114ba, 0x0043f, 0x114bd, 0x00440, 0x115af, 0x00441, - 0x1d165, 0x00443, 0x1d16e, 0x00447, 0x1d16f, 0x0044a, 0x1d170, 0x0044d, - 0x1d171, 0x0044e, 0x1d172, 0x0044f, 0x00000, 0x00450, + 0x00300, 0x0003f, 0x00301, 0x00093, 0x00302, 0x00108, 0x00303, 0x00128, + 0x00304, 0x00144, 0x00306, 0x00170, 0x00307, 0x00190, 0x00308, 0x001be, + 0x00309, 0x001f4, 0x0030a, 0x0020c, 0x0030b, 0x00212, 0x0030c, 0x00218, + 0x0030f, 0x0023d, 0x00311, 0x0024b, 0x00313, 0x00257, 0x00314, 0x00265, + 0x0031b, 0x00275, 0x00323, 0x00279, 0x00324, 0x002a3, 0x00325, 0x002a5, + 0x00326, 0x002a7, 0x00327, 0x002ab, 0x00328, 0x002c1, 0x0032d, 0x002cb, + 0x0032e, 0x002d7, 0x00330, 0x002d9, 0x00331, 0x002df, 0x00338, 0x002f0, + 0x00342, 0x0031c, 0x00345, 0x00339, 0x00653, 0x00378, 0x00654, 0x00379, + 0x00655, 0x0037f, 0x0093c, 0x00380, 0x009be, 0x00383, 0x009d7, 0x00384, + 0x00b3e, 0x00385, 0x00b56, 0x00386, 0x00b57, 0x00387, 0x00bbe, 0x00388, + 0x00bd7, 0x0038a, 0x00c56, 0x0038c, 0x00cc2, 0x0038d, 0x00cd5, 0x0038e, + 0x00cd6, 0x00391, 0x00d3e, 0x00392, 0x00d57, 0x00394, 0x00dca, 0x00395, + 0x00dcf, 0x00397, 0x00ddf, 0x00398, 0x0102e, 0x00399, 0x01b35, 0x0039a, + 0x03099, 0x003a5, 0x0309a, 0x003d5, 0x110ba, 0x003df, 0x11127, 0x003e2, + 0x1133e, 0x003e4, 0x11357, 0x003e5, 0x114b0, 0x003e6, 0x114ba, 0x003e7, + 0x114bd, 0x003e8, 0x115af, 0x003e9, 0x00000, 0x003eb, /* 0x0300 */ 0x00041, 0x000c0, 0x00045, 0x000c8, 0x00049, 0x000cc, 0x0004e, 0x001f8, 0x0004f, 0x000d2, 0x00055, 0x000d9, 0x00057, 0x01e80, 0x00059, 0x01ef2, @@ -248,7 +243,6 @@ static const unsigned int table[2208] = 0x02286, 0x02288, 0x02287, 0x02289, 0x02291, 0x022e2, 0x02292, 0x022e3, 0x022a2, 0x022ac, 0x022a8, 0x022ad, 0x022a9, 0x022ae, 0x022ab, 0x022af, 0x022b2, 0x022ea, 0x022b3, 0x022eb, 0x022b4, 0x022ec, 0x022b5, 0x022ed, - 0x02add, 0x02adc, /* 0x0342 */ 0x000a8, 0x01fc1, 0x003b1, 0x01fb6, 0x003b7, 0x01fc6, 0x003b9, 0x01fd6, 0x003c5, 0x01fe6, 0x003c9, 0x01ff6, 0x003ca, 0x01fd7, 0x003cb, 0x01fe7, @@ -275,27 +269,6 @@ static const unsigned int table[2208] = 0x01f6b, 0x01fab, 0x01f6c, 0x01fac, 0x01f6d, 0x01fad, 0x01f6e, 0x01fae, 0x01f6f, 0x01faf, 0x01f70, 0x01fb2, 0x01f74, 0x01fc2, 0x01f7c, 0x01ff2, 0x01fb6, 0x01fb7, 0x01fc6, 0x01fc7, 0x01ff6, 0x01ff7, - /* 0x05b4 */ - 0x005d9, 0x0fb1d, - /* 0x05b7 */ - 0x005d0, 0x0fb2e, 0x005f2, 0x0fb1f, - /* 0x05b8 */ - 0x005d0, 0x0fb2f, - /* 0x05b9 */ - 0x005d5, 0x0fb4b, - /* 0x05bc */ - 0x005d0, 0x0fb30, 0x005d1, 0x0fb31, 0x005d2, 0x0fb32, 0x005d3, 0x0fb33, - 0x005d4, 0x0fb34, 0x005d5, 0x0fb35, 0x005d6, 0x0fb36, 0x005d8, 0x0fb38, - 0x005d9, 0x0fb39, 0x005da, 0x0fb3a, 0x005db, 0x0fb3b, 0x005dc, 0x0fb3c, - 0x005de, 0x0fb3e, 0x005e0, 0x0fb40, 0x005e1, 0x0fb41, 0x005e3, 0x0fb43, - 0x005e4, 0x0fb44, 0x005e6, 0x0fb46, 0x005e7, 0x0fb47, 0x005e8, 0x0fb48, - 0x005e9, 0x0fb49, 0x005ea, 0x0fb4a, - /* 0x05bf */ - 0x005d1, 0x0fb4c, 0x005db, 0x0fb4d, 0x005e4, 0x0fb4e, - /* 0x05c1 */ - 0x005e9, 0x0fb2a, 0x0fb49, 0x0fb2c, - /* 0x05c2 */ - 0x005e9, 0x0fb2b, 0x0fb49, 0x0fb2d, /* 0x0653 */ 0x00627, 0x00622, /* 0x0654 */ @@ -304,20 +277,11 @@ static const unsigned int table[2208] = /* 0x0655 */ 0x00627, 0x00625, /* 0x093c */ - 0x00915, 0x00958, 0x00916, 0x00959, 0x00917, 0x0095a, 0x0091c, 0x0095b, - 0x00921, 0x0095c, 0x00922, 0x0095d, 0x00928, 0x00929, 0x0092b, 0x0095e, - 0x0092f, 0x0095f, 0x00930, 0x00931, 0x00933, 0x00934, - /* 0x09bc */ - 0x009a1, 0x009dc, 0x009a2, 0x009dd, 0x009af, 0x009df, + 0x00928, 0x00929, 0x00930, 0x00931, 0x00933, 0x00934, /* 0x09be */ 0x009c7, 0x009cb, /* 0x09d7 */ 0x009c7, 0x009cc, - /* 0x0a3c */ - 0x00a16, 0x00a59, 0x00a17, 0x00a5a, 0x00a1c, 0x00a5b, 0x00a2b, 0x00a5e, - 0x00a32, 0x00a33, 0x00a38, 0x00a36, - /* 0x0b3c */ - 0x00b21, 0x00b5c, 0x00b22, 0x00b5d, /* 0x0b3e */ 0x00b47, 0x00b4b, /* 0x0b56 */ @@ -346,14 +310,6 @@ static const unsigned int table[2208] = 0x00dd9, 0x00ddc, /* 0x0ddf */ 0x00dd9, 0x00dde, - /* 0x0f80 */ - 0x00fb2, 0x00f76, 0x00fb3, 0x00f78, - /* 0x0fb5 */ - 0x00f40, 0x00f69, 0x00f90, 0x00fb9, - /* 0x0fb7 */ - 0x00f42, 0x00f43, 0x00f4c, 0x00f4d, 0x00f51, 0x00f52, 0x00f56, 0x00f57, - 0x00f5b, 0x00f5c, 0x00f92, 0x00f93, 0x00f9c, 0x00f9d, 0x00fa1, 0x00fa2, - 0x00fa6, 0x00fa7, 0x00fab, 0x00fac, /* 0x102e */ 0x01025, 0x01026, /* 0x1b35 */ @@ -392,19 +348,7 @@ static const unsigned int table[2208] = /* 0x114bd */ 0x114b9, 0x114be, /* 0x115af */ - 0x115b8, 0x115ba, 0x115b9, 0x115bb, - /* 0x1d165 */ - 0x1d157, 0x1d15e, 0x1d158, 0x1d15f, 0x1d1b9, 0x1d1bb, 0x1d1ba, 0x1d1bc, - /* 0x1d16e */ - 0x1d15f, 0x1d160, 0x1d1bb, 0x1d1bd, 0x1d1bc, 0x1d1be, - /* 0x1d16f */ - 0x1d15f, 0x1d161, 0x1d1bb, 0x1d1bf, 0x1d1bc, 0x1d1c0, - /* 0x1d170 */ - 0x1d15f, 0x1d162, - /* 0x1d171 */ - 0x1d15f, 0x1d163, - /* 0x1d172 */ - 0x1d15f, 0x1d164 + 0x115b8, 0x115ba, 0x115b9, 0x115bb }; static inline int binary_search( unsigned int ch, int low, int high ) @@ -423,7 +367,7 @@ unsigned int DECLSPEC_HIDDEN wine_compose( unsigned int ch1, unsigned int ch2 ) { int pos; - if ((pos = binary_search( ch2, 0, 82 - 1 )) == -1) return 0; + if ((pos = binary_search( ch2, 0, 62 - 1 )) == -1) return 0; if ((pos = binary_search( ch1, table[2 * pos + 1], table[2 * pos + 3] - 1 )) == -1) return 0; return table[2 * pos + 1]; } diff --git a/tools/make_unicode b/tools/make_unicode index 2263a966a82..b922b6a7085 100755 --- a/tools/make_unicode +++ b/tools/make_unicode @@ -407,6 +407,7 @@ my @direction_table = (); my @decomp_table = (); my @combining_class_table = (); my @decomp_compat_table = (); +my @comp_exclusions = (); my $default_char; my $default_wchar; @@ -490,6 +491,7 @@ sub get_composition($$) return () unless defined $decomp_table[$ch]; # no decomposition my @ret = @{$decomp_table[$ch]}; return () if @ret < 2; # singleton decomposition + return () if $comp_exclusions[$ch]; # composition exclusion return () if $combining_class_table[$ch]; # non-starter return () if $combining_class_table[$ret[0]]; # first char is non-starter return () if $compat && !defined $decomp_table[$ret[0]] && @@ -648,6 +650,23 @@ sub load_data() my $ch = $tolower_table[$i]; $tolower_table[$i] = undef unless defined $toupper_table[$ch] && $toupper_table[$ch] == $i; } + + # load the composition exclusions + + my $EXCL = open_data_file( $UNIDATA, "CompositionExclusions.txt" ); + while (<$EXCL>) + { + s/\#.*//; # remove comments + if (/^([0-9a-fA-F]+)\.\.([0-9a-fA-F]+)\s*$/) + { + foreach my $i (hex $1 .. hex $2) { $comp_exclusions[$i] = 1; } + } + elsif (/^([0-9a-fA-F]+)\s*$/) + { + $comp_exclusions[hex $1] = 1; + } + } + close $EXCL; }