unicode: Generate compositions on the fly based on the decomposition table.

Signed-off-by: Alexandre Julliard <julliard@winehq.org>
This commit is contained in:
Alexandre Julliard 2020-02-17 11:48:30 +01:00
parent fdc89e02e6
commit ce41b32117

View file

@ -405,7 +405,6 @@ my @category_table = ();
my @joining_table = (); my @joining_table = ();
my @direction_table = (); my @direction_table = ();
my @decomp_table = (); my @decomp_table = ();
my @compose_table = ();
my @combining_class_table = (); my @combining_class_table = ();
my @decomp_compat_table = (); my @decomp_compat_table = ();
my $default_char; my $default_char;
@ -419,12 +418,22 @@ my %joining_forms =
"medial" => [] "medial" => []
); );
sub get_utf16($) sub to_utf16(@)
{ {
my $ch = shift; my @ret;
return $ch if ($ch < 0x10000); foreach my $ch (@_)
$ch -= 0x10000; {
return ( 0xd800 | ($ch >> 10), 0xdc00 | ($ch & 0x3ff) ); if ($ch < 0x10000)
{
push @ret, $ch;
}
else
{
my $val = $ch - 0x10000;
push @ret, 0xd800 | ($val >> 10), 0xdc00 | ($val & 0x3ff);
}
}
return @ret;
} }
################################################################ ################################################################
@ -465,14 +474,25 @@ sub get_decomposition($$)
my ($char, $table) = @_; my ($char, $table) = @_;
my @ret; my @ret;
return get_utf16($char) unless defined ${$table}[$char]; return $char unless defined $table->[$char];
foreach my $ch (@{${$table}[$char]}) foreach my $ch (@{$table->[$char]})
{ {
push @ret, get_decomposition( $ch, $table ); push @ret, get_decomposition( $ch, $table );
} }
return @ret; return @ret;
} }
################################################################
# get the composition that results in a given character
sub get_composition($$)
{
    # $compat is accepted as the second argument but is not consulted here;
    # the lookup always goes through the file-level @decomp_table.
    my ($ch, $compat) = @_;

    my $decomp = $decomp_table[$ch];
    return () unless defined $decomp;  # no decomposition
    return () if @$decomp < 2;  # singleton decomposition
    return @$decomp;  # the table entry's elements, in stored order
}
################################################################ ################################################################
# recursively build decompositions # recursively build decompositions
sub build_decompositions(@) sub build_decompositions(@)
@ -483,7 +503,7 @@ sub build_decompositions(@)
for (my $i = 0; $i < @src; $i++) for (my $i = 0; $i < @src; $i++)
{ {
next unless defined $src[$i]; next unless defined $src[$i];
my @decomp = get_decomposition( $i, \@src ); my @decomp = to_utf16( get_decomposition( $i, \@src ));
$dst[$i] = \@decomp; $dst[$i] = \@decomp;
} }
return @dst; return @dst;
@ -588,7 +608,6 @@ sub load_data()
if ($decomp =~ /^([0-9a-fA-F]+)\s+([0-9a-fA-F]+)$/) if ($decomp =~ /^([0-9a-fA-F]+)\s+([0-9a-fA-F]+)$/)
{ {
$decomp_table[$src] = $decomp_compat_table[$src] = [ hex $1, hex $2 ]; $decomp_table[$src] = $decomp_compat_table[$src] = [ hex $1, hex $2 ];
push @compose_table, [ hex $1, hex $2, $src ];
} }
elsif ($decomp =~ /^([0-9a-fA-F]+)$/) elsif ($decomp =~ /^([0-9a-fA-F]+)$/)
{ {
@ -2010,21 +2029,13 @@ sub dump_compose_table($)
print OUTPUT "/* DO NOT EDIT!! */\n\n"; print OUTPUT "/* DO NOT EDIT!! */\n\n";
print OUTPUT "#include \"windef.h\"\n\n"; print OUTPUT "#include \"windef.h\"\n\n";
my @filled = (); my @filled;
foreach my $i (@compose_table)
{
my @comp = @$i;
push @{$filled[$comp[1]]}, [ $comp[0], $comp[2] ];
}
# count how many different second chars we have
my $count = 0;
for (my $i = 0; $i <= $MAX_CHAR; $i++) for (my $i = 0; $i <= $MAX_CHAR; $i++)
{ {
next unless defined $filled[$i]; my @comp = get_composition( $i, 0 );
$count++; push @{$filled[$comp[1]]}, [ $comp[0], $i ] if @comp;
} }
my $count = scalar grep defined, @filled;
# build the table of second chars and offsets # build the table of second chars and offsets
@ -2047,10 +2058,9 @@ sub dump_compose_table($)
{ {
next unless defined $filled[$i]; next unless defined $filled[$i];
my @table = (); my @table = ();
my @list = sort { $a->[0] <=> $b->[0] } @{$filled[$i]}; foreach my $map (sort { $a->[0] <=> $b->[0] } @{$filled[$i]})
for (my $j = 0; $j <= $#list; $j++)
{ {
push @table, $list[$j][0], $list[$j][1]; push @table, $map->[0], $map->[1];
} }
printf OUTPUT ",\n /* 0x%04x */\n%s", $i, dump_array( 20, 0, @table ); printf OUTPUT ",\n /* 0x%04x */\n%s", $i, dump_array( 20, 0, @table );
} }