unicode/utf16: add AppendRune

AppendRune appends the UTF-16 encoding of a rune to a []uint16.

    BenchmarkEncodeValidASCII-12                24.61ns 16B 1allocs
    BenchmarkEncodeValidJapaneseChars-12        18.79ns 8B  1allocs
    BenchmarkAppendRuneValidASCII-12            6.826ns 0B  0allocs
    BenchmarkAppendRuneValidJapaneseChars-12    3.547ns 0B  0allocs

The ASCII case is written to be inlineable.

Fixes #51896

Change-Id: I593b1029f603297ef6e80e036f2fee2a0938d38d
Reviewed-on: https://go-review.googlesource.com/c/go/+/409054
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@google.com>
Run-TryBot: Ian Lance Taylor <iant@google.com>
Auto-Submit: Ian Lance Taylor <iant@google.com>
Reviewed-by: Joedian Reid <joedian@golang.org>
This commit is contained in:
qmuntal 2022-05-27 15:44:55 +02:00 committed by Gopher Robot
parent f65ded5626
commit 48297f1fb1
3 changed files with 52 additions and 0 deletions

1
api/next/51896.txt Normal file
View file

@ -0,0 +1 @@
pkg unicode/utf16, func AppendRune([]uint16, int32) []uint16 #51896

View file

@ -83,6 +83,23 @@ func Encode(s []rune) []uint16 {
return a[:n]
}
// AppendRune appends the UTF-16 encoding of the Unicode code point r
// to the end of a and returns the extended buffer. If the rune is not
// a valid Unicode code point, it appends the encoding of U+FFFD.
//
// A rune in [0, surr1) or [surr3, surrSelf) is appended as a single
// uint16. A rune in [surrSelf, maxRune] is appended as the two values
// returned by EncodeRune. Every other value (negative, in [surr1, surr3),
// or greater than maxRune) is appended as replacementChar.
func AppendRune(a []uint16, r rune) []uint16 {
// This function is inlineable for fast handling of ASCII.
switch {
case 0 <= r && r < surr1, surr3 <= r && r < surrSelf:
// normal rune: appended as one code unit
return append(a, uint16(r))
case surrSelf <= r && r <= maxRune:
// needs surrogate sequence: EncodeRune yields the pair to append
r1, r2 := EncodeRune(r)
return append(a, uint16(r1), uint16(r2))
}
// No case matched, so r is outside the ranges handled above.
return append(a, replacementChar)
}
// Decode returns the Unicode code point sequence represented
// by the UTF-16 encoding s.
func Decode(s []uint16) []rune {

View file

@ -43,6 +43,18 @@ func TestEncode(t *testing.T) {
}
}
// TestAppendRune feeds every input of encodeTests through AppendRune one
// rune at a time and verifies that the accumulated buffer matches the
// expected output recorded for that case.
func TestAppendRune(t *testing.T) {
	for _, tc := range encodeTests {
		// Start from a nil slice so AppendRune is also exercised on an
		// empty destination.
		var got []uint16
		for _, r := range tc.in {
			got = AppendRune(got, r)
		}
		if reflect.DeepEqual(got, tc.out) {
			continue
		}
		t.Errorf("AppendRune(%x) = %x; want %x", tc.in, got, tc.out)
	}
}
func TestEncodeRune(t *testing.T) {
for i, tt := range encodeTests {
j := 0
@ -193,6 +205,28 @@ func BenchmarkEncodeValidJapaneseChars(b *testing.B) {
}
}
// BenchmarkAppendRuneValidASCII measures AppendRune on a short run of
// ASCII runes, reusing one pre-sized buffer across iterations.
func BenchmarkAppendRuneValidASCII(b *testing.B) {
	input := []rune("hello")
	// Capacity of two uint16 per rune is reserved up front so append
	// never has to grow the buffer inside the loop.
	buf := make([]uint16, 0, 2*len(input))
	for i := 0; i < b.N; i++ {
		buf = buf[:0] // reuse the backing array
		for _, r := range input {
			buf = AppendRune(buf, r)
		}
	}
}
// BenchmarkAppendRuneValidJapaneseChars measures AppendRune on a short
// run of Japanese characters, reusing one pre-sized buffer across
// iterations.
func BenchmarkAppendRuneValidJapaneseChars(b *testing.B) {
	input := []rune("日本語")
	// Capacity of two uint16 per rune is reserved up front so append
	// never has to grow the buffer inside the loop.
	buf := make([]uint16, 0, 2*len(input))
	for i := 0; i < b.N; i++ {
		buf = buf[:0] // reuse the backing array
		for _, r := range input {
			buf = AppendRune(buf, r)
		}
	}
}
func BenchmarkEncodeRune(b *testing.B) {
for i := 0; i < b.N; i++ {
for _, u := range []rune{'𝓐', '𝓑', '𝓒', '𝓓', '𝓔'} {