github.com/mtsmfm/go/src@v0.0.0-20221020090648-44bdcb9f8fde/unicode/utf16/utf16_test.go (about) 1 // Copyright 2010 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package utf16_test 6 7 import ( 8 "reflect" 9 "testing" 10 "unicode" 11 . "unicode/utf16" 12 ) 13 14 // Validate the constants redefined from unicode. 15 func TestConstants(t *testing.T) { 16 if MaxRune != unicode.MaxRune { 17 t.Errorf("utf16.maxRune is wrong: %x should be %x", MaxRune, unicode.MaxRune) 18 } 19 if ReplacementChar != unicode.ReplacementChar { 20 t.Errorf("utf16.replacementChar is wrong: %x should be %x", ReplacementChar, unicode.ReplacementChar) 21 } 22 } 23 24 type encodeTest struct { 25 in []rune 26 out []uint16 27 } 28 29 var encodeTests = []encodeTest{ 30 {[]rune{1, 2, 3, 4}, []uint16{1, 2, 3, 4}}, 31 {[]rune{0xffff, 0x10000, 0x10001, 0x12345, 0x10ffff}, 32 []uint16{0xffff, 0xd800, 0xdc00, 0xd800, 0xdc01, 0xd808, 0xdf45, 0xdbff, 0xdfff}}, 33 {[]rune{'a', 'b', 0xd7ff, 0xd800, 0xdfff, 0xe000, 0x110000, -1}, 34 []uint16{'a', 'b', 0xd7ff, 0xfffd, 0xfffd, 0xe000, 0xfffd, 0xfffd}}, 35 } 36 37 func TestEncode(t *testing.T) { 38 for _, tt := range encodeTests { 39 out := Encode(tt.in) 40 if !reflect.DeepEqual(out, tt.out) { 41 t.Errorf("Encode(%x) = %x; want %x", tt.in, out, tt.out) 42 } 43 } 44 } 45 46 func TestAppendRune(t *testing.T) { 47 for _, tt := range encodeTests { 48 var out []uint16 49 for _, u := range tt.in { 50 out = AppendRune(out, u) 51 } 52 if !reflect.DeepEqual(out, tt.out) { 53 t.Errorf("AppendRune(%x) = %x; want %x", tt.in, out, tt.out) 54 } 55 } 56 } 57 58 func TestEncodeRune(t *testing.T) { 59 for i, tt := range encodeTests { 60 j := 0 61 for _, r := range tt.in { 62 r1, r2 := EncodeRune(r) 63 if r < 0x10000 || r > unicode.MaxRune { 64 if j >= len(tt.out) { 65 t.Errorf("#%d: ran out of tt.out", i) 66 break 67 } 68 if r1 != unicode.ReplacementChar || r2 != unicode.ReplacementChar { 69 t.Errorf("EncodeRune(%#x) = %#x, %#x; want 0xfffd, 0xfffd", r, r1, r2) 70 } 71 j++ 72 } else { 73 if j+1 >= len(tt.out) { 74 t.Errorf("#%d: ran out of tt.out", i) 75 break 76 } 77 if r1 != rune(tt.out[j]) || r2 != rune(tt.out[j+1]) { 78 t.Errorf("EncodeRune(%#x) = %#x, %#x; want %#x, %#x", r, r1, r2, tt.out[j], tt.out[j+1]) 79 } 80 j += 2 81 dec := DecodeRune(r1, r2) 82 if dec != r { 83 t.Errorf("DecodeRune(%#x, %#x) = %#x; want %#x", r1, r2, dec, r) 84 } 85 } 86 } 87 if j != len(tt.out) { 88 t.Errorf("#%d: EncodeRune didn't generate enough output", i) 89 } 90 } 91 } 92 93 type decodeTest struct { 94 in []uint16 95 out []rune 96 } 97 98 var decodeTests = []decodeTest{ 99 {[]uint16{1, 2, 3, 4}, []rune{1, 2, 3, 4}}, 100 {[]uint16{0xffff, 0xd800, 0xdc00, 0xd800, 0xdc01, 0xd808, 0xdf45, 0xdbff, 0xdfff}, 101 []rune{0xffff, 0x10000, 0x10001, 0x12345, 0x10ffff}}, 102 {[]uint16{0xd800, 'a'}, []rune{0xfffd, 'a'}}, 103 {[]uint16{0xdfff}, []rune{0xfffd}}, 104 } 105 106 func TestDecode(t *testing.T) { 107 for _, tt := range decodeTests { 108 out := Decode(tt.in) 109 if !reflect.DeepEqual(out, tt.out) { 110 t.Errorf("Decode(%x) = %x; want %x", tt.in, out, tt.out) 111 } 112 } 113 } 114 115 var decodeRuneTests = []struct { 116 r1, r2 rune 117 want rune 118 }{ 119 {0xd800, 0xdc00, 0x10000}, 120 {0xd800, 0xdc01, 0x10001}, 121 {0xd808, 0xdf45, 0x12345}, 122 {0xdbff, 0xdfff, 0x10ffff}, 123 {0xd800, 'a', 0xfffd}, // illegal, replacement rune substituted 124 } 125 126 func TestDecodeRune(t *testing.T) { 127 for i, tt := range decodeRuneTests { 128 got := DecodeRune(tt.r1, tt.r2) 129 if got != tt.want { 130 t.Errorf("%d: DecodeRune(%q, %q) = %v; want %v", i, tt.r1, tt.r2, got, tt.want) 131 } 132 } 133 } 134 135 var surrogateTests = []struct { 136 r rune 137 want bool 138 }{ 139 // from https://en.wikipedia.org/wiki/UTF-16 140 {'\u007A', false}, // LATIN SMALL LETTER Z 141 {'\u6C34', false}, // CJK UNIFIED IDEOGRAPH-6C34 (water) 142 {'\uFEFF', false}, // Byte Order Mark 143 {'\U00010000', false}, // LINEAR B SYLLABLE B008 A (first non-BMP code point) 144 {'\U0001D11E', false}, // MUSICAL SYMBOL G CLEF 145 {'\U0010FFFD', false}, // PRIVATE USE CHARACTER-10FFFD (last Unicode code point) 146 147 {rune(0xd7ff), false}, // surr1-1 148 {rune(0xd800), true}, // surr1 149 {rune(0xdc00), true}, // surr2 150 {rune(0xe000), false}, // surr3 151 {rune(0xdfff), true}, // surr3-1 152 } 153 154 func TestIsSurrogate(t *testing.T) { 155 for i, tt := range surrogateTests { 156 got := IsSurrogate(tt.r) 157 if got != tt.want { 158 t.Errorf("%d: IsSurrogate(%q) = %v; want %v", i, tt.r, got, tt.want) 159 } 160 } 161 } 162 163 func BenchmarkDecodeValidASCII(b *testing.B) { 164 // "hello world" 165 data := []uint16{104, 101, 108, 108, 111, 32, 119, 111, 114, 108, 100} 166 for i := 0; i < b.N; i++ { 167 Decode(data) 168 } 169 } 170 171 func BenchmarkDecodeValidJapaneseChars(b *testing.B) { 172 // "日本語日本語日本語" 173 data := []uint16{26085, 26412, 35486, 26085, 26412, 35486, 26085, 26412, 35486} 174 for i := 0; i < b.N; i++ { 175 Decode(data) 176 } 177 } 178 179 func BenchmarkDecodeRune(b *testing.B) { 180 rs := make([]rune, 10) 181 // U+1D4D0 to U+1D4D4: MATHEMATICAL BOLD SCRIPT CAPITAL LETTERS 182 for i, u := range []rune{'𝓐', '𝓑', '𝓒', '𝓓', '𝓔'} { 183 rs[2*i], rs[2*i+1] = EncodeRune(u) 184 } 185 186 b.ResetTimer() 187 for i := 0; i < b.N; i++ { 188 for j := 0; j < 5; j++ { 189 DecodeRune(rs[2*j], rs[2*j+1]) 190 } 191 } 192 } 193 194 func BenchmarkEncodeValidASCII(b *testing.B) { 195 data := []rune{'h', 'e', 'l', 'l', 'o'} 196 for i := 0; i < b.N; i++ { 197 Encode(data) 198 } 199 } 200 201 func BenchmarkEncodeValidJapaneseChars(b *testing.B) { 202 data := []rune{'日', '本', '語'} 203 for i := 0; i < b.N; i++ { 204 Encode(data) 205 } 206 } 207 208 func BenchmarkAppendRuneValidASCII(b *testing.B) { 209 data := []rune{'h', 'e', 'l', 'l', 'o'} 210 a := make([]uint16, 0, len(data)*2) 211 for i := 0; i < b.N; i++ { 212 for _, u := range data { 213 a = AppendRune(a, u) 214 } 215 a = a[:0] 216 } 217 } 218 219 func BenchmarkAppendRuneValidJapaneseChars(b *testing.B) { 220 data := []rune{'日', '本', '語'} 221 a := make([]uint16, 0, len(data)*2) 222 for i := 0; i < b.N; i++ { 223 for _, u := range data { 224 a = AppendRune(a, u) 225 } 226 a = a[:0] 227 } 228 } 229 230 func BenchmarkEncodeRune(b *testing.B) { 231 for i := 0; i < b.N; i++ { 232 for _, u := range []rune{'𝓐', '𝓑', '𝓒', '𝓓', '𝓔'} { 233 EncodeRune(u) 234 } 235 } 236 }