github.com/gnolang/gno@v0.0.0-20240520182011-228e9d0192ce/gnovm/stdlibs/unicode/utf16/utf16_test.gno (about) 1 // Copyright 2010 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package utf16 6 7 import ( 8 "testing" 9 "unicode" 10 "unicode/utf16" 11 ) 12 13 type encodeTest struct { 14 in []rune 15 out []uint16 16 } 17 18 var encodeTests = []encodeTest{ 19 {[]rune{1, 2, 3, 4}, []uint16{1, 2, 3, 4}}, 20 { 21 []rune{0xffff, 0x10000, 0x10001, 0x12345, 0x10ffff}, 22 []uint16{0xffff, 0xd800, 0xdc00, 0xd800, 0xdc01, 0xd808, 0xdf45, 0xdbff, 0xdfff}, 23 }, 24 { 25 []rune{'a', 'b', 0xd7ff, 0xd800, 0xdfff, 0xe000, 0x110000, -1}, 26 []uint16{'a', 'b', 0xd7ff, 0xfffd, 0xfffd, 0xe000, 0xfffd, 0xfffd}, 27 }, 28 } 29 30 func slicesEqual(a, b []uint16) bool { 31 if len(a) != len(b) { 32 return false 33 } 34 for i, v := range a { 35 if v != b[i] { 36 return false 37 } 38 } 39 return true 40 } 41 42 func TestEncode(t *testing.T) { 43 for _, tt := range encodeTests { 44 out := Encode(tt.in) 45 if !slicesEqual(out, tt.out) { 46 t.Errorf("Encode(%x) = %x; want %x", tt.in, out, tt.out) 47 } 48 } 49 } 50 51 func TestEncodeRune(t *testing.T) { 52 for i, tt := range encodeTests { 53 j := 0 54 for _, r := range tt.in { 55 r1, r2 := EncodeRune(r) 56 if r < 0x10000 || r > unicode.MaxRune { 57 if j >= len(tt.out) { 58 t.Errorf("#%d: ran out of tt.out", i) 59 break 60 } 61 if r1 != unicode.ReplacementChar || r2 != unicode.ReplacementChar { 62 t.Errorf("EncodeRune(%#x) = %#x, %#x; want 0xfffd, 0xfffd", r, r1, r2) 63 } 64 j++ 65 } else { 66 if j+1 >= len(tt.out) { 67 t.Errorf("#%d: ran out of tt.out", i) 68 break 69 } 70 if r1 != rune(tt.out[j]) || r2 != rune(tt.out[j+1]) { 71 t.Errorf("EncodeRune(%#x) = %#x, %#x; want %#x, %#x", r, r1, r2, tt.out[j], tt.out[j+1]) 72 } 73 j += 2 74 dec := DecodeRune(r1, r2) 75 if dec != r { 76 t.Errorf("DecodeRune(%#x, %#x) = %#x; want %#x", r1, r2, dec, r) 77 } 78 } 79 } 80 if j != len(tt.out) { 81 t.Errorf("#%d: EncodeRune didn't generate enough output", i) 82 } 83 } 84 } 85 86 type decodeTest struct { 87 in []uint16 88 out []rune 89 } 90 91 var decodeTests = []decodeTest{ 92 {[]uint16{1, 2, 3, 4}, []rune{1, 2, 3, 4}}, 93 { 94 []uint16{0xffff, 0xd800, 0xdc00, 0xd800, 0xdc01, 0xd808, 0xdf45, 0xdbff, 0xdfff}, 95 []rune{0xffff, 0x10000, 0x10001, 0x12345, 0x10ffff}, 96 }, 97 {[]uint16{0xd800, 'a'}, []rune{0xfffd, 'a'}}, 98 {[]uint16{0xdfff}, []rune{0xfffd}}, 99 } 100 101 func TestDecode(t *testing.T) { 102 for _, tt := range decodeTests { 103 out := Decode(tt.in) 104 if !runesEqual(out, tt.out) { 105 t.Errorf("Decode(%x) = %x; want %x", tt.in, out, tt.out) 106 } 107 } 108 } 109 110 func runesEqual(a, b []rune) bool { 111 if len(a) != len(b) { 112 return false 113 } 114 for i, v := range a { 115 if v != b[i] { 116 return false 117 } 118 } 119 return true 120 } 121 122 var decodeRuneTests = []struct { 123 r1, r2 rune 124 want rune 125 }{ 126 {0xd800, 0xdc00, 0x10000}, 127 {0xd800, 0xdc01, 0x10001}, 128 {0xd808, 0xdf45, 0x12345}, 129 {0xdbff, 0xdfff, 0x10ffff}, 130 {0xd800, 'a', 0xfffd}, // illegal, replacement rune substituted 131 } 132 133 func TestDecodeRune(t *testing.T) { 134 for i, tt := range decodeRuneTests { 135 got := DecodeRune(tt.r1, tt.r2) 136 if got != tt.want { 137 t.Errorf("%d: DecodeRune(%q, %q) = %v; want %v", i, tt.r1, tt.r2, got, tt.want) 138 } 139 } 140 } 141 142 var surrogateTests = []struct { 143 r rune 144 want bool 145 }{ 146 // from https://en.wikipedia.org/wiki/UTF-16 147 {'\u007A', false}, // LATIN SMALL LETTER Z 148 {'\u6C34', false}, // CJK UNIFIED IDEOGRAPH-6C34 (water) 149 {'\uFEFF', false}, // Byte Order Mark 150 {'\U00010000', false}, // LINEAR B SYLLABLE B008 A (first non-BMP code point) 151 {'\U0001D11E', false}, // MUSICAL SYMBOL G CLEF 152 {'\U0010FFFD', false}, // PRIVATE USE CHARACTER-10FFFD (last Unicode code point) 153 154 {rune(0xd7ff), false}, // surr1-1 155 {rune(0xd800), true}, // surr1 156 {rune(0xdc00), true}, // surr2 157 {rune(0xe000), false}, // surr3 158 {rune(0xdfff), true}, // surr3-1 159 } 160 161 func TestIsSurrogate(t *testing.T) { 162 for i, tt := range surrogateTests { 163 got := IsSurrogate(tt.r) 164 if got != tt.want { 165 t.Errorf("%d: IsSurrogate(%q) = %v; want %v", i, tt.r, got, tt.want) 166 } 167 } 168 } 169 170 func BenchmarkDecodeValidASCII(b *testing.B) { 171 // "hello world" 172 data := []uint16{104, 101, 108, 108, 111, 32, 119, 111, 114, 108, 100} 173 for i := 0; i < b.N; i++ { 174 Decode(data) 175 } 176 } 177 178 func BenchmarkDecodeValidJapaneseChars(b *testing.B) { 179 // "日本語日本語日本語" 180 data := []uint16{26085, 26412, 35486, 26085, 26412, 35486, 26085, 26412, 35486} 181 for i := 0; i < b.N; i++ { 182 Decode(data) 183 } 184 } 185 186 func BenchmarkDecodeRune(b *testing.B) { 187 rs := make([]rune, 10) 188 // U+1D4D0 to U+1D4D4: MATHEMATICAL BOLD SCRIPT CAPITAL LETTERS 189 for i, u := range []rune{'𝓐', '𝓑', '𝓒', '𝓓', '𝓔'} { 190 rs[2*i], rs[2*i+1] = EncodeRune(u) 191 } 192 193 b.ResetTimer() 194 for i := 0; i < b.N; i++ { 195 for j := 0; j < 5; j++ { 196 DecodeRune(rs[2*j], rs[2*j+1]) 197 } 198 } 199 } 200 201 func BenchmarkEncodeValidASCII(b *testing.B) { 202 data := []rune{'h', 'e', 'l', 'l', 'o'} 203 for i := 0; i < b.N; i++ { 204 Encode(data) 205 } 206 } 207 208 func BenchmarkEncodeValidJapaneseChars(b *testing.B) { 209 data := []rune{'日', '本', '語'} 210 for i := 0; i < b.N; i++ { 211 Encode(data) 212 } 213 } 214 215 func BenchmarkEncodeRune(b *testing.B) { 216 for i := 0; i < b.N; i++ { 217 for _, u := range []rune{'𝓐', '𝓑', '𝓒', '𝓓', '𝓔'} { 218 EncodeRune(u) 219 } 220 } 221 }