github.com/go-xe2/third@v1.0.3/golang.org/x/text/unicode/norm/ucd_test.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package norm 6 7 import ( 8 "bufio" 9 "bytes" 10 "fmt" 11 "regexp" 12 "runtime" 13 "strconv" 14 "strings" 15 "sync" 16 "testing" 17 "time" 18 "unicode/utf8" 19 20 "github.com/go-xe2/third/golang.org/x/text/internal/gen" 21 "github.com/go-xe2/third/golang.org/x/text/internal/testtext" 22 ) 23 24 var once sync.Once 25 26 func skipShort(t *testing.T) { 27 testtext.SkipIfNotLong(t) 28 29 once.Do(func() { loadTestData(t) }) 30 } 31 32 // This regression test runs the test set in NormalizationTest.txt 33 // (taken from http://www.unicode.org/Public/<unicode.Version>/ucd/). 34 // 35 // NormalizationTest.txt has form: 36 // @Part0 # Specific cases 37 // # 38 // 1E0A;1E0A;0044 0307;1E0A;0044 0307; # (Ḋ; Ḋ; D◌̇; Ḋ; D◌̇; ) LATIN CAPITAL LETTER D WITH DOT ABOVE 39 // 1E0C;1E0C;0044 0323;1E0C;0044 0323; # (Ḍ; Ḍ; D◌̣; Ḍ; D◌̣; ) LATIN CAPITAL LETTER D WITH DOT BELOW 40 // 41 // Each test has 5 columns (c1, c2, c3, c4, c5), where 42 // (c1, c2, c3, c4, c5) == (c1, NFC(c1), NFD(c1), NFKC(c1), NFKD(c1)) 43 // 44 // CONFORMANCE: 45 // 1. The following invariants must be true for all conformant implementations 46 // 47 // NFC 48 // c2 == NFC(c1) == NFC(c2) == NFC(c3) 49 // c4 == NFC(c4) == NFC(c5) 50 // 51 // NFD 52 // c3 == NFD(c1) == NFD(c2) == NFD(c3) 53 // c5 == NFD(c4) == NFD(c5) 54 // 55 // NFKC 56 // c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5) 57 // 58 // NFKD 59 // c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5) 60 // 61 // 2. For every code point X assigned in this version of Unicode that is not 62 // specifically listed in Part 1, the following invariants must be true 63 // for all conformant implementations: 64 // 65 // X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X) 66 // 67 68 // Column types. 69 const ( 70 cRaw = iota 71 cNFC 72 cNFD 73 cNFKC 74 cNFKD 75 cMaxColumns 76 ) 77 78 // Holds data from NormalizationTest.txt 79 var part []Part 80 81 type Part struct { 82 name string 83 number int 84 tests []Test 85 } 86 87 type Test struct { 88 name string 89 partnr int 90 number int 91 r rune // used for character by character test 92 cols [cMaxColumns]string // Each has 5 entries, see below. 93 } 94 95 func (t Test) Name() string { 96 if t.number < 0 { 97 return part[t.partnr].name 98 } 99 return fmt.Sprintf("%s:%d", part[t.partnr].name, t.number) 100 } 101 102 var partRe = regexp.MustCompile(`@Part(\d) # (.*)$`) 103 var testRe = regexp.MustCompile(`^` + strings.Repeat(`([\dA-F ]+);`, 5) + ` # (.*)$`) 104 105 var counter int 106 107 // Load the data form NormalizationTest.txt 108 func loadTestData(t *testing.T) { 109 f := gen.OpenUCDFile("NormalizationTest.txt") 110 defer f.Close() 111 scanner := bufio.NewScanner(f) 112 for scanner.Scan() { 113 line := scanner.Text() 114 if len(line) == 0 || line[0] == '#' { 115 continue 116 } 117 m := partRe.FindStringSubmatch(line) 118 if m != nil { 119 if len(m) < 3 { 120 t.Fatal("Failed to parse Part: ", line) 121 } 122 i, err := strconv.Atoi(m[1]) 123 if err != nil { 124 t.Fatal(err) 125 } 126 name := m[2] 127 part = append(part, Part{name: name[:len(name)-1], number: i}) 128 continue 129 } 130 m = testRe.FindStringSubmatch(line) 131 if m == nil || len(m) < 7 { 132 t.Fatalf(`Failed to parse: "%s" result: %#v`, line, m) 133 } 134 test := Test{name: m[6], partnr: len(part) - 1, number: counter} 135 counter++ 136 for j := 1; j < len(m)-1; j++ { 137 for _, split := range strings.Split(m[j], " ") { 138 r, err := strconv.ParseUint(split, 16, 64) 139 if err != nil { 140 t.Fatal(err) 141 } 142 if test.r == 0 { 143 // save for CharacterByCharacterTests 144 test.r = rune(r) 145 } 146 var buf [utf8.UTFMax]byte 147 sz := utf8.EncodeRune(buf[:], rune(r)) 148 test.cols[j-1] += string(buf[:sz]) 149 } 150 } 151 part := &part[len(part)-1] 152 part.tests = append(part.tests, test) 153 } 154 if scanner.Err() != nil { 155 t.Fatal(scanner.Err()) 156 } 157 } 158 159 func cmpResult(t *testing.T, tc *Test, name string, f Form, gold, test, result string) { 160 if gold != result { 161 t.Errorf("%s:%s: %s(%+q)=%+q; want %+q: %s", 162 tc.Name(), name, fstr[f], test, result, gold, tc.name) 163 } 164 } 165 166 func cmpIsNormal(t *testing.T, tc *Test, name string, f Form, test string, result, want bool) { 167 if result != want { 168 t.Errorf("%s:%s: %s(%+q)=%v; want %v", tc.Name(), name, fstr[f], test, result, want) 169 } 170 } 171 172 func doTest(t *testing.T, tc *Test, f Form, gold, test string) { 173 testb := []byte(test) 174 result := f.Bytes(testb) 175 cmpResult(t, tc, "Bytes", f, gold, test, string(result)) 176 177 sresult := f.String(test) 178 cmpResult(t, tc, "String", f, gold, test, sresult) 179 180 acc := []byte{} 181 i := Iter{} 182 i.InitString(f, test) 183 for !i.Done() { 184 acc = append(acc, i.Next()...) 185 } 186 cmpResult(t, tc, "Iter.Next", f, gold, test, string(acc)) 187 188 buf := make([]byte, 128) 189 acc = nil 190 for p := 0; p < len(testb); { 191 nDst, nSrc, _ := f.Transform(buf, testb[p:], true) 192 acc = append(acc, buf[:nDst]...) 193 p += nSrc 194 } 195 cmpResult(t, tc, "Transform", f, gold, test, string(acc)) 196 197 for i := range test { 198 out := f.Append(f.Bytes([]byte(test[:i])), []byte(test[i:])...) 199 cmpResult(t, tc, fmt.Sprintf(":Append:%d", i), f, gold, test, string(out)) 200 } 201 cmpIsNormal(t, tc, "IsNormal", f, test, f.IsNormal([]byte(test)), test == gold) 202 cmpIsNormal(t, tc, "IsNormalString", f, test, f.IsNormalString(test), test == gold) 203 } 204 205 func doConformanceTests(t *testing.T, tc *Test, partn int) { 206 for i := 0; i <= 2; i++ { 207 doTest(t, tc, NFC, tc.cols[1], tc.cols[i]) 208 doTest(t, tc, NFD, tc.cols[2], tc.cols[i]) 209 doTest(t, tc, NFKC, tc.cols[3], tc.cols[i]) 210 doTest(t, tc, NFKD, tc.cols[4], tc.cols[i]) 211 } 212 for i := 3; i <= 4; i++ { 213 doTest(t, tc, NFC, tc.cols[3], tc.cols[i]) 214 doTest(t, tc, NFD, tc.cols[4], tc.cols[i]) 215 doTest(t, tc, NFKC, tc.cols[3], tc.cols[i]) 216 doTest(t, tc, NFKD, tc.cols[4], tc.cols[i]) 217 } 218 } 219 220 func TestCharacterByCharacter(t *testing.T) { 221 skipShort(t) 222 tests := part[1].tests 223 var last rune = 0 224 for i := 0; i <= len(tests); i++ { // last one is special case 225 var r rune 226 if i == len(tests) { 227 r = 0x2FA1E // Don't have to go to 0x10FFFF 228 } else { 229 r = tests[i].r 230 } 231 for last++; last < r; last++ { 232 // Check all characters that were not explicitly listed in the test. 233 tc := &Test{partnr: 1, number: -1} 234 char := string(last) 235 doTest(t, tc, NFC, char, char) 236 doTest(t, tc, NFD, char, char) 237 doTest(t, tc, NFKC, char, char) 238 doTest(t, tc, NFKD, char, char) 239 } 240 if i < len(tests) { 241 doConformanceTests(t, &tests[i], 1) 242 } 243 } 244 } 245 246 func TestStandardTests(t *testing.T) { 247 skipShort(t) 248 for _, j := range []int{0, 2, 3} { 249 for _, test := range part[j].tests { 250 doConformanceTests(t, &test, j) 251 } 252 } 253 } 254 255 // TestPerformance verifies that normalization is O(n). If any of the 256 // code does not properly check for maxCombiningChars, normalization 257 // may exhibit O(n**2) behavior. 258 func TestPerformance(t *testing.T) { 259 skipShort(t) 260 runtime.GOMAXPROCS(2) 261 success := make(chan bool, 1) 262 go func() { 263 buf := bytes.Repeat([]byte("\u035D"), 1024*1024) 264 buf = append(buf, "\u035B"...) 265 NFC.Append(nil, buf...) 266 success <- true 267 }() 268 timeout := time.After(1 * time.Second) 269 select { 270 case <-success: 271 // test completed before the timeout 272 case <-timeout: 273 t.Errorf(`unexpectedly long time to complete PerformanceTest`) 274 } 275 }