github.com/graybobo/golang.org-package-offline-cache@v0.0.0-20200626051047-6608995c132f/x/text/unicode/norm/normregtest.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // +build ignore 6 7 package main 8 9 import ( 10 "bufio" 11 "bytes" 12 "flag" 13 "fmt" 14 "log" 15 "net/http" 16 "os" 17 "path" 18 "regexp" 19 "runtime" 20 "strconv" 21 "strings" 22 "time" 23 "unicode" 24 "unicode/utf8" 25 26 "golang.org/x/text/unicode/norm" 27 ) 28 29 func main() { 30 flag.Parse() 31 loadTestData() 32 CharacterByCharacterTests() 33 StandardTests() 34 PerformanceTest() 35 if errorCount == 0 { 36 fmt.Println("PASS") 37 } 38 } 39 40 const file = "NormalizationTest.txt" 41 42 var url = flag.String("url", 43 "http://www.unicode.org/Public/"+unicode.Version+"/ucd/"+file, 44 "URL of Unicode database directory") 45 var localFiles = flag.Bool("local", 46 false, 47 "data files have been copied to the current directory; for debugging only") 48 49 var logger = log.New(os.Stderr, "", log.Lshortfile) 50 51 // This regression test runs the test set in NormalizationTest.txt 52 // (taken from http://www.unicode.org/Public/<unicode.Version>/ucd/). 53 // 54 // NormalizationTest.txt has form: 55 // @Part0 # Specific cases 56 // # 57 // 1E0A;1E0A;0044 0307;1E0A;0044 0307; # (Ḋ; Ḋ; D◌̇; Ḋ; D◌̇; ) LATIN CAPITAL LETTER D WITH DOT ABOVE 58 // 1E0C;1E0C;0044 0323;1E0C;0044 0323; # (Ḍ; Ḍ; D◌̣; Ḍ; D◌̣; ) LATIN CAPITAL LETTER D WITH DOT BELOW 59 // 60 // Each test has 5 columns (c1, c2, c3, c4, c5), where 61 // (c1, c2, c3, c4, c5) == (c1, NFC(c1), NFD(c1), NFKC(c1), NFKD(c1)) 62 // 63 // CONFORMANCE: 64 // 1. The following invariants must be true for all conformant implementations 65 // 66 // NFC 67 // c2 == NFC(c1) == NFC(c2) == NFC(c3) 68 // c4 == NFC(c4) == NFC(c5) 69 // 70 // NFD 71 // c3 == NFD(c1) == NFD(c2) == NFD(c3) 72 // c5 == NFD(c4) == NFD(c5) 73 // 74 // NFKC 75 // c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5) 76 // 77 // NFKD 78 // c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5) 79 // 80 // 2. For every code point X assigned in this version of Unicode that is not 81 // specifically listed in Part 1, the following invariants must be true 82 // for all conformant implementations: 83 // 84 // X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X) 85 // 86 87 // Column types. 88 const ( 89 cRaw = iota 90 cNFC 91 cNFD 92 cNFKC 93 cNFKD 94 cMaxColumns 95 ) 96 97 // Holds data from NormalizationTest.txt 98 var part []Part 99 100 type Part struct { 101 name string 102 number int 103 tests []Test 104 } 105 106 type Test struct { 107 name string 108 partnr int 109 number int 110 r rune // used for character by character test 111 cols [cMaxColumns]string // Each has 5 entries, see below. 112 } 113 114 func (t Test) Name() string { 115 if t.number < 0 { 116 return part[t.partnr].name 117 } 118 return fmt.Sprintf("%s:%d", part[t.partnr].name, t.number) 119 } 120 121 var partRe = regexp.MustCompile(`@Part(\d) # (.*)$`) 122 var testRe = regexp.MustCompile(`^` + strings.Repeat(`([\dA-F ]+);`, 5) + ` # (.*)$`) 123 124 var counter int 125 126 // Load the data form NormalizationTest.txt 127 func loadTestData() { 128 if *localFiles { 129 pwd, _ := os.Getwd() 130 *url = "file://" + path.Join(pwd, file) 131 } 132 t := &http.Transport{} 133 t.RegisterProtocol("file", http.NewFileTransport(http.Dir("/"))) 134 c := &http.Client{Transport: t} 135 resp, err := c.Get(*url) 136 if err != nil { 137 logger.Fatal(err) 138 } 139 if resp.StatusCode != 200 { 140 logger.Fatal("bad GET status for "+file, resp.Status) 141 } 142 f := resp.Body 143 defer f.Close() 144 scanner := bufio.NewScanner(f) 145 for scanner.Scan() { 146 line := scanner.Text() 147 if len(line) == 0 || line[0] == '#' { 148 continue 149 } 150 m := partRe.FindStringSubmatch(line) 151 if m != nil { 152 if len(m) < 3 { 153 logger.Fatal("Failed to parse Part: ", line) 154 } 155 i, err := strconv.Atoi(m[1]) 156 if err != nil { 157 logger.Fatal(err) 158 } 159 name := m[2] 160 part = append(part, Part{name: name[:len(name)-1], number: i}) 161 continue 162 } 163 m = testRe.FindStringSubmatch(line) 164 if m == nil || len(m) < 7 { 165 logger.Fatalf(`Failed to parse: "%s" result: %#v`, line, m) 166 } 167 test := Test{name: m[6], partnr: len(part) - 1, number: counter} 168 counter++ 169 for j := 1; j < len(m)-1; j++ { 170 for _, split := range strings.Split(m[j], " ") { 171 r, err := strconv.ParseUint(split, 16, 64) 172 if err != nil { 173 logger.Fatal(err) 174 } 175 if test.r == 0 { 176 // save for CharacterByCharacterTests 177 test.r = rune(r) 178 } 179 var buf [utf8.UTFMax]byte 180 sz := utf8.EncodeRune(buf[:], rune(r)) 181 test.cols[j-1] += string(buf[:sz]) 182 } 183 } 184 part := &part[len(part)-1] 185 part.tests = append(part.tests, test) 186 } 187 if scanner.Err() != nil { 188 logger.Fatal(scanner.Err()) 189 } 190 } 191 192 var fstr = []string{"NFC", "NFD", "NFKC", "NFKD"} 193 194 var errorCount int 195 196 func cmpResult(t *Test, name string, f norm.Form, gold, test, result string) { 197 if gold != result { 198 errorCount++ 199 if errorCount > 20 { 200 return 201 } 202 logger.Printf("%s:%s: %s(%+q)=%+q; want %+q: %s", 203 t.Name(), name, fstr[f], test, result, gold, t.name) 204 } 205 } 206 207 func cmpIsNormal(t *Test, name string, f norm.Form, test string, result, want bool) { 208 if result != want { 209 errorCount++ 210 if errorCount > 20 { 211 return 212 } 213 logger.Printf("%s:%s: %s(%+q)=%v; want %v", t.Name(), name, fstr[f], test, result, want) 214 } 215 } 216 217 func doTest(t *Test, f norm.Form, gold, test string) { 218 testb := []byte(test) 219 result := f.Bytes(testb) 220 cmpResult(t, "Bytes", f, gold, test, string(result)) 221 222 sresult := f.String(test) 223 cmpResult(t, "String", f, gold, test, sresult) 224 225 acc := []byte{} 226 i := norm.Iter{} 227 i.InitString(f, test) 228 for !i.Done() { 229 acc = append(acc, i.Next()...) 230 } 231 cmpResult(t, "Iter.Next", f, gold, test, string(acc)) 232 233 buf := make([]byte, 128) 234 acc = nil 235 for p := 0; p < len(testb); { 236 nDst, nSrc, _ := f.Transform(buf, testb[p:], true) 237 acc = append(acc, buf[:nDst]...) 238 p += nSrc 239 } 240 cmpResult(t, "Transform", f, gold, test, string(acc)) 241 242 for i := range test { 243 out := f.Append(f.Bytes([]byte(test[:i])), []byte(test[i:])...) 244 cmpResult(t, fmt.Sprintf(":Append:%d", i), f, gold, test, string(out)) 245 } 246 cmpIsNormal(t, "IsNormal", f, test, f.IsNormal([]byte(test)), test == gold) 247 cmpIsNormal(t, "IsNormalString", f, test, f.IsNormalString(test), test == gold) 248 } 249 250 func doConformanceTests(t *Test, partn int) { 251 for i := 0; i <= 2; i++ { 252 doTest(t, norm.NFC, t.cols[1], t.cols[i]) 253 doTest(t, norm.NFD, t.cols[2], t.cols[i]) 254 doTest(t, norm.NFKC, t.cols[3], t.cols[i]) 255 doTest(t, norm.NFKD, t.cols[4], t.cols[i]) 256 } 257 for i := 3; i <= 4; i++ { 258 doTest(t, norm.NFC, t.cols[3], t.cols[i]) 259 doTest(t, norm.NFD, t.cols[4], t.cols[i]) 260 doTest(t, norm.NFKC, t.cols[3], t.cols[i]) 261 doTest(t, norm.NFKD, t.cols[4], t.cols[i]) 262 } 263 } 264 265 func CharacterByCharacterTests() { 266 tests := part[1].tests 267 var last rune = 0 268 for i := 0; i <= len(tests); i++ { // last one is special case 269 var r rune 270 if i == len(tests) { 271 r = 0x2FA1E // Don't have to go to 0x10FFFF 272 } else { 273 r = tests[i].r 274 } 275 for last++; last < r; last++ { 276 // Check all characters that were not explicitly listed in the test. 277 t := &Test{partnr: 1, number: -1} 278 char := string(last) 279 doTest(t, norm.NFC, char, char) 280 doTest(t, norm.NFD, char, char) 281 doTest(t, norm.NFKC, char, char) 282 doTest(t, norm.NFKD, char, char) 283 } 284 if i < len(tests) { 285 doConformanceTests(&tests[i], 1) 286 } 287 } 288 } 289 290 func StandardTests() { 291 for _, j := range []int{0, 2, 3} { 292 for _, test := range part[j].tests { 293 doConformanceTests(&test, j) 294 } 295 } 296 } 297 298 // PerformanceTest verifies that normalization is O(n). If any of the 299 // code does not properly check for maxCombiningChars, normalization 300 // may exhibit O(n**2) behavior. 301 func PerformanceTest() { 302 runtime.GOMAXPROCS(2) 303 success := make(chan bool, 1) 304 go func() { 305 buf := bytes.Repeat([]byte("\u035D"), 1024*1024) 306 buf = append(buf, "\u035B"...) 307 norm.NFC.Append(nil, buf...) 308 success <- true 309 }() 310 timeout := time.After(1 * time.Second) 311 select { 312 case <-success: 313 // test completed before the timeout 314 case <-timeout: 315 errorCount++ 316 logger.Printf(`unexpectedly long time to complete PerformanceTest`) 317 } 318 }