golang.org/x/text@v0.14.0/collate/tools/colcmp/colcmp.go (about) 1 // Copyright 2012 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package main // import "golang.org/x/text/collate/tools/colcmp" 6 7 import ( 8 "bytes" 9 "flag" 10 "fmt" 11 "io" 12 "log" 13 "os" 14 "runtime/pprof" 15 "sort" 16 "strconv" 17 "strings" 18 "text/template" 19 "time" 20 21 "golang.org/x/text/unicode/norm" 22 ) 23 24 var ( 25 doNorm = flag.Bool("norm", false, "normalize input strings") 26 cases = flag.Bool("case", false, "generate case variants") 27 verbose = flag.Bool("verbose", false, "print results") 28 debug = flag.Bool("debug", false, "output debug information") 29 locales = flag.String("locale", "en_US", "the locale to use. May be a comma-separated list for some commands.") 30 col = flag.String("col", "go", "collator to test") 31 gold = flag.String("gold", "go", "collator used as the gold standard") 32 usecmp = flag.Bool("usecmp", false, 33 `use comparison instead of sort keys when sorting. Must be "test", "gold" or "both"`) 34 cpuprofile = flag.String("cpuprofile", "", "write cpu profile to file") 35 exclude = flag.String("exclude", "", "exclude errors that contain any of the characters") 36 limit = flag.Int("limit", 5000000, "maximum number of samples to generate for one run") 37 ) 38 39 func failOnError(err error) { 40 if err != nil { 41 log.Panic(err) 42 } 43 } 44 45 // Test holds test data for testing a locale-collator pair. 46 // Test also provides functionality that is commonly used by the various commands. 47 type Test struct { 48 ctxt *Context 49 Name string 50 Locale string 51 ColName string 52 53 Col Collator 54 UseCompare bool 55 56 Input []Input 57 Duration time.Duration 58 59 start time.Time 60 msg string 61 count int 62 } 63 64 func (t *Test) clear() { 65 t.Col = nil 66 t.Input = nil 67 } 68 69 const ( 70 msgGeneratingInput = "generating input" 71 msgGeneratingKeys = "generating keys" 72 msgSorting = "sorting" 73 ) 74 75 var lastLen = 0 76 77 func (t *Test) SetStatus(msg string) { 78 if *debug || *verbose { 79 fmt.Printf("%s: %s...\n", t.Name, msg) 80 } else if t.ctxt.out != nil { 81 fmt.Fprint(t.ctxt.out, strings.Repeat(" ", lastLen)) 82 fmt.Fprint(t.ctxt.out, strings.Repeat("\b", lastLen)) 83 fmt.Fprint(t.ctxt.out, msg, "...") 84 lastLen = len(msg) + 3 85 fmt.Fprint(t.ctxt.out, strings.Repeat("\b", lastLen)) 86 } 87 } 88 89 // Start is used by commands to signal the start of an operation. 90 func (t *Test) Start(msg string) { 91 t.SetStatus(msg) 92 t.count = 0 93 t.msg = msg 94 t.start = time.Now() 95 } 96 97 // Stop is used by commands to signal the end of an operation. 98 func (t *Test) Stop() (time.Duration, int) { 99 d := time.Now().Sub(t.start) 100 t.Duration += d 101 if *debug || *verbose { 102 fmt.Printf("%s: %s done. (%.3fs /%dK ops)\n", t.Name, t.msg, d.Seconds(), t.count/1000) 103 } 104 return d, t.count 105 } 106 107 // generateKeys generates sort keys for all the inputs. 108 func (t *Test) generateKeys() { 109 for i, s := range t.Input { 110 b := t.Col.Key(s) 111 t.Input[i].key = b 112 if *debug { 113 fmt.Printf("%s (%X): %X\n", string(s.UTF8), s.UTF16, b) 114 } 115 } 116 } 117 118 // Sort sorts the inputs. It generates sort keys if this is required by the 119 // chosen sort method. 120 func (t *Test) Sort() (tkey, tsort time.Duration, nkey, nsort int) { 121 if *cpuprofile != "" { 122 f, err := os.Create(*cpuprofile) 123 failOnError(err) 124 pprof.StartCPUProfile(f) 125 defer pprof.StopCPUProfile() 126 } 127 if t.UseCompare || t.Col.Key(t.Input[0]) == nil { 128 t.Start(msgSorting) 129 sort.Sort(&testCompare{*t}) 130 tsort, nsort = t.Stop() 131 } else { 132 t.Start(msgGeneratingKeys) 133 t.generateKeys() 134 t.count = len(t.Input) 135 tkey, nkey = t.Stop() 136 t.Start(msgSorting) 137 sort.Sort(t) 138 tsort, nsort = t.Stop() 139 } 140 return 141 } 142 143 func (t *Test) Swap(a, b int) { 144 t.Input[a], t.Input[b] = t.Input[b], t.Input[a] 145 } 146 147 func (t *Test) Less(a, b int) bool { 148 t.count++ 149 return bytes.Compare(t.Input[a].key, t.Input[b].key) == -1 150 } 151 152 func (t Test) Len() int { 153 return len(t.Input) 154 } 155 156 type testCompare struct { 157 Test 158 } 159 160 func (t *testCompare) Less(a, b int) bool { 161 t.count++ 162 return t.Col.Compare(t.Input[a], t.Input[b]) == -1 163 } 164 165 type testRestore struct { 166 Test 167 } 168 169 func (t *testRestore) Less(a, b int) bool { 170 return t.Input[a].index < t.Input[b].index 171 } 172 173 // GenerateInput generates input phrases for the locale tested by t. 174 func (t *Test) GenerateInput() { 175 t.Input = nil 176 if t.ctxt.lastLocale != t.Locale { 177 gen := phraseGenerator{} 178 gen.init(t.Locale) 179 t.SetStatus(msgGeneratingInput) 180 t.ctxt.lastInput = nil // allow the previous value to be garbage collected. 181 t.Input = gen.generate(*doNorm) 182 t.ctxt.lastInput = t.Input 183 t.ctxt.lastLocale = t.Locale 184 } else { 185 t.Input = t.ctxt.lastInput 186 for i := range t.Input { 187 t.Input[i].key = nil 188 } 189 sort.Sort(&testRestore{*t}) 190 } 191 } 192 193 // Context holds all tests and settings translated from command line options. 194 type Context struct { 195 test []*Test 196 last *Test 197 198 lastLocale string 199 lastInput []Input 200 201 out io.Writer 202 } 203 204 func (ts *Context) Printf(format string, a ...interface{}) { 205 ts.assertBuf() 206 fmt.Fprintf(ts.out, format, a...) 207 } 208 209 func (ts *Context) Print(a ...interface{}) { 210 ts.assertBuf() 211 fmt.Fprint(ts.out, a...) 212 } 213 214 // assertBuf sets up an io.Writer for output, if it doesn't already exist. 215 // In debug and verbose mode, output is buffered so that the regular output 216 // will not interfere with the additional output. Otherwise, output is 217 // written directly to stdout for a more responsive feel. 218 func (ts *Context) assertBuf() { 219 if ts.out != nil { 220 return 221 } 222 if *debug || *verbose { 223 ts.out = &bytes.Buffer{} 224 } else { 225 ts.out = os.Stdout 226 } 227 } 228 229 // flush flushes the contents of ts.out to stdout, if it is not stdout already. 230 func (ts *Context) flush() { 231 if ts.out != nil { 232 if _, ok := ts.out.(io.ReadCloser); !ok { 233 io.Copy(os.Stdout, ts.out.(io.Reader)) 234 } 235 } 236 } 237 238 // parseTests creates all tests from command lines and returns 239 // a Context to hold them. 240 func parseTests() *Context { 241 ctxt := &Context{} 242 colls := strings.Split(*col, ",") 243 for _, loc := range strings.Split(*locales, ",") { 244 loc = strings.TrimSpace(loc) 245 for _, name := range colls { 246 name = strings.TrimSpace(name) 247 col := getCollator(name, loc) 248 ctxt.test = append(ctxt.test, &Test{ 249 ctxt: ctxt, 250 Locale: loc, 251 ColName: name, 252 UseCompare: *usecmp, 253 Col: col, 254 }) 255 } 256 } 257 return ctxt 258 } 259 260 func (c *Context) Len() int { 261 return len(c.test) 262 } 263 264 func (c *Context) Test(i int) *Test { 265 if c.last != nil { 266 c.last.clear() 267 } 268 c.last = c.test[i] 269 return c.last 270 } 271 272 func parseInput(args []string) []Input { 273 input := []Input{} 274 for _, s := range args { 275 rs := []rune{} 276 for len(s) > 0 { 277 var r rune 278 r, _, s, _ = strconv.UnquoteChar(s, '\'') 279 rs = append(rs, r) 280 } 281 s = string(rs) 282 if *doNorm { 283 s = norm.NFD.String(s) 284 } 285 input = append(input, makeInputString(s)) 286 } 287 return input 288 } 289 290 // A Command is an implementation of a colcmp command. 291 type Command struct { 292 Run func(cmd *Context, args []string) 293 Usage string 294 Short string 295 Long string 296 } 297 298 func (cmd Command) Name() string { 299 return strings.SplitN(cmd.Usage, " ", 2)[0] 300 } 301 302 var commands = []*Command{ 303 cmdSort, 304 cmdBench, 305 cmdRegress, 306 } 307 308 const sortHelp = ` 309 Sort sorts a given list of strings. Strings are separated by whitespace. 310 ` 311 312 var cmdSort = &Command{ 313 Run: runSort, 314 Usage: "sort <string>*", 315 Short: "sort a given list of strings", 316 Long: sortHelp, 317 } 318 319 func runSort(ctxt *Context, args []string) { 320 input := parseInput(args) 321 if len(input) == 0 { 322 log.Fatalf("Nothing to sort.") 323 } 324 if ctxt.Len() > 1 { 325 ctxt.Print("COLL LOCALE RESULT\n") 326 } 327 for i := 0; i < ctxt.Len(); i++ { 328 t := ctxt.Test(i) 329 t.Input = append(t.Input, input...) 330 t.Sort() 331 if ctxt.Len() > 1 { 332 ctxt.Printf("%-5s %-5s ", t.ColName, t.Locale) 333 } 334 for _, s := range t.Input { 335 ctxt.Print(string(s.UTF8), " ") 336 } 337 ctxt.Print("\n") 338 } 339 } 340 341 const benchHelp = ` 342 Bench runs a benchmark for the given list of collator implementations. 343 If no collator implementations are given, the go collator will be used. 344 ` 345 346 var cmdBench = &Command{ 347 Run: runBench, 348 Usage: "bench", 349 Short: "benchmark a given list of collator implementations", 350 Long: benchHelp, 351 } 352 353 func runBench(ctxt *Context, args []string) { 354 ctxt.Printf("%-7s %-5s %-6s %-24s %-24s %-5s %s\n", "LOCALE", "COLL", "N", "KEYS", "SORT", "AVGLN", "TOTAL") 355 for i := 0; i < ctxt.Len(); i++ { 356 t := ctxt.Test(i) 357 ctxt.Printf("%-7s %-5s ", t.Locale, t.ColName) 358 t.GenerateInput() 359 ctxt.Printf("%-6s ", fmt.Sprintf("%dK", t.Len()/1000)) 360 tkey, tsort, nkey, nsort := t.Sort() 361 p := func(dur time.Duration, n int) { 362 s := "" 363 if dur > 0 { 364 s = fmt.Sprintf("%6.3fs ", dur.Seconds()) 365 if n > 0 { 366 s += fmt.Sprintf("%15s", fmt.Sprintf("(%4.2f ns/op)", float64(dur)/float64(n))) 367 } 368 } 369 ctxt.Printf("%-24s ", s) 370 } 371 p(tkey, nkey) 372 p(tsort, nsort) 373 374 total := 0 375 for _, s := range t.Input { 376 total += len(s.key) 377 } 378 ctxt.Printf("%-5d ", total/t.Len()) 379 ctxt.Printf("%6.3fs\n", t.Duration.Seconds()) 380 if *debug { 381 for _, s := range t.Input { 382 fmt.Print(string(s.UTF8), " ") 383 } 384 fmt.Println() 385 } 386 } 387 } 388 389 const regressHelp = ` 390 Regress runs a monkey test by comparing the results of randomly generated tests 391 between two implementations of a collator. The user may optionally pass a list 392 of strings to regress against instead of the default test set. 393 ` 394 395 var cmdRegress = &Command{ 396 Run: runRegress, 397 Usage: "regress -gold=<col> -test=<col> [string]*", 398 Short: "run a monkey test between two collators", 399 Long: regressHelp, 400 } 401 402 const failedKeyCompare = ` 403 %s:%d: incorrect comparison result for input: 404 a: %q (%.4X) 405 key: %s 406 b: %q (%.4X) 407 key: %s 408 Compare(a, b) = %d; want %d. 409 410 gold keys: 411 a: %s 412 b: %s 413 ` 414 415 const failedCompare = ` 416 %s:%d: incorrect comparison result for input: 417 a: %q (%.4X) 418 b: %q (%.4X) 419 Compare(a, b) = %d; want %d. 420 ` 421 422 func keyStr(b []byte) string { 423 buf := &bytes.Buffer{} 424 for _, v := range b { 425 fmt.Fprintf(buf, "%.2X ", v) 426 } 427 return buf.String() 428 } 429 430 func runRegress(ctxt *Context, args []string) { 431 input := parseInput(args) 432 for i := 0; i < ctxt.Len(); i++ { 433 t := ctxt.Test(i) 434 if len(input) > 0 { 435 t.Input = append(t.Input, input...) 436 } else { 437 t.GenerateInput() 438 } 439 t.Sort() 440 count := 0 441 gold := getCollator(*gold, t.Locale) 442 for i := 1; i < len(t.Input); i++ { 443 ia := t.Input[i-1] 444 ib := t.Input[i] 445 if bytes.IndexAny(ib.UTF8, *exclude) != -1 { 446 i++ 447 continue 448 } 449 if bytes.IndexAny(ia.UTF8, *exclude) != -1 { 450 continue 451 } 452 goldCmp := gold.Compare(ia, ib) 453 if cmp := bytes.Compare(ia.key, ib.key); cmp != goldCmp { 454 count++ 455 a := string(ia.UTF8) 456 b := string(ib.UTF8) 457 fmt.Printf(failedKeyCompare, t.Locale, i-1, a, []rune(a), keyStr(ia.key), b, []rune(b), keyStr(ib.key), cmp, goldCmp, keyStr(gold.Key(ia)), keyStr(gold.Key(ib))) 458 } else if cmp := t.Col.Compare(ia, ib); cmp != goldCmp { 459 count++ 460 a := string(ia.UTF8) 461 b := string(ib.UTF8) 462 fmt.Printf(failedCompare, t.Locale, i-1, a, []rune(a), b, []rune(b), cmp, goldCmp) 463 } 464 } 465 if count > 0 { 466 ctxt.Printf("Found %d inconsistencies in %d entries.\n", count, t.Len()-1) 467 } 468 } 469 } 470 471 const helpTemplate = ` 472 colcmp is a tool for testing and benchmarking collation 473 474 Usage: colcmp command [arguments] 475 476 The commands are: 477 {{range .}} 478 {{.Name | printf "%-11s"}} {{.Short}}{{end}} 479 480 Use "col help [topic]" for more information about that topic. 481 ` 482 483 const detailedHelpTemplate = ` 484 Usage: colcmp {{.Usage}} 485 486 {{.Long | trim}} 487 ` 488 489 func runHelp(args []string) { 490 t := template.New("help") 491 t.Funcs(template.FuncMap{"trim": strings.TrimSpace}) 492 if len(args) < 1 { 493 template.Must(t.Parse(helpTemplate)) 494 failOnError(t.Execute(os.Stderr, &commands)) 495 } else { 496 for _, cmd := range commands { 497 if cmd.Name() == args[0] { 498 template.Must(t.Parse(detailedHelpTemplate)) 499 failOnError(t.Execute(os.Stderr, cmd)) 500 os.Exit(0) 501 } 502 } 503 log.Fatalf("Unknown command %q. Run 'colcmp help'.", args[0]) 504 } 505 os.Exit(0) 506 } 507 508 func main() { 509 flag.Parse() 510 log.SetFlags(0) 511 512 ctxt := parseTests() 513 514 if flag.NArg() < 1 { 515 runHelp(nil) 516 } 517 args := flag.Args()[1:] 518 if flag.Arg(0) == "help" { 519 runHelp(args) 520 } 521 for _, cmd := range commands { 522 if cmd.Name() == flag.Arg(0) { 523 cmd.Run(ctxt, args) 524 ctxt.flush() 525 return 526 } 527 } 528 runHelp(flag.Args()) 529 }