github.com/graybobo/golang.org-package-offline-cache@v0.0.0-20200626051047-6608995c132f/x/text/collate/regtest.go (about) 1 // Copyright 2012 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // +build ignore 6 7 package main 8 9 import ( 10 "archive/zip" 11 "bufio" 12 "bytes" 13 "flag" 14 "fmt" 15 "io" 16 "io/ioutil" 17 "log" 18 "net/http" 19 "os" 20 "path" 21 "regexp" 22 "strconv" 23 "strings" 24 "unicode" 25 "unicode/utf8" 26 27 "golang.org/x/text/collate" 28 "golang.org/x/text/collate/build" 29 "golang.org/x/text/language" 30 ) 31 32 // This regression test runs tests for the test files in CollationTest.zip 33 // (taken from http://www.unicode.org/Public/UCA/<unicode.Version>/). 34 // 35 // The test files have the following form: 36 // # header 37 // 0009 0021; # ('\u0009') <CHARACTER TABULATION> [| | | 0201 025E] 38 // 0009 003F; # ('\u0009') <CHARACTER TABULATION> [| | | 0201 0263] 39 // 000A 0021; # ('\u000A') <LINE FEED (LF)> [| | | 0202 025E] 40 // 000A 003F; # ('\u000A') <LINE FEED (LF)> [| | | 0202 0263] 41 // 42 // The part before the semicolon is the hex representation of a sequence 43 // of runes. After the hash mark is a comment. The strings 44 // represented by rune sequence are in the file in sorted order, as 45 // defined by the DUCET. 
var (
	// testdata is the location of the zip archive holding the collation test files.
	testdata = flag.String("testdata",
		"http://www.unicode.org/Public/UCA/"+unicode.Version+"/CollationTest.zip",
		"URL of Unicode collation tests zip file")
	// ducet is the location of the collation element table read by parseUCA.
	ducet = flag.String("ducet",
		"http://unicode.org/Public/UCA/"+unicode.Version+"/allkeys.txt",
		"URL of the Default Unicode Collation Element Table (DUCET).")
	// localFiles makes openReader read from the current directory instead of
	// fetching the given URL.
	localFiles = flag.Bool("local",
		false,
		"data files have been copied to the current directory; for debugging only")
)

// Test holds the entries read from a single test file.
type Test struct {
	name    string   // base name of the test file
	str     [][]byte // UTF-8 encoding of each entry's rune sequence, in file order
	comment []string // comment text following "# " for the entry at the same index
}

var (
	// versionRe extracts the version from a "# UCA Version: ..." header line.
	versionRe = regexp.MustCompile(`# UCA Version: (.*)\n?$`)
	// testRe splits a test line into the hex rune sequence before the
	// semicolon (group 1) and the comment after "# " (group 2).
	testRe = regexp.MustCompile(`^([\dA-F ]+);.*# (.*)\n?$`)
)

// Error logs e and terminates the program if e is non-nil; it is a no-op
// otherwise.
func Error(e error) {
	if e != nil {
		log.Fatal(e)
	}
}

// openReader opens the URL given by url and returns its body as an
// io.ReadCloser. If the -local flag is set, the base name of url is read from
// the current working directory instead. Any failure, including a non-200
// response status, terminates the program via Error; openReader never returns
// nil. The caller is responsible for closing the returned reader.
func openReader(url string) io.ReadCloser {
	if *localFiles {
		pwd, err := os.Getwd()
		Error(err)
		url = "file://" + path.Join(pwd, path.Base(url))
	}
	// Register a file transport rooted at "/" so that file:// URLs are served
	// through the same client as http:// URLs.
	t := &http.Transport{}
	t.RegisterProtocol("file", http.NewFileTransport(http.Dir("/")))
	c := &http.Client{Transport: t}
	resp, err := c.Get(url)
	Error(err)
	if resp.StatusCode != http.StatusOK {
		resp.Body.Close()
		Error(fmt.Errorf(`bad GET status for "%s": %s`, url, resp.Status))
	}
	return resp.Body
}

// parseUCA parses a Default Unicode Collation Element Table of the format
// specified in http://www.unicode.org/reports/tr10/#File_Format
// and adds every entry it reads to builder.
93 func parseUCA(builder *build.Builder) { 94 r := openReader(*ducet) 95 defer r.Close() 96 input := bufio.NewReader(r) 97 colelem := regexp.MustCompile(`\[([.*])([0-9A-F.]+)\]`) 98 for i := 1; true; i++ { 99 l, prefix, err := input.ReadLine() 100 if err == io.EOF { 101 break 102 } 103 Error(err) 104 line := string(l) 105 if prefix { 106 log.Fatalf("%d: buffer overflow", i) 107 } 108 if len(line) == 0 || line[0] == '#' { 109 continue 110 } 111 if line[0] == '@' { 112 if strings.HasPrefix(line[1:], "version ") { 113 if v := strings.Split(line[1:], " ")[1]; v != unicode.Version { 114 log.Fatalf("incompatible version %s; want %s", v, unicode.Version) 115 } 116 } 117 } else { 118 // parse entries 119 part := strings.Split(line, " ; ") 120 if len(part) != 2 { 121 log.Fatalf("%d: production rule without ';': %v", i, line) 122 } 123 lhs := []rune{} 124 for _, v := range strings.Split(part[0], " ") { 125 if v != "" { 126 lhs = append(lhs, rune(convHex(i, v))) 127 } 128 } 129 vars := []int{} 130 rhs := [][]int{} 131 for i, m := range colelem.FindAllStringSubmatch(part[1], -1) { 132 if m[1] == "*" { 133 vars = append(vars, i) 134 } 135 elem := []int{} 136 for _, h := range strings.Split(m[2], ".") { 137 elem = append(elem, convHex(i, h)) 138 } 139 rhs = append(rhs, elem) 140 } 141 builder.Add(lhs, rhs, vars) 142 } 143 } 144 } 145 146 func convHex(line int, s string) int { 147 r, e := strconv.ParseInt(s, 16, 32) 148 if e != nil { 149 log.Fatalf("%d: %v", line, e) 150 } 151 return int(r) 152 } 153 154 func loadTestData() []Test { 155 f := openReader(*testdata) 156 buffer, err := ioutil.ReadAll(f) 157 f.Close() 158 Error(err) 159 archive, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer))) 160 Error(err) 161 tests := []Test{} 162 for _, f := range archive.File { 163 // Skip the short versions, which are simply duplicates of the long versions. 
164 if strings.Contains(f.Name, "SHORT") || f.FileInfo().IsDir() { 165 continue 166 } 167 ff, err := f.Open() 168 Error(err) 169 defer ff.Close() 170 scanner := bufio.NewScanner(ff) 171 test := Test{name: path.Base(f.Name)} 172 for scanner.Scan() { 173 line := scanner.Text() 174 if len(line) <= 1 || line[0] == '#' { 175 if m := versionRe.FindStringSubmatch(line); m != nil { 176 if m[1] != unicode.Version { 177 log.Printf("warning:%s: version is %s; want %s", f.Name, m[1], unicode.Version) 178 } 179 } 180 continue 181 } 182 m := testRe.FindStringSubmatch(line) 183 if m == nil || len(m) < 3 { 184 log.Fatalf(`Failed to parse: "%s" result: %#v`, line, m) 185 } 186 str := []byte{} 187 // In the regression test data (unpaired) surrogates are assigned a weight 188 // corresponding to their code point value. However, utf8.DecodeRune, 189 // which is used to compute the implicit weight, assigns FFFD to surrogates. 190 // We therefore skip tests with surrogates. This skips about 35 entries 191 // per test. 192 valid := true 193 for _, split := range strings.Split(m[1], " ") { 194 r, err := strconv.ParseUint(split, 16, 64) 195 Error(err) 196 valid = valid && utf8.ValidRune(rune(r)) 197 str = append(str, string(rune(r))...) 198 } 199 if valid { 200 test.str = append(test.str, str) 201 test.comment = append(test.comment, m[2]) 202 } 203 } 204 if scanner.Err() != nil { 205 log.Fatal(scanner.Err()) 206 } 207 tests = append(tests, test) 208 } 209 return tests 210 } 211 212 var errorCount int 213 214 func fail(t Test, pattern string, args ...interface{}) { 215 format := fmt.Sprintf("error:%s:%s", t.name, pattern) 216 log.Printf(format, args...) 
217 errorCount++ 218 if errorCount > 30 { 219 log.Fatal("too many errors") 220 } 221 } 222 223 func runes(b []byte) []rune { 224 return []rune(string(b)) 225 } 226 227 var shifted = language.MustParse("und-u-ka-shifted-ks-level4") 228 229 func doTest(t Test) { 230 bld := build.NewBuilder() 231 parseUCA(bld) 232 w, err := bld.Build() 233 Error(err) 234 var tag language.Tag 235 if !strings.Contains(t.name, "NON_IGNOR") { 236 tag = shifted 237 } 238 c := collate.NewFromTable(w, collate.OptionsFromTag(tag)) 239 b := &collate.Buffer{} 240 prev := t.str[0] 241 for i := 1; i < len(t.str); i++ { 242 b.Reset() 243 s := t.str[i] 244 ka := c.Key(b, prev) 245 kb := c.Key(b, s) 246 if r := bytes.Compare(ka, kb); r == 1 { 247 fail(t, "%d: Key(%.4X) < Key(%.4X) (%X < %X) == %d; want -1 or 0", i, []rune(string(prev)), []rune(string(s)), ka, kb, r) 248 prev = s 249 continue 250 } 251 if r := c.Compare(prev, s); r == 1 { 252 fail(t, "%d: Compare(%.4X, %.4X) == %d; want -1 or 0", i, runes(prev), runes(s), r) 253 } 254 if r := c.Compare(s, prev); r == -1 { 255 fail(t, "%d: Compare(%.4X, %.4X) == %d; want 1 or 0", i, runes(s), runes(prev), r) 256 } 257 prev = s 258 } 259 } 260 261 func main() { 262 flag.Parse() 263 for _, test := range loadTestData() { 264 doTest(test) 265 } 266 if errorCount == 0 { 267 fmt.Println("PASS") 268 } 269 }