golang.org/x/text@v0.14.0/language/match_test.go (about) 1 // Copyright 2013 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package language 6 7 import ( 8 "bytes" 9 "flag" 10 "fmt" 11 "os" 12 "path" 13 "path/filepath" 14 "strings" 15 "testing" 16 "unicode/utf8" 17 18 "golang.org/x/text/internal/testtext" 19 "golang.org/x/text/internal/ucd" 20 ) 21 22 var verbose = flag.Bool("verbose", false, "set to true to print the internal tables of matchers") 23 24 func TestCompliance(t *testing.T) { 25 filepath.Walk("testdata", func(file string, info os.FileInfo, err error) error { 26 if info.IsDir() { 27 return nil 28 } 29 r, err := os.Open(file) 30 if err != nil { 31 t.Fatal(err) 32 } 33 ucd.Parse(r, func(p *ucd.Parser) { 34 name := strings.ReplaceAll(path.Join(p.String(0), p.String(1)), " ", "") 35 if skip[name] { 36 return 37 } 38 t.Run(info.Name()+"/"+short(name), func(t *testing.T) { 39 supported := makeTagList(p.String(0)) 40 desired := makeTagList(p.String(1)) 41 gotCombined, index, conf := NewMatcher(supported).Match(desired...) 42 43 gotMatch := supported[index] 44 wantMatch := Raw.Make(p.String(2)) // wantMatch may be null 45 if gotMatch != wantMatch { 46 t.Fatalf("match: got %q; want %q (%v)", gotMatch, wantMatch, conf) 47 } 48 if tag := strings.TrimSpace(p.String(3)); tag != "" { 49 wantCombined := Raw.MustParse(tag) 50 if err == nil && gotCombined != wantCombined { 51 t.Errorf("combined: got %q; want %q (%v)", gotCombined, wantCombined, conf) 52 } 53 } 54 }) 55 }) 56 return nil 57 }) 58 } 59 60 func short(s string) string { 61 if len(s) <= 50 { 62 return s 63 } 64 var i int 65 for i = 1; i < utf8.UTFMax && !utf8.RuneStart(s[50-i]); i++ { 66 } 67 return s[:50-i] + "…" 68 } 69 70 var skip = map[string]bool{ 71 // TODO: bugs 72 // Honor the wildcard match. This may only be useful to select non-exact 73 // stuff. 74 "mul,af/nl": true, // match: got "af"; want "mul" 75 76 // TODO: include other extensions. 77 // combined: got "en-GB-u-ca-buddhist-nu-arab"; want "en-GB-fonipa-t-m0-iso-i0-pinyin-u-ca-buddhist-nu-arab" 78 "und,en-GB-u-sd-gbsct/en-fonipa-u-nu-Arab-ca-buddhist-t-m0-iso-i0-pinyin": true, 79 80 // Inconsistencies with Mark Davis' implementation where it is not clear 81 // which is better. 82 83 // Inconsistencies in combined. I think the Go approach is more appropriate. 84 // We could use -u-rg- as alternative. 85 "und,fr/fr-BE-fonipa": true, // combined: got "fr"; want "fr-BE-fonipa" 86 "und,fr-CA/fr-BE-fonipa": true, // combined: got "fr-CA"; want "fr-BE-fonipa" 87 "und,fr-fonupa/fr-BE-fonipa": true, // combined: got "fr-fonupa"; want "fr-BE-fonipa" 88 "und,no/nn-BE-fonipa": true, // combined: got "no"; want "no-BE-fonipa" 89 "50,und,fr-CA-fonupa/fr-BE-fonipa": true, // combined: got "fr-CA-fonupa"; want "fr-BE-fonipa" 90 91 // The initial number is a threshold. As we don't use scoring, we will not 92 // implement this. 93 "50,und,fr-Cyrl-CA-fonupa/fr-BE-fonipa": true, 94 // match: got "und"; want "fr-Cyrl-CA-fonupa" 95 // combined: got "und"; want "fr-Cyrl-BE-fonipa" 96 97 // Other interesting cases to test: 98 // - Should same language or same script have the preference if there is 99 // usually no understanding of the other script? 100 // - More specific region in desired may replace enclosing supported. 101 } 102 103 func makeTagList(s string) (tags []Tag) { 104 for _, s := range strings.Split(s, ",") { 105 tags = append(tags, mk(strings.TrimSpace(s))) 106 } 107 return tags 108 } 109 110 func TestMatchStrings(t *testing.T) { 111 testCases := []struct { 112 supported string 113 desired string // strings separated by | 114 tag string 115 index int 116 }{{ 117 supported: "en", 118 desired: "", 119 tag: "en", 120 index: 0, 121 }, { 122 supported: "en", 123 desired: "nl", 124 tag: "en", 125 index: 0, 126 }, { 127 supported: "en,nl", 128 desired: "nl", 129 tag: "nl", 130 index: 1, 131 }, { 132 supported: "en,nl", 133 desired: "nl|en", 134 tag: "nl", 135 index: 1, 136 }, { 137 supported: "en-GB,nl", 138 desired: "en ; q=0.1,nl", 139 tag: "nl", 140 index: 1, 141 }, { 142 supported: "en-GB,nl", 143 desired: "en;q=0.005 | dk; q=0.1,nl ", 144 tag: "en-GB", 145 index: 0, 146 }, { 147 // do not match faulty tags with und 148 supported: "en,und", 149 desired: "|en", 150 tag: "en", 151 index: 0, 152 }} 153 for _, tc := range testCases { 154 t.Run(path.Join(tc.supported, tc.desired), func(t *testing.T) { 155 m := NewMatcher(makeTagList(tc.supported)) 156 tag, index := MatchStrings(m, strings.Split(tc.desired, "|")...) 157 if tag.String() != tc.tag || index != tc.index { 158 t.Errorf("got %v, %d; want %v, %d", tag, index, tc.tag, tc.index) 159 } 160 }) 161 } 162 } 163 164 func TestRegionGroups(t *testing.T) { 165 testCases := []struct { 166 a, b string 167 distance uint8 168 }{ 169 {"zh-TW", "zh-HK", 5}, 170 {"zh-MO", "zh-HK", 4}, 171 {"es-ES", "es-AR", 5}, 172 {"es-ES", "es", 4}, 173 {"es-419", "es-MX", 4}, 174 {"es-AR", "es-MX", 4}, 175 {"es-ES", "es-MX", 5}, 176 {"es-PT", "es-MX", 5}, 177 } 178 for _, tc := range testCases { 179 a := MustParse(tc.a) 180 aScript, _ := a.Script() 181 b := MustParse(tc.b) 182 bScript, _ := b.Script() 183 184 if aScript != bScript { 185 t.Errorf("scripts differ: %q vs %q", aScript, bScript) 186 continue 187 } 188 d, _ := regionGroupDist(a.region(), b.region(), aScript.scriptID, a.lang()) 189 if d != tc.distance { 190 t.Errorf("got %q; want %q", d, tc.distance) 191 } 192 } 193 } 194 195 func TestIsParadigmLocale(t *testing.T) { 196 testCases := map[string]bool{ 197 "en-US": true, 198 "en-GB": true, 199 "en-VI": false, 200 "es-GB": false, 201 "es-ES": true, 202 "es-419": true, 203 } 204 for str, want := range testCases { 205 tt := Make(str) 206 tag := tt.tag() 207 got := isParadigmLocale(tag.LangID, tag.RegionID) 208 if got != want { 209 t.Errorf("isPL(%q) = %v; want %v", str, got, want) 210 } 211 } 212 } 213 214 // Implementation of String methods for various types for debugging purposes. 215 216 func (m *matcher) String() string { 217 w := &bytes.Buffer{} 218 fmt.Fprintln(w, "Default:", m.default_) 219 for tag, h := range m.index { 220 fmt.Fprintf(w, " %s: %v\n", tag, h) 221 } 222 return w.String() 223 } 224 225 func (h *matchHeader) String() string { 226 w := &bytes.Buffer{} 227 fmt.Fprint(w, "haveTag: ") 228 for _, h := range h.haveTags { 229 fmt.Fprintf(w, "%v, ", h) 230 } 231 return w.String() 232 } 233 234 func (t haveTag) String() string { 235 return fmt.Sprintf("%v:%d:%v:%v-%v|%v", t.tag, t.index, t.conf, t.maxRegion, t.maxScript, t.altScript) 236 } 237 238 func TestIssue43834(t *testing.T) { 239 matcher := NewMatcher([]Tag{English}) 240 241 // ZZ is the largest region code and should not cause overflow. 242 desired, _, err := ParseAcceptLanguage("en-ZZ") 243 if err != nil { 244 t.Error(err) 245 } 246 _, i, _ := matcher.Match(desired...) 247 if i != 0 { 248 t.Errorf("got %v; want 0", i) 249 } 250 } 251 252 func TestBestMatchAlloc(t *testing.T) { 253 m := NewMatcher(makeTagList("en sr nl")) 254 // Go allocates when creating a list of tags from a single tag! 255 list := []Tag{English} 256 avg := testtext.AllocsPerRun(100, func() { 257 m.Match(list...) 258 }) 259 if avg > 0 { 260 t.Errorf("got %f; want 0", avg) 261 } 262 } 263 264 var benchHave = []Tag{ 265 mk("en"), 266 mk("en-GB"), 267 mk("za"), 268 mk("zh-Hant"), 269 mk("zh-Hans-CN"), 270 mk("zh"), 271 mk("zh-HK"), 272 mk("ar-MK"), 273 mk("en-CA"), 274 mk("fr-CA"), 275 mk("fr-US"), 276 mk("fr-CH"), 277 mk("fr"), 278 mk("lt"), 279 mk("lv"), 280 mk("iw"), 281 mk("iw-NL"), 282 mk("he"), 283 mk("he-IT"), 284 mk("tlh"), 285 mk("ja"), 286 mk("ja-Jpan"), 287 mk("ja-Jpan-JP"), 288 mk("de"), 289 mk("de-CH"), 290 mk("de-AT"), 291 mk("de-DE"), 292 mk("sr"), 293 mk("sr-Latn"), 294 mk("sr-Cyrl"), 295 mk("sr-ME"), 296 } 297 298 var benchWant = [][]Tag{ 299 []Tag{ 300 mk("en"), 301 }, 302 []Tag{ 303 mk("en-AU"), 304 mk("de-HK"), 305 mk("nl"), 306 mk("fy"), 307 mk("lv"), 308 }, 309 []Tag{ 310 mk("en-AU"), 311 mk("de-HK"), 312 mk("nl"), 313 mk("fy"), 314 }, 315 []Tag{ 316 mk("ja-Hant"), 317 mk("da-HK"), 318 mk("nl"), 319 mk("zh-TW"), 320 }, 321 []Tag{ 322 mk("ja-Hant"), 323 mk("da-HK"), 324 mk("nl"), 325 mk("hr"), 326 }, 327 } 328 329 func BenchmarkMatch(b *testing.B) { 330 m := newMatcher(benchHave, nil) 331 for i := 0; i < b.N; i++ { 332 for _, want := range benchWant { 333 m.getBest(want...) 334 } 335 } 336 } 337 338 func BenchmarkMatchExact(b *testing.B) { 339 want := mk("en") 340 m := newMatcher(benchHave, nil) 341 for i := 0; i < b.N; i++ { 342 m.getBest(want) 343 } 344 } 345 346 func BenchmarkMatchAltLanguagePresent(b *testing.B) { 347 want := mk("hr") 348 m := newMatcher(benchHave, nil) 349 for i := 0; i < b.N; i++ { 350 m.getBest(want) 351 } 352 } 353 354 func BenchmarkMatchAltLanguageNotPresent(b *testing.B) { 355 want := mk("nn") 356 m := newMatcher(benchHave, nil) 357 for i := 0; i < b.N; i++ { 358 m.getBest(want) 359 } 360 } 361 362 func BenchmarkMatchAltScriptPresent(b *testing.B) { 363 want := mk("zh-Hant-CN") 364 m := newMatcher(benchHave, nil) 365 for i := 0; i < b.N; i++ { 366 m.getBest(want) 367 } 368 } 369 370 func BenchmarkMatchAltScriptNotPresent(b *testing.B) { 371 want := mk("fr-Cyrl") 372 m := newMatcher(benchHave, nil) 373 for i := 0; i < b.N; i++ { 374 m.getBest(want) 375 } 376 } 377 378 func BenchmarkMatchLimitedExact(b *testing.B) { 379 want := []Tag{mk("he-NL"), mk("iw-NL")} 380 m := newMatcher(benchHave, nil) 381 for i := 0; i < b.N; i++ { 382 m.getBest(want...) 383 } 384 }