github.com/graybobo/golang.org-package-offline-cache@v0.0.0-20200626051047-6608995c132f/x/text/language/match_test.go (about) 1 // Copyright 2013 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package language 6 7 import ( 8 "bytes" 9 "flag" 10 "fmt" 11 "strings" 12 "testing" 13 ) 14 15 var verbose = flag.Bool("verbose", false, "set to true to print the internal tables of matchers") 16 17 func TestAddLikelySubtags(t *testing.T) { 18 tests := []struct{ in, out string }{ 19 {"aa", "aa-Latn-ET"}, 20 {"aa-Latn", "aa-Latn-ET"}, 21 {"aa-Arab", "aa-Arab-ET"}, 22 {"aa-Arab-ER", "aa-Arab-ER"}, 23 {"kk", "kk-Cyrl-KZ"}, 24 {"kk-CN", "kk-Arab-CN"}, 25 {"cmn", "cmn"}, 26 {"zh-AU", "zh-Hant-AU"}, 27 {"zh-VN", "zh-Hant-VN"}, 28 {"zh-SG", "zh-Hans-SG"}, 29 {"zh-Hant", "zh-Hant-TW"}, 30 {"zh-Hani", "zh-Hani-CN"}, 31 {"und-Hani", "zh-Hani-CN"}, 32 {"und", "en-Latn-US"}, 33 {"und-GB", "en-Latn-GB"}, 34 {"und-CW", "pap-Latn-CW"}, 35 {"und-YT", "fr-Latn-YT"}, 36 {"und-Arab", "ar-Arab-EG"}, 37 {"und-AM", "hy-Armn-AM"}, 38 {"und-002", "en-Latn-NG"}, 39 {"und-Latn-002", "en-Latn-NG"}, 40 {"en-Latn-002", "en-Latn-NG"}, 41 {"en-002", "en-Latn-NG"}, 42 {"en-001", "en-Latn-US"}, 43 {"und-003", "en-Latn-US"}, 44 {"und-GB", "en-Latn-GB"}, 45 {"Latn-001", "en-Latn-US"}, 46 {"en-001", "en-Latn-US"}, 47 {"es-419", "es-Latn-419"}, 48 {"he-145", "he-Hebr-IL"}, 49 {"ky-145", "ky-Latn-TR"}, 50 {"kk", "kk-Cyrl-KZ"}, 51 // Don't specialize duplicate and ambiguous matches. 52 {"kk-034", "kk-Arab-034"}, // Matches IR and AF. Both are Arab. 53 {"ku-145", "ku-Latn-TR"}, // Matches IQ, TR, and LB, but kk -> TR. 54 {"und-Arab-CC", "ms-Arab-CC"}, 55 {"und-Arab-GB", "ks-Arab-GB"}, 56 {"und-Hans-CC", "zh-Hans-CC"}, 57 {"und-CC", "en-Latn-CC"}, 58 {"sr", "sr-Cyrl-RS"}, 59 {"sr-151", "sr-Latn-151"}, // Matches RO and RU. 60 // We would like addLikelySubtags to generate the same results if the input 61 // only changes by adding tags that would otherwise have been added 62 // by the expansion. 63 // In other words: 64 // und-AA -> xx-Scrp-AA implies und-Scrp-AA -> xx-Scrp-AA 65 // und-AA -> xx-Scrp-AA implies xx-AA -> xx-Scrp-AA 66 // und-Scrp -> xx-Scrp-AA implies und-Scrp-AA -> xx-Scrp-AA 67 // und-Scrp -> xx-Scrp-AA implies xx-Scrp -> xx-Scrp-AA 68 // xx -> xx-Scrp-AA implies xx-Scrp -> xx-Scrp-AA 69 // xx -> xx-Scrp-AA implies xx-AA -> xx-Scrp-AA 70 // 71 // The algorithm specified in 72 // http://unicode.org/reports/tr35/tr35-9.html#Supplemental_Data, 73 // Section C.10, does not handle the first case. For example, 74 // the CLDR data contains an entry und-BJ -> fr-Latn-BJ, but not 75 // there is no rule for und-Latn-BJ. According to spec, und-Latn-BJ 76 // would expand to en-Latn-BJ, violating the aforementioned principle. 77 // We deviate from the spec by letting und-Scrp-AA expand to xx-Scrp-AA 78 // if a rule of the form und-AA -> xx-Scrp-AA is defined. 79 // Note that as of version 23, CLDR has some explicitly specified 80 // entries that do not conform to these rules. The implementation 81 // will not correct these explicit inconsistencies. A later versions of CLDR 82 // is supposed to fix this. 83 {"und-Latn-BJ", "fr-Latn-BJ"}, 84 {"und-Bugi-ID", "bug-Bugi-ID"}, 85 // regions, scripts and languages without definitions 86 {"und-Arab-AA", "ar-Arab-AA"}, 87 {"und-Afak-RE", "fr-Afak-RE"}, 88 {"und-Arab-GB", "ks-Arab-GB"}, 89 {"abp-Arab-GB", "abp-Arab-GB"}, 90 // script has preference over region 91 {"und-Arab-NL", "ar-Arab-NL"}, 92 {"zza", "zza-Latn-TR"}, 93 // preserve variants and extensions 94 {"de-1901", "de-Latn-DE-1901"}, 95 {"de-x-abc", "de-Latn-DE-x-abc"}, 96 {"de-1901-x-abc", "de-Latn-DE-1901-x-abc"}, 97 {"x-abc", "x-abc"}, // TODO: is this the desired behavior? 98 } 99 for i, tt := range tests { 100 in, _ := Parse(tt.in) 101 out, _ := Parse(tt.out) 102 in, _ = in.addLikelySubtags() 103 if in.String() != out.String() { 104 t.Errorf("%d: add(%s) was %s; want %s", i, tt.in, in, tt.out) 105 } 106 } 107 } 108 func TestMinimize(t *testing.T) { 109 tests := []struct{ in, out string }{ 110 {"aa", "aa"}, 111 {"aa-Latn", "aa"}, 112 {"aa-Latn-ET", "aa"}, 113 {"aa-ET", "aa"}, 114 {"aa-Arab", "aa-Arab"}, 115 {"aa-Arab-ER", "aa-Arab-ER"}, 116 {"aa-Arab-ET", "aa-Arab"}, 117 {"und", "und"}, 118 {"und-Latn", "und"}, 119 {"und-Latn-US", "und"}, 120 {"en-Latn-US", "en"}, 121 {"cmn", "cmn"}, 122 {"cmn-Hans", "cmn-Hans"}, 123 {"cmn-Hant", "cmn-Hant"}, 124 {"zh-AU", "zh-AU"}, 125 {"zh-VN", "zh-VN"}, 126 {"zh-SG", "zh-SG"}, 127 {"zh-Hant", "zh-Hant"}, 128 {"zh-Hant-TW", "zh-TW"}, 129 {"zh-Hans", "zh"}, 130 {"zh-Hani", "zh-Hani"}, 131 {"und-Hans", "und-Hans"}, 132 {"und-Hani", "und-Hani"}, 133 134 {"und-CW", "und-CW"}, 135 {"und-YT", "und-YT"}, 136 {"und-Arab", "und-Arab"}, 137 {"und-AM", "und-AM"}, 138 {"und-Arab-CC", "und-Arab-CC"}, 139 {"und-CC", "und-CC"}, 140 {"und-Latn-BJ", "und-BJ"}, 141 {"und-Bugi-ID", "und-Bugi"}, 142 {"bug-Bugi-ID", "bug-Bugi"}, 143 // regions, scripts and languages without definitions 144 {"und-Arab-AA", "und-Arab-AA"}, 145 // preserve variants and extensions 146 {"de-Latn-1901", "de-1901"}, 147 {"de-Latn-x-abc", "de-x-abc"}, 148 {"de-DE-1901-x-abc", "de-1901-x-abc"}, 149 {"x-abc", "x-abc"}, // TODO: is this the desired behavior? 150 } 151 for i, tt := range tests { 152 in, _ := Parse(tt.in) 153 out, _ := Parse(tt.out) 154 min, _ := in.minimize() 155 if min.String() != out.String() { 156 t.Errorf("%d: min(%s) was %s; want %s", i, tt.in, min, tt.out) 157 } 158 max, _ := min.addLikelySubtags() 159 if x, _ := in.addLikelySubtags(); x.String() != max.String() { 160 t.Errorf("%d: max(min(%s)) = %s; want %s", i, tt.in, max, x) 161 } 162 } 163 } 164 165 func TestRegionDistance(t *testing.T) { 166 tests := []struct { 167 a, b string 168 d int 169 }{ 170 {"NL", "NL", 0}, 171 {"NL", "EU", 1}, 172 {"EU", "NL", 1}, 173 {"005", "005", 0}, 174 {"NL", "BE", 2}, 175 {"CO", "005", 1}, 176 {"005", "CO", 1}, 177 {"CO", "419", 2}, 178 {"419", "CO", 2}, 179 {"005", "419", 1}, 180 {"419", "005", 1}, 181 {"001", "013", 2}, 182 {"013", "001", 2}, 183 {"CO", "CW", 4}, 184 {"CO", "PW", 6}, 185 {"CO", "BV", 6}, 186 {"ZZ", "QQ", 2}, 187 } 188 for i, tt := range tests { 189 ra, _ := getRegionID([]byte(tt.a)) 190 rb, _ := getRegionID([]byte(tt.b)) 191 if d := regionDistance(ra, rb); d != tt.d { 192 t.Errorf("%d: d(%s, %s) = %v; want %v", i, tt.a, tt.b, d, tt.d) 193 } 194 } 195 } 196 197 func TestParentDistance(t *testing.T) { 198 tests := []struct { 199 parent string 200 tag string 201 d uint8 202 }{ 203 {"en-001", "en-AU", 1}, 204 {"pt-PT", "pt-AO", 1}, 205 {"pt", "pt-AO", 2}, 206 {"en-AU", "en-GB", 255}, 207 {"en-NL", "en-AU", 255}, 208 // Note that pt-BR and en-US are not automatically minimized. 209 {"pt-BR", "pt-AO", 255}, 210 {"en-US", "en-AU", 255}, 211 } 212 for _, tt := range tests { 213 r := Raw.MustParse(tt.parent).region 214 tag := Raw.MustParse(tt.tag) 215 if d := parentDistance(r, tag); d != tt.d { 216 t.Errorf("d(%s, %s) was %d; want %d", r, tag, d, tt.d) 217 } 218 } 219 } 220 221 // Implementation of String methods for various types for debugging purposes. 222 223 func (m *matcher) String() string { 224 w := &bytes.Buffer{} 225 fmt.Fprintln(w, "Default:", m.default_) 226 for tag, h := range m.index { 227 fmt.Fprintf(w, " %s: %v\n", tag, h) 228 } 229 return w.String() 230 } 231 232 func (h *matchHeader) String() string { 233 w := &bytes.Buffer{} 234 fmt.Fprintf(w, "exact: ") 235 for _, h := range h.exact { 236 fmt.Fprintf(w, "%v, ", h) 237 } 238 fmt.Fprint(w, "; max: ") 239 for _, h := range h.max { 240 fmt.Fprintf(w, "%v, ", h) 241 } 242 return w.String() 243 } 244 245 func (t haveTag) String() string { 246 return fmt.Sprintf("%v:%d:%v:%v-%v|%v", t.tag, t.index, t.conf, t.maxRegion, t.maxScript, t.altScript) 247 } 248 249 // The test set for TestBestMatch is defined in data_test.go. 250 func TestBestMatch(t *testing.T) { 251 parse := func(list string) (out []Tag) { 252 for _, s := range strings.Split(list, ",") { 253 out = append(out, mk(strings.TrimSpace(s))) 254 } 255 return out 256 } 257 for i, tt := range matchTests { 258 supported := parse(tt.supported) 259 m := newMatcher(supported) 260 if *verbose { 261 fmt.Printf("%s:\n%v\n", tt.comment, m) 262 } 263 for _, tm := range tt.test { 264 tag, _, conf := m.Match(parse(tm.desired)...) 265 if tag.String() != tm.match { 266 t.Errorf("%d:%s: find %s in %q: have %s; want %s (%v)\n", i, tt.comment, tm.desired, tt.supported, tag, tm.match, conf) 267 } 268 } 269 } 270 } 271 272 var benchHave = []Tag{ 273 mk("en"), 274 mk("en-GB"), 275 mk("za"), 276 mk("zh-Hant"), 277 mk("zh-Hans-CN"), 278 mk("zh"), 279 mk("zh-HK"), 280 mk("ar-MK"), 281 mk("en-CA"), 282 mk("fr-CA"), 283 mk("fr-US"), 284 mk("fr-CH"), 285 mk("fr"), 286 mk("lt"), 287 mk("lv"), 288 mk("iw"), 289 mk("iw-NL"), 290 mk("he"), 291 mk("he-IT"), 292 mk("tlh"), 293 mk("ja"), 294 mk("ja-Jpan"), 295 mk("ja-Jpan-JP"), 296 mk("de"), 297 mk("de-CH"), 298 mk("de-AT"), 299 mk("de-DE"), 300 mk("sr"), 301 mk("sr-Latn"), 302 mk("sr-Cyrl"), 303 mk("sr-ME"), 304 } 305 306 var benchWant = [][]Tag{ 307 []Tag{ 308 mk("en"), 309 }, 310 []Tag{ 311 mk("en-AU"), 312 mk("de-HK"), 313 mk("nl"), 314 mk("fy"), 315 mk("lv"), 316 }, 317 []Tag{ 318 mk("en-AU"), 319 mk("de-HK"), 320 mk("nl"), 321 mk("fy"), 322 }, 323 []Tag{ 324 mk("ja-Hant"), 325 mk("da-HK"), 326 mk("nl"), 327 mk("zh-TW"), 328 }, 329 []Tag{ 330 mk("ja-Hant"), 331 mk("da-HK"), 332 mk("nl"), 333 mk("hr"), 334 }, 335 } 336 337 func BenchmarkMatch(b *testing.B) { 338 m := newMatcher(benchHave) 339 for i := 0; i < b.N; i++ { 340 for _, want := range benchWant { 341 m.getBest(want...) 342 } 343 } 344 } 345 346 func BenchmarkMatchExact(b *testing.B) { 347 want := mk("en") 348 m := newMatcher(benchHave) 349 for i := 0; i < b.N; i++ { 350 m.getBest(want) 351 } 352 } 353 354 func BenchmarkMatchAltLanguagePresent(b *testing.B) { 355 want := mk("hr") 356 m := newMatcher(benchHave) 357 for i := 0; i < b.N; i++ { 358 m.getBest(want) 359 } 360 } 361 362 func BenchmarkMatchAltLanguageNotPresent(b *testing.B) { 363 want := mk("nn") 364 m := newMatcher(benchHave) 365 for i := 0; i < b.N; i++ { 366 m.getBest(want) 367 } 368 } 369 370 func BenchmarkMatchAltScriptPresent(b *testing.B) { 371 want := mk("zh-Hant-CN") 372 m := newMatcher(benchHave) 373 for i := 0; i < b.N; i++ { 374 m.getBest(want) 375 } 376 } 377 378 func BenchmarkMatchAltScriptNotPresent(b *testing.B) { 379 want := mk("fr-Cyrl") 380 m := newMatcher(benchHave) 381 for i := 0; i < b.N; i++ { 382 m.getBest(want) 383 } 384 } 385 386 func BenchmarkMatchLimitedExact(b *testing.B) { 387 want := []Tag{mk("he-NL"), mk("iw-NL")} 388 m := newMatcher(benchHave) 389 for i := 0; i < b.N; i++ { 390 m.getBest(want...) 391 } 392 }