golang.org/x/text@v0.14.0/internal/language/lookup_test.go (about) 1 // Copyright 2013 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package language 6 7 import ( 8 "testing" 9 10 "golang.org/x/text/internal/tag" 11 ) 12 13 func b(s string) []byte { 14 return []byte(s) 15 } 16 17 func TestLangID(t *testing.T) { 18 tests := []struct { 19 id, bcp47, iso3, norm string 20 err error 21 }{ 22 {id: "", bcp47: "und", iso3: "und", err: ErrSyntax}, 23 {id: " ", bcp47: "und", iso3: "und", err: ErrSyntax}, 24 {id: " ", bcp47: "und", iso3: "und", err: ErrSyntax}, 25 {id: " ", bcp47: "und", iso3: "und", err: ErrSyntax}, 26 {id: "xxx", bcp47: "und", iso3: "und", err: NewValueError([]byte("xxx"))}, 27 {id: "und", bcp47: "und", iso3: "und"}, 28 {id: "aju", bcp47: "aju", iso3: "aju", norm: "jrb"}, 29 {id: "jrb", bcp47: "jrb", iso3: "jrb"}, 30 {id: "es", bcp47: "es", iso3: "spa"}, 31 {id: "spa", bcp47: "es", iso3: "spa"}, 32 {id: "ji", bcp47: "ji", iso3: "yid-", norm: "yi"}, 33 {id: "jw", bcp47: "jw", iso3: "jav-", norm: "jv"}, 34 {id: "ar", bcp47: "ar", iso3: "ara"}, 35 {id: "kw", bcp47: "kw", iso3: "cor"}, 36 {id: "arb", bcp47: "arb", iso3: "arb", norm: "ar"}, 37 {id: "ar", bcp47: "ar", iso3: "ara"}, 38 {id: "kur", bcp47: "ku", iso3: "kur"}, 39 {id: "nl", bcp47: "nl", iso3: "nld"}, 40 {id: "NL", bcp47: "nl", iso3: "nld"}, 41 {id: "gsw", bcp47: "gsw", iso3: "gsw"}, 42 {id: "gSW", bcp47: "gsw", iso3: "gsw"}, 43 {id: "und", bcp47: "und", iso3: "und"}, 44 {id: "sh", bcp47: "sh", iso3: "hbs", norm: "sr"}, 45 {id: "hbs", bcp47: "sh", iso3: "hbs", norm: "sr"}, 46 {id: "no", bcp47: "no", iso3: "nor", norm: "no"}, 47 {id: "nor", bcp47: "no", iso3: "nor", norm: "no"}, 48 {id: "cmn", bcp47: "cmn", iso3: "cmn", norm: "zh"}, 49 } 50 for i, tt := range tests { 51 want, err := getLangID(b(tt.id)) 52 if err != tt.err { 53 t.Errorf("%d:err(%s): found %q; want %q", i, tt.id, err, tt.err) 54 } 55 if err != nil { 56 continue 57 } 58 if id, _ := getLangISO2(b(tt.bcp47)); len(tt.bcp47) == 2 && want != id { 59 t.Errorf("%d:getISO2(%s): found %v; want %v", i, tt.bcp47, id, want) 60 } 61 if len(tt.iso3) == 3 { 62 if id, _ := getLangISO3(b(tt.iso3)); want != id { 63 t.Errorf("%d:getISO3(%s): found %q; want %q", i, tt.iso3, id, want) 64 } 65 if id, _ := getLangID(b(tt.iso3)); want != id { 66 t.Errorf("%d:getID3(%s): found %v; want %v", i, tt.iso3, id, want) 67 } 68 } 69 norm := want 70 if tt.norm != "" { 71 norm, _ = getLangID(b(tt.norm)) 72 } 73 id, _ := normLang(want) 74 if id != norm { 75 t.Errorf("%d:norm(%s): found %v; want %v", i, tt.id, id, norm) 76 } 77 if id := want.String(); tt.bcp47 != id { 78 t.Errorf("%d:String(): found %s; want %s", i, id, tt.bcp47) 79 } 80 if id := want.ISO3(); tt.iso3[:3] != id { 81 t.Errorf("%d:iso3(): found %s; want %s", i, id, tt.iso3[:3]) 82 } 83 } 84 } 85 86 func TestGrandfathered(t *testing.T) { 87 for _, tt := range []struct{ in, out string }{ 88 {"art-lojban", "jbo"}, 89 {"i-ami", "ami"}, 90 {"i-bnn", "bnn"}, 91 {"i-hak", "hak"}, 92 {"i-klingon", "tlh"}, 93 {"i-lux", "lb"}, 94 {"i-navajo", "nv"}, 95 {"i-pwn", "pwn"}, 96 {"i-tao", "tao"}, 97 {"i-tay", "tay"}, 98 {"i-tsu", "tsu"}, 99 {"no-bok", "nb"}, 100 {"no-nyn", "nn"}, 101 {"sgn-BE-FR", "sfb"}, 102 {"sgn-BE-NL", "vgt"}, 103 {"sgn-CH-DE", "sgg"}, 104 {"sgn-ch-de", "sgg"}, 105 {"zh-guoyu", "cmn"}, 106 {"zh-hakka", "hak"}, 107 {"zh-min-nan", "nan"}, 108 {"zh-xiang", "hsn"}, 109 110 // Grandfathered tags with no modern replacement will be converted as follows: 111 {"cel-gaulish", "xtg-x-cel-gaulish"}, 112 {"en-GB-oed", "en-GB-oxendict"}, 113 {"en-gb-oed", "en-GB-oxendict"}, 114 {"i-default", "en-x-i-default"}, 115 {"i-enochian", "und-x-i-enochian"}, 116 {"i-mingo", "see-x-i-mingo"}, 117 {"zh-min", "nan-x-zh-min"}, 118 119 {"root", "und"}, 120 {"en_US_POSIX", "en-US-u-va-posix"}, 121 {"en_us_posix", "en-US-u-va-posix"}, 122 {"en-us-posix", "en-US-u-va-posix"}, 123 } { 124 got := Make(tt.in) 125 want := MustParse(tt.out) 126 if got != want { 127 t.Errorf("%s: got %q; want %q", tt.in, got, want) 128 } 129 } 130 } 131 132 func TestRegionID(t *testing.T) { 133 tests := []struct { 134 in, out string 135 }{ 136 {"_ ", ""}, 137 {"_000", ""}, 138 {"419", "419"}, 139 {"AA", "AA"}, 140 {"ATF", "TF"}, 141 {"HV", "HV"}, 142 {"CT", "CT"}, 143 {"DY", "DY"}, 144 {"IC", "IC"}, 145 {"FQ", "FQ"}, 146 {"JT", "JT"}, 147 {"ZZ", "ZZ"}, 148 {"EU", "EU"}, 149 {"QO", "QO"}, 150 {"FX", "FX"}, 151 } 152 for i, tt := range tests { 153 if tt.in[0] == '_' { 154 id := tt.in[1:] 155 if _, err := getRegionID(b(id)); err == nil { 156 t.Errorf("%d:err(%s): found nil; want error", i, id) 157 } 158 continue 159 } 160 want, _ := getRegionID(b(tt.in)) 161 if s := want.String(); s != tt.out { 162 t.Errorf("%d:%s: found %q; want %q", i, tt.in, s, tt.out) 163 } 164 if len(tt.in) == 2 { 165 want, _ := getRegionISO2(b(tt.in)) 166 if s := want.String(); s != tt.out { 167 t.Errorf("%d:getISO2(%s): found %q; want %q", i, tt.in, s, tt.out) 168 } 169 } 170 } 171 } 172 173 func TestRegionType(t *testing.T) { 174 for _, tt := range []struct { 175 r string 176 t byte 177 }{ 178 {"NL", bcp47Region | ccTLD}, 179 {"EU", bcp47Region | ccTLD}, // exceptionally reserved 180 {"AN", bcp47Region | ccTLD}, // transitionally reserved 181 182 {"DD", bcp47Region}, // deleted in ISO, deprecated in BCP 47 183 {"NT", bcp47Region}, // transitionally reserved, deprecated in BCP 47 184 185 {"XA", iso3166UserAssigned | bcp47Region}, 186 {"ZZ", iso3166UserAssigned | bcp47Region}, 187 {"AA", iso3166UserAssigned | bcp47Region}, 188 {"QO", iso3166UserAssigned | bcp47Region}, 189 {"QM", iso3166UserAssigned | bcp47Region}, 190 {"XK", iso3166UserAssigned | bcp47Region}, 191 192 {"CT", 0}, // deleted in ISO, not in BCP 47, canonicalized in CLDR 193 } { 194 r := MustParseRegion(tt.r) 195 if tp := r.typ(); tp != tt.t { 196 t.Errorf("Type(%s): got %x; want %x", tt.r, tp, tt.t) 197 } 198 } 199 } 200 201 func TestRegionISO3(t *testing.T) { 202 tests := []struct { 203 from, iso3, to string 204 }{ 205 {" ", "ZZZ", "ZZ"}, 206 {"000", "ZZZ", "ZZ"}, 207 {"AA", "AAA", ""}, 208 {"CT", "CTE", ""}, 209 {"DY", "DHY", ""}, 210 {"EU", "QUU", ""}, 211 {"HV", "HVO", ""}, 212 {"IC", "ZZZ", "ZZ"}, 213 {"JT", "JTN", ""}, 214 {"PZ", "PCZ", ""}, 215 {"QU", "QUU", "EU"}, 216 {"QO", "QOO", ""}, 217 {"YD", "YMD", ""}, 218 {"FQ", "ATF", "TF"}, 219 {"TF", "ATF", ""}, 220 {"FX", "FXX", ""}, 221 {"ZZ", "ZZZ", ""}, 222 {"419", "ZZZ", "ZZ"}, 223 } 224 for _, tt := range tests { 225 r, _ := getRegionID(b(tt.from)) 226 if s := r.ISO3(); s != tt.iso3 { 227 t.Errorf("iso3(%q): found %q; want %q", tt.from, s, tt.iso3) 228 } 229 if tt.iso3 == "" { 230 continue 231 } 232 want := tt.to 233 if tt.to == "" { 234 want = tt.from 235 } 236 r, _ = getRegionID(b(want)) 237 if id, _ := getRegionISO3(b(tt.iso3)); id != r { 238 t.Errorf("%s: found %q; want %q", tt.iso3, id, want) 239 } 240 } 241 } 242 243 func TestRegionM49(t *testing.T) { 244 fromTests := []struct { 245 m49 int 246 id string 247 }{ 248 {0, ""}, 249 {-1, ""}, 250 {1000, ""}, 251 {10000, ""}, 252 253 {001, "001"}, 254 {104, "MM"}, 255 {180, "CD"}, 256 {230, "ET"}, 257 {231, "ET"}, 258 {249, "FX"}, 259 {250, "FR"}, 260 {276, "DE"}, 261 {278, "DD"}, 262 {280, "DE"}, 263 {419, "419"}, 264 {626, "TL"}, 265 {736, "SD"}, 266 {840, "US"}, 267 {854, "BF"}, 268 {891, "CS"}, 269 {899, ""}, 270 {958, "AA"}, 271 {966, "QT"}, 272 {967, "EU"}, 273 {999, "ZZ"}, 274 } 275 for _, tt := range fromTests { 276 id, err := getRegionM49(tt.m49) 277 if want, have := err != nil, tt.id == ""; want != have { 278 t.Errorf("error(%d): have %v; want %v", tt.m49, have, want) 279 continue 280 } 281 r, _ := getRegionID(b(tt.id)) 282 if r != id { 283 t.Errorf("region(%d): have %s; want %s", tt.m49, id, r) 284 } 285 } 286 287 toTests := []struct { 288 m49 int 289 id string 290 }{ 291 {0, "000"}, 292 {0, "IC"}, // Some codes don't have an ID 293 294 {001, "001"}, 295 {104, "MM"}, 296 {104, "BU"}, 297 {180, "CD"}, 298 {180, "ZR"}, 299 {231, "ET"}, 300 {250, "FR"}, 301 {249, "FX"}, 302 {276, "DE"}, 303 {278, "DD"}, 304 {419, "419"}, 305 {626, "TL"}, 306 {626, "TP"}, 307 {729, "SD"}, 308 {826, "GB"}, 309 {840, "US"}, 310 {854, "BF"}, 311 {891, "YU"}, 312 {891, "CS"}, 313 {958, "AA"}, 314 {966, "QT"}, 315 {967, "EU"}, 316 {967, "QU"}, 317 {999, "ZZ"}, 318 // For codes that don't have an M49 code use the replacement value, 319 // if available. 320 {854, "HV"}, // maps to Burkino Faso 321 } 322 for _, tt := range toTests { 323 r, _ := getRegionID(b(tt.id)) 324 if r.M49() != tt.m49 { 325 t.Errorf("m49(%q): have %d; want %d", tt.id, r.M49(), tt.m49) 326 } 327 } 328 } 329 330 func TestRegionDeprecation(t *testing.T) { 331 tests := []struct{ in, out string }{ 332 {"BU", "MM"}, 333 {"BUR", "MM"}, 334 {"CT", "KI"}, 335 {"DD", "DE"}, 336 {"DDR", "DE"}, 337 {"DY", "BJ"}, 338 {"FX", "FR"}, 339 {"HV", "BF"}, 340 {"JT", "UM"}, 341 {"MI", "UM"}, 342 {"NH", "VU"}, 343 {"NQ", "AQ"}, 344 {"PU", "UM"}, 345 {"PZ", "PA"}, 346 {"QU", "EU"}, 347 {"RH", "ZW"}, 348 {"TP", "TL"}, 349 {"UK", "GB"}, 350 {"VD", "VN"}, 351 {"WK", "UM"}, 352 {"YD", "YE"}, 353 {"NL", "NL"}, 354 } 355 for _, tt := range tests { 356 rIn, _ := getRegionID([]byte(tt.in)) 357 rOut, _ := getRegionISO2([]byte(tt.out)) 358 r := normRegion(rIn) 359 if rOut == rIn && r != 0 { 360 t.Errorf("%s: was %q; want %q", tt.in, r, tt.in) 361 } 362 if rOut != rIn && r != rOut { 363 t.Errorf("%s: was %q; want %q", tt.in, r, tt.out) 364 } 365 366 } 367 } 368 369 func TestGetScriptID(t *testing.T) { 370 idx := tag.Index("0000BbbbDdddEeeeZzzz\xff\xff\xff\xff") 371 tests := []struct { 372 in string 373 out Script 374 }{ 375 {" ", 0}, 376 {" ", 0}, 377 {" ", 0}, 378 {"", 0}, 379 {"Aaaa", 0}, 380 {"Bbbb", 1}, 381 {"Dddd", 2}, 382 {"dddd", 2}, 383 {"dDDD", 2}, 384 {"Eeee", 3}, 385 {"Zzzz", 4}, 386 } 387 for i, tt := range tests { 388 if id, err := getScriptID(idx, b(tt.in)); id != tt.out { 389 t.Errorf("%d:%s: found %d; want %d", i, tt.in, id, tt.out) 390 } else if id == 0 && err == nil { 391 t.Errorf("%d:%s: no error; expected one", i, tt.in) 392 } 393 } 394 } 395 396 func TestIsPrivateUse(t *testing.T) { 397 type test struct { 398 s string 399 private bool 400 } 401 tests := []test{ 402 {"en", false}, 403 {"und", false}, 404 {"pzn", false}, 405 {"qaa", true}, 406 {"qtz", true}, 407 {"qua", false}, 408 } 409 for i, tt := range tests { 410 x, _ := getLangID([]byte(tt.s)) 411 if b := x.IsPrivateUse(); b != tt.private { 412 t.Errorf("%d: langID.IsPrivateUse(%s) was %v; want %v", i, tt.s, b, tt.private) 413 } 414 } 415 tests = []test{ 416 {"001", false}, 417 {"419", false}, 418 {"899", false}, 419 {"900", false}, 420 {"957", false}, 421 {"958", true}, 422 {"AA", true}, 423 {"AC", false}, 424 {"EU", false}, // CLDR grouping, exceptionally reserved in ISO. 425 {"QU", true}, // Canonicalizes to EU, User-assigned in ISO. 426 {"QO", true}, // CLDR grouping, User-assigned in ISO. 427 {"QA", false}, 428 {"QM", true}, 429 {"QZ", true}, 430 {"XA", true}, 431 {"XK", true}, // Assigned to Kosovo in CLDR, User-assigned in ISO. 432 {"XZ", true}, 433 {"ZW", false}, 434 {"ZZ", true}, 435 } 436 for i, tt := range tests { 437 x, _ := getRegionID([]byte(tt.s)) 438 if b := x.IsPrivateUse(); b != tt.private { 439 t.Errorf("%d: regionID.IsPrivateUse(%s) was %v; want %v", i, tt.s, b, tt.private) 440 } 441 } 442 tests = []test{ 443 {"Latn", false}, 444 {"Laaa", false}, // invalid 445 {"Qaaa", true}, 446 {"Qabx", true}, 447 {"Qaby", false}, 448 {"Zyyy", false}, 449 {"Zzzz", false}, 450 } 451 for i, tt := range tests { 452 x, _ := getScriptID(script, []byte(tt.s)) 453 if b := x.IsPrivateUse(); b != tt.private { 454 t.Errorf("%d: scriptID.IsPrivateUse(%s) was %v; want %v", i, tt.s, b, tt.private) 455 } 456 } 457 }