github.com/graybobo/golang.org-package-offline-cache@v0.0.0-20200626051047-6608995c132f/x/text/language/parse_test.go (about) 1 // Copyright 2013 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package language 6 7 import ( 8 "bytes" 9 "strings" 10 "testing" 11 12 "golang.org/x/text/internal/tag" 13 ) 14 15 type scanTest struct { 16 ok bool // true if scanning does not result in an error 17 in string 18 tok []string // the expected tokens 19 } 20 21 var tests = []scanTest{ 22 {true, "", []string{}}, 23 {true, "1", []string{"1"}}, 24 {true, "en", []string{"en"}}, 25 {true, "root", []string{"root"}}, 26 {true, "maxchars", []string{"maxchars"}}, 27 {false, "bad/", []string{}}, 28 {false, "morethan8", []string{}}, 29 {false, "-", []string{}}, 30 {false, "----", []string{}}, 31 {false, "_", []string{}}, 32 {true, "en-US", []string{"en", "US"}}, 33 {true, "en_US", []string{"en", "US"}}, 34 {false, "en-US-", []string{"en", "US"}}, 35 {false, "en-US--", []string{"en", "US"}}, 36 {false, "en-US---", []string{"en", "US"}}, 37 {false, "en--US", []string{"en", "US"}}, 38 {false, "-en-US", []string{"en", "US"}}, 39 {false, "-en--US-", []string{"en", "US"}}, 40 {false, "-en--US-", []string{"en", "US"}}, 41 {false, "en-.-US", []string{"en", "US"}}, 42 {false, ".-en--US-.", []string{"en", "US"}}, 43 {false, "en-u.-US", []string{"en", "US"}}, 44 {true, "en-u1-US", []string{"en", "u1", "US"}}, 45 {true, "maxchar1_maxchar2-maxchar3", []string{"maxchar1", "maxchar2", "maxchar3"}}, 46 {false, "moreThan8-moreThan8-e", []string{"e"}}, 47 } 48 49 func TestScan(t *testing.T) { 50 for i, tt := range tests { 51 scan := makeScannerString(tt.in) 52 for j := 0; !scan.done; j++ { 53 if j >= len(tt.tok) { 54 t.Errorf("%d: extra token %q", i, scan.token) 55 } else if tag.Compare(tt.tok[j], scan.token) != 0 { 56 t.Errorf("%d: token %d: found %q; want %q", i, j, scan.token, tt.tok[j]) 57 break 58 } 59 scan.scan() 60 } 61 if s := strings.Join(tt.tok, "-"); tag.Compare(s, bytes.Replace(scan.b, b("_"), b("-"), -1)) != 0 { 62 t.Errorf("%d: input: found %q; want %q", i, scan.b, s) 63 } 64 if (scan.err == nil) != tt.ok { 65 t.Errorf("%d: ok: found %v; want %v", i, scan.err == nil, tt.ok) 66 } 67 } 68 } 69 70 func TestAcceptMinSize(t *testing.T) { 71 for i, tt := range tests { 72 // count number of successive tokens with a minimum size. 73 for sz := 1; sz <= 8; sz++ { 74 scan := makeScannerString(tt.in) 75 scan.end, scan.next = 0, 0 76 end := scan.acceptMinSize(sz) 77 n := 0 78 for i := 0; i < len(tt.tok) && len(tt.tok[i]) >= sz; i++ { 79 n += len(tt.tok[i]) 80 if i > 0 { 81 n++ 82 } 83 } 84 if end != n { 85 t.Errorf("%d:%d: found len %d; want %d", i, sz, end, n) 86 } 87 } 88 } 89 } 90 91 type parseTest struct { 92 i int // the index of this test 93 in string 94 lang, script, region string 95 variants, ext string 96 extList []string // only used when more than one extension is present 97 invalid bool 98 rewrite bool // special rewrite not handled by parseTag 99 changed bool // string needed to be reformatted 100 } 101 102 func parseTests() []parseTest { 103 tests := []parseTest{ 104 {in: "root", lang: "und"}, 105 {in: "und", lang: "und"}, 106 {in: "en", lang: "en"}, 107 {in: "xy", lang: "und", invalid: true}, 108 {in: "en-ZY", lang: "en", invalid: true}, 109 {in: "gsw", lang: "gsw"}, 110 {in: "sr_Latn", lang: "sr", script: "Latn"}, 111 {in: "af-Arab", lang: "af", script: "Arab"}, 112 {in: "nl-BE", lang: "nl", region: "BE"}, 113 {in: "es-419", lang: "es", region: "419"}, 114 {in: "und-001", lang: "und", region: "001"}, 115 {in: "de-latn-be", lang: "de", script: "Latn", region: "BE"}, 116 // Variants 117 {in: "de-1901", lang: "de", variants: "1901"}, 118 // Accept with unsuppressed script. 119 {in: "de-Latn-1901", lang: "de", script: "Latn", variants: "1901"}, 120 // Specialized. 121 {in: "sl-rozaj", lang: "sl", variants: "rozaj"}, 122 {in: "sl-rozaj-lipaw", lang: "sl", variants: "rozaj-lipaw"}, 123 {in: "sl-rozaj-biske", lang: "sl", variants: "rozaj-biske"}, 124 {in: "sl-rozaj-biske-1994", lang: "sl", variants: "rozaj-biske-1994"}, 125 {in: "sl-rozaj-1994", lang: "sl", variants: "rozaj-1994"}, 126 // Maximum number of variants while adhering to prefix rules. 127 {in: "sl-rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp", lang: "sl", variants: "rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp"}, 128 129 // Sorting. 130 {in: "sl-1994-biske-rozaj", lang: "sl", variants: "rozaj-biske-1994", changed: true}, 131 {in: "sl-rozaj-biske-1994-alalc97-fonupa-fonipa-fonxsamp", lang: "sl", variants: "rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp", changed: true}, 132 {in: "nl-fonxsamp-alalc97-fonipa-fonupa", lang: "nl", variants: "alalc97-fonipa-fonupa-fonxsamp", changed: true}, 133 134 // Duplicates variants are removed, but not an error. 135 {in: "nl-fonupa-fonupa", lang: "nl", variants: "fonupa"}, 136 137 // Variants that do not have correct prefixes. We still accept these. 138 {in: "de-Cyrl-1901", lang: "de", script: "Cyrl", variants: "1901"}, 139 {in: "sl-rozaj-lipaw-1994", lang: "sl", variants: "rozaj-lipaw-1994"}, 140 {in: "sl-1994-biske-rozaj-1994-biske-rozaj", lang: "sl", variants: "rozaj-biske-1994", changed: true}, 141 {in: "de-Cyrl-1901", lang: "de", script: "Cyrl", variants: "1901"}, 142 143 // Invalid variant. 144 {in: "de-1902", lang: "de", variants: "", invalid: true}, 145 146 {in: "EN_CYRL", lang: "en", script: "Cyrl"}, 147 // private use and extensions 148 {in: "x-a-b-c-d", ext: "x-a-b-c-d"}, 149 {in: "x_A.-B-C_D", ext: "x-b-c-d", invalid: true, changed: true}, 150 {in: "x-aa-bbbb-cccccccc-d", ext: "x-aa-bbbb-cccccccc-d"}, 151 {in: "en-c_cc-b-bbb-a-aaa", lang: "en", changed: true, extList: []string{"a-aaa", "b-bbb", "c-cc"}}, 152 {in: "en-x_cc-b-bbb-a-aaa", lang: "en", ext: "x-cc-b-bbb-a-aaa", changed: true}, 153 {in: "en-c_cc-b-bbb-a-aaa-x-x", lang: "en", changed: true, extList: []string{"a-aaa", "b-bbb", "c-cc", "x-x"}}, 154 {in: "en-v-c", lang: "en", ext: "", invalid: true}, 155 {in: "en-v-abcdefghi", lang: "en", ext: "", invalid: true}, 156 {in: "en-v-abc-x", lang: "en", ext: "v-abc", invalid: true}, 157 {in: "en-v-abc-x-", lang: "en", ext: "v-abc", invalid: true}, 158 {in: "en-v-abc-w-x-xx", lang: "en", extList: []string{"v-abc", "x-xx"}, invalid: true, changed: true}, 159 {in: "en-v-abc-w-y-yx", lang: "en", extList: []string{"v-abc", "y-yx"}, invalid: true, changed: true}, 160 {in: "en-v-c-abc", lang: "en", ext: "c-abc", invalid: true, changed: true}, 161 {in: "en-v-w-abc", lang: "en", ext: "w-abc", invalid: true, changed: true}, 162 {in: "en-v-x-abc", lang: "en", ext: "x-abc", invalid: true, changed: true}, 163 {in: "en-v-x-a", lang: "en", ext: "x-a", invalid: true, changed: true}, 164 {in: "en-9-aa-0-aa-z-bb-x-a", lang: "en", extList: []string{"0-aa", "9-aa", "z-bb", "x-a"}, changed: true}, 165 {in: "en-u-c", lang: "en", ext: "", invalid: true}, 166 {in: "en-u-co-phonebk", lang: "en", ext: "u-co-phonebk"}, 167 {in: "en-u-co-phonebk-ca", lang: "en", ext: "u-co-phonebk", invalid: true}, 168 {in: "en-u-nu-arabic-co-phonebk-ca", lang: "en", ext: "u-co-phonebk-nu-arabic", invalid: true, changed: true}, 169 {in: "en-u-nu-arabic-co-phonebk-ca-x", lang: "en", ext: "u-co-phonebk-nu-arabic", invalid: true, changed: true}, 170 {in: "en-u-nu-arabic-co-phonebk-ca-s", lang: "en", ext: "u-co-phonebk-nu-arabic", invalid: true, changed: true}, 171 {in: "en-u-nu-arabic-co-phonebk-ca-a12345678", lang: "en", ext: "u-co-phonebk-nu-arabic", invalid: true, changed: true}, 172 {in: "en-u-co-phonebook", lang: "en", ext: "", invalid: true}, 173 {in: "en-u-co-phonebook-cu-xau", lang: "en", ext: "u-cu-xau", invalid: true, changed: true}, 174 {in: "en-Cyrl-u-co-phonebk", lang: "en", script: "Cyrl", ext: "u-co-phonebk"}, 175 {in: "en-US-u-co-phonebk", lang: "en", region: "US", ext: "u-co-phonebk"}, 176 {in: "en-US-u-co-phonebk-cu-xau", lang: "en", region: "US", ext: "u-co-phonebk-cu-xau"}, 177 {in: "en-scotland-u-co-phonebk", lang: "en", variants: "scotland", ext: "u-co-phonebk"}, 178 {in: "en-u-cu-xua-co-phonebk", lang: "en", ext: "u-co-phonebk-cu-xua", changed: true}, 179 {in: "en-u-def-abc-cu-xua-co-phonebk", lang: "en", ext: "u-abc-def-co-phonebk-cu-xua", changed: true}, 180 {in: "en-u-def-abc", lang: "en", ext: "u-abc-def", changed: true}, 181 {in: "en-u-cu-xua-co-phonebk-a-cd", lang: "en", extList: []string{"a-cd", "u-co-phonebk-cu-xua"}, changed: true}, 182 // Invalid "u" extension. Drop invalid parts. 183 {in: "en-u-cu-co-phonebk", lang: "en", extList: []string{"u-co-phonebk"}, invalid: true, changed: true}, 184 {in: "en-u-cu-xau-co", lang: "en", extList: []string{"u-cu-xau"}, invalid: true}, 185 // We allow duplicate keys as the LDML spec does not explicitly prohibit it. 186 // TODO: Consider eliminating duplicates and returning an error. 187 {in: "en-u-cu-xau-co-phonebk-cu-xau", lang: "en", ext: "u-co-phonebk-cu-xau-cu-xau", changed: true}, 188 {in: "en-t-en-Cyrl-NL-fonipa", lang: "en", ext: "t-en-cyrl-nl-fonipa", changed: true}, 189 {in: "en-t-en-Cyrl-NL-fonipa-t0-abc-def", lang: "en", ext: "t-en-cyrl-nl-fonipa-t0-abc-def", changed: true}, 190 {in: "en-t-t0-abcd", lang: "en", ext: "t-t0-abcd"}, 191 // Not necessary to have changed here. 192 {in: "en-t-nl-abcd", lang: "en", ext: "t-nl", invalid: true}, 193 {in: "en-t-nl-latn", lang: "en", ext: "t-nl-latn"}, 194 {in: "en-t-t0-abcd-x-a", lang: "en", extList: []string{"t-t0-abcd", "x-a"}}, 195 // invalid 196 {in: "", lang: "und", invalid: true}, 197 {in: "-", lang: "und", invalid: true}, 198 {in: "x", lang: "und", invalid: true}, 199 {in: "x-", lang: "und", invalid: true}, 200 {in: "x--", lang: "und", invalid: true}, 201 {in: "a-a-b-c-d", lang: "und", invalid: true}, 202 {in: "en-", lang: "en", invalid: true}, 203 {in: "enne-", lang: "und", invalid: true}, 204 {in: "en.", lang: "und", invalid: true}, 205 {in: "en.-latn", lang: "und", invalid: true}, 206 {in: "en.-en", lang: "en", invalid: true}, 207 {in: "x-a-tooManyChars-c-d", ext: "x-a-c-d", invalid: true, changed: true}, 208 {in: "a-tooManyChars-c-d", lang: "und", invalid: true}, 209 // TODO: check key-value validity 210 // { in: "en-u-cu-xd", lang: "en", ext: "u-cu-xd", invalid: true }, 211 {in: "en-t-abcd", lang: "en", invalid: true}, 212 {in: "en-Latn-US-en", lang: "en", script: "Latn", region: "US", invalid: true}, 213 // rewrites (more tests in TestGrandfathered) 214 {in: "zh-min-nan", lang: "nan"}, 215 {in: "zh-yue", lang: "yue"}, 216 {in: "zh-xiang", lang: "hsn", rewrite: true}, 217 {in: "zh-guoyu", lang: "cmn", rewrite: true}, 218 {in: "iw", lang: "iw"}, 219 {in: "sgn-BE-FR", lang: "sfb", rewrite: true}, 220 {in: "i-klingon", lang: "tlh", rewrite: true}, 221 } 222 for i, tt := range tests { 223 tests[i].i = i 224 if tt.extList != nil { 225 tests[i].ext = strings.Join(tt.extList, "-") 226 } 227 if tt.ext != "" && tt.extList == nil { 228 tests[i].extList = []string{tt.ext} 229 } 230 } 231 return tests 232 } 233 234 func TestParseExtensions(t *testing.T) { 235 for i, tt := range parseTests() { 236 if tt.ext == "" || tt.rewrite { 237 continue 238 } 239 scan := makeScannerString(tt.in) 240 if len(scan.b) > 1 && scan.b[1] != '-' { 241 scan.end = nextExtension(string(scan.b), 0) 242 scan.next = scan.end + 1 243 scan.scan() 244 } 245 start := scan.start 246 scan.toLower(start, len(scan.b)) 247 parseExtensions(&scan) 248 ext := string(scan.b[start:]) 249 if ext != tt.ext { 250 t.Errorf("%d(%s): ext was %v; want %v", i, tt.in, ext, tt.ext) 251 } 252 if changed := !strings.HasPrefix(tt.in[start:], ext); changed != tt.changed { 253 t.Errorf("%d(%s): changed was %v; want %v", i, tt.in, changed, tt.changed) 254 } 255 } 256 } 257 258 // partChecks runs checks for each part by calling the function returned by f. 259 func partChecks(t *testing.T, f func(*parseTest) (Tag, bool)) { 260 for i, tt := range parseTests() { 261 tag, skip := f(&tt) 262 if skip { 263 continue 264 } 265 if l, _ := getLangID(b(tt.lang)); l != tag.lang { 266 t.Errorf("%d: lang was %q; want %q", i, tag.lang, l) 267 } 268 if sc, _ := getScriptID(script, b(tt.script)); sc != tag.script { 269 t.Errorf("%d: script was %q; want %q", i, tag.script, sc) 270 } 271 if r, _ := getRegionID(b(tt.region)); r != tag.region { 272 t.Errorf("%d: region was %q; want %q", i, tag.region, r) 273 } 274 if tag.str == "" { 275 continue 276 } 277 p := int(tag.pVariant) 278 if p < int(tag.pExt) { 279 p++ 280 } 281 if s, g := tag.str[p:tag.pExt], tt.variants; s != g { 282 t.Errorf("%d: variants was %q; want %q", i, s, g) 283 } 284 p = int(tag.pExt) 285 if p > 0 && p < len(tag.str) { 286 p++ 287 } 288 if s, g := (tag.str)[p:], tt.ext; s != g { 289 t.Errorf("%d: extensions were %q; want %q", i, s, g) 290 } 291 } 292 } 293 294 func TestParseTag(t *testing.T) { 295 partChecks(t, func(tt *parseTest) (id Tag, skip bool) { 296 if strings.HasPrefix(tt.in, "x-") || tt.rewrite { 297 return Tag{}, true 298 } 299 scan := makeScannerString(tt.in) 300 id, end := parseTag(&scan) 301 id.str = string(scan.b[:end]) 302 tt.ext = "" 303 tt.extList = []string{} 304 return id, false 305 }) 306 } 307 308 func TestParse(t *testing.T) { 309 partChecks(t, func(tt *parseTest) (id Tag, skip bool) { 310 id, err := Raw.Parse(tt.in) 311 ext := "" 312 if id.str != "" { 313 if strings.HasPrefix(id.str, "x-") { 314 ext = id.str 315 } else if int(id.pExt) < len(id.str) && id.pExt > 0 { 316 ext = id.str[id.pExt+1:] 317 } 318 } 319 if tag, _ := Raw.Parse(id.String()); tag.String() != id.String() { 320 t.Errorf("%d:%s: reparse was %q; want %q", tt.i, tt.in, id.String(), tag.String()) 321 } 322 if ext != tt.ext { 323 t.Errorf("%d:%s: ext was %q; want %q", tt.i, tt.in, ext, tt.ext) 324 } 325 changed := id.str != "" && !strings.HasPrefix(tt.in, id.str) 326 if changed != tt.changed { 327 t.Errorf("%d:%s: changed was %v; want %v", tt.i, tt.in, changed, tt.changed) 328 } 329 if (err != nil) != tt.invalid { 330 t.Errorf("%d:%s: invalid was %v; want %v. Error: %v", tt.i, tt.in, err != nil, tt.invalid, err) 331 } 332 return id, false 333 }) 334 } 335 336 func TestErrors(t *testing.T) { 337 mkInvalid := func(s string) error { 338 return mkErrInvalid([]byte(s)) 339 } 340 tests := []struct { 341 in string 342 out error 343 }{ 344 // invalid subtags. 345 {"ac", mkInvalid("ac")}, 346 {"AC", mkInvalid("ac")}, 347 {"aa-Uuuu", mkInvalid("Uuuu")}, 348 {"aa-AB", mkInvalid("AB")}, 349 // ill-formed wins over invalid. 350 {"ac-u", errSyntax}, 351 {"ac-u-ca", errSyntax}, 352 {"ac-u-ca-co-pinyin", errSyntax}, 353 {"noob", errSyntax}, 354 } 355 for _, tt := range tests { 356 _, err := Parse(tt.in) 357 if err != tt.out { 358 t.Errorf("%s: was %q; want %q", tt.in, err, tt.out) 359 } 360 } 361 } 362 363 func TestCompose1(t *testing.T) { 364 partChecks(t, func(tt *parseTest) (id Tag, skip bool) { 365 l, _ := ParseBase(tt.lang) 366 s, _ := ParseScript(tt.script) 367 r, _ := ParseRegion(tt.region) 368 v := []Variant{} 369 for _, x := range strings.Split(tt.variants, "-") { 370 p, _ := ParseVariant(x) 371 v = append(v, p) 372 } 373 e := []Extension{} 374 for _, x := range tt.extList { 375 p, _ := ParseExtension(x) 376 e = append(e, p) 377 } 378 id, _ = Raw.Compose(l, s, r, v, e) 379 return id, false 380 }) 381 } 382 383 func TestCompose2(t *testing.T) { 384 partChecks(t, func(tt *parseTest) (id Tag, skip bool) { 385 l, _ := ParseBase(tt.lang) 386 s, _ := ParseScript(tt.script) 387 r, _ := ParseRegion(tt.region) 388 p := []interface{}{l, s, r, s, r, l} 389 for _, x := range strings.Split(tt.variants, "-") { 390 v, _ := ParseVariant(x) 391 p = append(p, v) 392 } 393 for _, x := range tt.extList { 394 e, _ := ParseExtension(x) 395 p = append(p, e) 396 } 397 id, _ = Raw.Compose(p...) 398 return id, false 399 }) 400 } 401 402 func TestCompose3(t *testing.T) { 403 partChecks(t, func(tt *parseTest) (id Tag, skip bool) { 404 id, _ = Raw.Parse(tt.in) 405 id, _ = Raw.Compose(id) 406 return id, false 407 }) 408 } 409 410 func mk(s string) Tag { 411 return Raw.Make(s) 412 } 413 414 func TestParseAcceptLanguage(t *testing.T) { 415 type res struct { 416 t Tag 417 q float32 418 } 419 en := []res{{mk("en"), 1.0}} 420 tests := []struct { 421 out []res 422 in string 423 ok bool 424 }{ 425 {en, "en", true}, 426 {en, " en", true}, 427 {en, "en ", true}, 428 {en, " en ", true}, 429 {en, "en,", true}, 430 {en, ",en", true}, 431 {en, ",,,en,,,", true}, 432 {en, ",en;q=1", true}, 433 434 // We allow an empty input, contrary to spec. 435 {nil, "", true}, 436 {[]res{{mk("aa"), 1}}, "aa;", true}, // allow unspecified weight 437 438 // errors 439 {nil, ";", false}, 440 {nil, "$", false}, 441 {nil, "e;", false}, 442 {nil, "x;", false}, 443 {nil, "x", false}, 444 {nil, "ac", false}, // non-existing language 445 {nil, "aa;q", false}, 446 {nil, "aa;q=", false}, 447 {nil, "aa;q=.", false}, 448 449 // odd fallbacks 450 { 451 []res{{mk("en"), 0.1}}, 452 " english ;q=.1", 453 true, 454 }, 455 { 456 []res{{mk("it"), 1.0}, {mk("de"), 1.0}, {mk("fr"), 1.0}}, 457 " italian, deutsch, french", 458 true, 459 }, 460 461 // lists 462 { 463 []res{{mk("en"), 0.1}}, 464 "en;q=.1", 465 true, 466 }, 467 { 468 []res{{mk("mul"), 1.0}}, 469 "*", 470 true, 471 }, 472 { 473 []res{{mk("en"), 1.0}, {mk("de"), 1.0}}, 474 "en,de", 475 true, 476 }, 477 { 478 []res{{mk("en"), 1.0}, {mk("de"), .5}}, 479 "en,de;q=0.5", 480 true, 481 }, 482 { 483 []res{{mk("de"), 0.8}, {mk("en"), 0.5}}, 484 " en ; q = 0.5 , , de;q=0.8", 485 true, 486 }, 487 { 488 []res{{mk("en"), 1.0}, {mk("de"), 1.0}, {mk("fr"), 1.0}, {mk("tlh"), 1.0}}, 489 "en,de,fr,i-klingon", 490 true, 491 }, 492 // sorting 493 { 494 []res{{mk("tlh"), 0.4}, {mk("de"), 0.2}, {mk("fr"), 0.2}, {mk("en"), 0.1}}, 495 "en;q=0.1,de;q=0.2,fr;q=0.2,i-klingon;q=0.4", 496 true, 497 }, 498 // dropping 499 { 500 []res{{mk("fr"), 0.2}, {mk("en"), 0.1}}, 501 "en;q=0.1,de;q=0,fr;q=0.2,i-klingon;q=0.0", 502 true, 503 }, 504 } 505 for i, tt := range tests { 506 tags, qs, e := ParseAcceptLanguage(tt.in) 507 if e == nil != tt.ok { 508 t.Errorf("%d:%s:err: was %v; want %v", i, tt.in, e == nil, tt.ok) 509 } 510 for j, tag := range tags { 511 if out := tt.out[j]; !tag.equalTags(out.t) || qs[j] != out.q { 512 t.Errorf("%d:%s: was %s, %1f; want %s, %1f", i, tt.in, tag, qs[j], out.t, out.q) 513 break 514 } 515 } 516 } 517 }