golang.org/x/text@v0.14.0/language/parse_test.go (about) 1 // Copyright 2013 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package language 6 7 import ( 8 "strings" 9 "testing" 10 11 "golang.org/x/text/internal/language" 12 ) 13 14 // equalTags compares language, script and region subtags only. 15 func (t Tag) equalTags(a Tag) bool { 16 return t.lang() == a.lang() && 17 t.script() == a.script() && 18 t.region() == a.region() 19 } 20 21 var errSyntax = language.ErrSyntax 22 23 type parseTest struct { 24 i int // the index of this test 25 in string 26 lang, script, region string 27 variants, ext string 28 extList []string // only used when more than one extension is present 29 invalid bool 30 rewrite bool // special rewrite not handled by parseTag 31 changed bool // string needed to be reformatted 32 } 33 34 func parseTests() []parseTest { 35 tests := []parseTest{ 36 {in: "root", lang: "und"}, 37 {in: "und", lang: "und"}, 38 {in: "en", lang: "en"}, 39 40 {in: "en-US-u-va-posix", lang: "en", region: "US", ext: "u-va-posix"}, 41 {in: "ca-ES-valencia", lang: "ca", region: "ES", variants: "valencia"}, 42 {in: "en-US-u-rg-gbzzzz", lang: "en", region: "US", ext: "u-rg-gbzzzz"}, 43 44 {in: "xy", lang: "und", invalid: true}, 45 {in: "en-ZY", lang: "en", invalid: true}, 46 {in: "gsw", lang: "gsw"}, 47 {in: "sr_Latn", lang: "sr", script: "Latn"}, 48 {in: "af-Arab", lang: "af", script: "Arab"}, 49 {in: "nl-BE", lang: "nl", region: "BE"}, 50 {in: "es-419", lang: "es", region: "419"}, 51 {in: "und-001", lang: "und", region: "001"}, 52 {in: "de-latn-be", lang: "de", script: "Latn", region: "BE"}, 53 // Variants 54 {in: "de-1901", lang: "de", variants: "1901"}, 55 // Accept with unsuppressed script. 56 {in: "de-Latn-1901", lang: "de", script: "Latn", variants: "1901"}, 57 // Specialized. 58 {in: "sl-rozaj", lang: "sl", variants: "rozaj"}, 59 {in: "sl-rozaj-lipaw", lang: "sl", variants: "rozaj-lipaw"}, 60 {in: "sl-rozaj-biske", lang: "sl", variants: "rozaj-biske"}, 61 {in: "sl-rozaj-biske-1994", lang: "sl", variants: "rozaj-biske-1994"}, 62 {in: "sl-rozaj-1994", lang: "sl", variants: "rozaj-1994"}, 63 // Maximum number of variants while adhering to prefix rules. 64 {in: "sl-rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp", lang: "sl", variants: "rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp"}, 65 66 // Sorting. 67 {in: "sl-1994-biske-rozaj", lang: "sl", variants: "rozaj-biske-1994", changed: true}, 68 {in: "sl-rozaj-biske-1994-alalc97-fonupa-fonipa-fonxsamp", lang: "sl", variants: "rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp", changed: true}, 69 {in: "nl-fonxsamp-alalc97-fonipa-fonupa", lang: "nl", variants: "alalc97-fonipa-fonupa-fonxsamp", changed: true}, 70 71 // Duplicates variants are removed, but not an error. 72 {in: "nl-fonupa-fonupa", lang: "nl", variants: "fonupa"}, 73 74 // Variants that do not have correct prefixes. We still accept these. 75 {in: "de-Cyrl-1901", lang: "de", script: "Cyrl", variants: "1901"}, 76 {in: "sl-rozaj-lipaw-1994", lang: "sl", variants: "rozaj-lipaw-1994"}, 77 {in: "sl-1994-biske-rozaj-1994-biske-rozaj", lang: "sl", variants: "rozaj-biske-1994", changed: true}, 78 {in: "de-Cyrl-1901", lang: "de", script: "Cyrl", variants: "1901"}, 79 80 // Invalid variant. 81 {in: "de-1902", lang: "de", variants: "", invalid: true}, 82 83 {in: "EN_CYRL", lang: "en", script: "Cyrl"}, 84 // private use and extensions 85 {in: "x-a-b-c-d", ext: "x-a-b-c-d"}, 86 {in: "x_A.-B-C_D", ext: "x-b-c-d", invalid: true, changed: true}, 87 {in: "x-aa-bbbb-cccccccc-d", ext: "x-aa-bbbb-cccccccc-d"}, 88 {in: "en-c_cc-b-bbb-a-aaa", lang: "en", changed: true, extList: []string{"a-aaa", "b-bbb", "c-cc"}}, 89 {in: "en-x_cc-b-bbb-a-aaa", lang: "en", ext: "x-cc-b-bbb-a-aaa", changed: true}, 90 {in: "en-c_cc-b-bbb-a-aaa-x-x", lang: "en", changed: true, extList: []string{"a-aaa", "b-bbb", "c-cc", "x-x"}}, 91 {in: "en-v-c", lang: "en", ext: "", invalid: true}, 92 {in: "en-v-abcdefghi", lang: "en", ext: "", invalid: true}, 93 {in: "en-v-abc-x", lang: "en", ext: "v-abc", invalid: true}, 94 {in: "en-v-abc-x-", lang: "en", ext: "v-abc", invalid: true}, 95 {in: "en-v-abc-w-x-xx", lang: "en", extList: []string{"v-abc", "x-xx"}, invalid: true, changed: true}, 96 {in: "en-v-abc-w-y-yx", lang: "en", extList: []string{"v-abc", "y-yx"}, invalid: true, changed: true}, 97 {in: "en-v-c-abc", lang: "en", ext: "c-abc", invalid: true, changed: true}, 98 {in: "en-v-w-abc", lang: "en", ext: "w-abc", invalid: true, changed: true}, 99 {in: "en-v-x-abc", lang: "en", ext: "x-abc", invalid: true, changed: true}, 100 {in: "en-v-x-a", lang: "en", ext: "x-a", invalid: true, changed: true}, 101 {in: "en-9-aa-0-aa-z-bb-x-a", lang: "en", extList: []string{"0-aa", "9-aa", "z-bb", "x-a"}, changed: true}, 102 {in: "en-u-c", lang: "en", ext: "", invalid: true}, 103 {in: "en-u-co-phonebk", lang: "en", ext: "u-co-phonebk"}, 104 {in: "en-u-co-phonebk-ca", lang: "en", ext: "u-ca-co-phonebk", invalid: true}, 105 {in: "en-u-nu-arabic-co-phonebk-ca", lang: "en", ext: "u-ca-co-phonebk-nu-arabic", invalid: true, changed: true}, 106 {in: "en-u-nu-arabic-co-phonebk-ca-x", lang: "en", ext: "u-ca-co-phonebk-nu-arabic", invalid: true, changed: true}, 107 {in: "en-u-nu-arabic-co-phonebk-ca-s", lang: "en", ext: "u-ca-co-phonebk-nu-arabic", invalid: true, changed: true}, 108 {in: "en-u-nu-arabic-co-phonebk-ca-a12345678", lang: "en", ext: "u-ca-co-phonebk-nu-arabic", invalid: true, changed: true}, 109 {in: "en-u-co-phonebook", lang: "en", ext: "u-co", invalid: true}, 110 {in: "en-u-co-phonebook-cu-xau", lang: "en", ext: "u-co-cu-xau", invalid: true, changed: true}, 111 {in: "en-Cyrl-u-co-phonebk", lang: "en", script: "Cyrl", ext: "u-co-phonebk"}, 112 {in: "en-US-u-co-phonebk", lang: "en", region: "US", ext: "u-co-phonebk"}, 113 {in: "en-US-u-co-phonebk-cu-xau", lang: "en", region: "US", ext: "u-co-phonebk-cu-xau"}, 114 {in: "en-scotland-u-co-phonebk", lang: "en", variants: "scotland", ext: "u-co-phonebk"}, 115 {in: "en-u-cu-xua-co-phonebk", lang: "en", ext: "u-co-phonebk-cu-xua", changed: true}, 116 {in: "en-u-def-abc-cu-xua-co-phonebk", lang: "en", ext: "u-abc-def-co-phonebk-cu-xua", changed: true}, 117 {in: "en-u-def-abc", lang: "en", ext: "u-abc-def", changed: true}, 118 {in: "en-u-cu-xua-co-phonebk-a-cd", lang: "en", extList: []string{"a-cd", "u-co-phonebk-cu-xua"}, changed: true}, 119 // Invalid "u" extension. Drop invalid parts. 120 {in: "en-u-cu-co-phonebk", lang: "en", extList: []string{"u-co-phonebk-cu"}, invalid: true, changed: true}, 121 {in: "en-u-cu-xau-co", lang: "en", extList: []string{"u-co-cu-xau"}, invalid: true}, 122 // We allow duplicate keys as the LDML spec does not explicitly prohibit it. 123 // TODO: Consider eliminating duplicates and returning an error. 124 {in: "en-u-cu-xau-co-phonebk-cu-xau", lang: "en", ext: "u-co-phonebk-cu-xau", changed: true}, 125 {in: "en-t-en-Cyrl-NL-fonipa", lang: "en", ext: "t-en-cyrl-nl-fonipa", changed: true}, 126 {in: "en-t-en-Cyrl-NL-fonipa-t0-abc-def", lang: "en", ext: "t-en-cyrl-nl-fonipa-t0-abc-def", changed: true}, 127 {in: "en-t-t0-abcd", lang: "en", ext: "t-t0-abcd"}, 128 // Not necessary to have changed here. 129 {in: "en-t-nl-abcd", lang: "en", ext: "t-nl", invalid: true}, 130 {in: "en-t-nl-latn", lang: "en", ext: "t-nl-latn"}, 131 {in: "en-t-t0-abcd-x-a", lang: "en", extList: []string{"t-t0-abcd", "x-a"}}, 132 {in: "en_t_pt_MLt", lang: "en", ext: "t-pt-mlt", changed: true}, 133 {in: "en-t-fr-est", lang: "en", ext: "t-fr-est", changed: false}, 134 {in: "fr-est", lang: "et", changed: true}, 135 {in: "fr-est-t-fr-est", lang: "et", ext: "t-fr-est", changed: true}, 136 {in: "fr-est-Cyrl", lang: "et", script: "Cyrl", changed: true}, 137 // invalid 138 {in: "", lang: "und", invalid: true}, 139 {in: "-", lang: "und", invalid: true}, 140 {in: "x", lang: "und", invalid: true}, 141 {in: "x-", lang: "und", invalid: true}, 142 {in: "x--", lang: "und", invalid: true}, 143 {in: "a-a-b-c-d", lang: "und", invalid: true}, 144 {in: "en-", lang: "en", invalid: true}, 145 {in: "enne-", lang: "und", invalid: true}, 146 {in: "en.", lang: "und", invalid: true}, 147 {in: "en.-latn", lang: "und", invalid: true}, 148 {in: "en.-en", lang: "en", invalid: true}, 149 {in: "x-a-tooManyChars-c-d", ext: "x-a-c-d", invalid: true, changed: true}, 150 {in: "a-tooManyChars-c-d", lang: "und", invalid: true}, 151 // TODO: check key-value validity 152 // { in: "en-u-cu-xd", lang: "en", ext: "u-cu-xd", invalid: true }, 153 {in: "en-t-abcd", lang: "en", invalid: true}, 154 {in: "en-Latn-US-en", lang: "en", script: "Latn", region: "US", invalid: true}, 155 // rewrites (more tests in TestGrandfathered) 156 {in: "zh-min-nan", lang: "nan"}, 157 {in: "zh-yue", lang: "yue"}, 158 {in: "zh-xiang", lang: "hsn", rewrite: true}, 159 {in: "zh-guoyu", lang: "cmn", rewrite: true}, 160 {in: "iw", lang: "iw"}, 161 {in: "sgn-BE-FR", lang: "sfb", rewrite: true}, 162 {in: "i-klingon", lang: "tlh", rewrite: true}, 163 } 164 for i, tt := range tests { 165 tests[i].i = i 166 if tt.extList != nil { 167 tests[i].ext = strings.Join(tt.extList, "-") 168 } 169 if tt.ext != "" && tt.extList == nil { 170 tests[i].extList = []string{tt.ext} 171 } 172 } 173 return tests 174 } 175 176 // partChecks runs checks for each part by calling the function returned by f. 177 func partChecks(t *testing.T, f func(*parseTest) (Tag, bool)) { 178 for i, tt := range parseTests() { 179 tag, skip := f(&tt) 180 if skip { 181 continue 182 } 183 if l, _ := language.ParseBase(tt.lang); l != tag.lang() { 184 t.Errorf("%d: lang was %q; want %q", i, tag.lang(), l) 185 } 186 if sc, _ := language.ParseScript(tt.script); sc != tag.script() { 187 t.Errorf("%d: script was %q; want %q", i, tag.script(), sc) 188 } 189 if r, _ := language.ParseRegion(tt.region); r != tag.region() { 190 t.Errorf("%d: region was %q; want %q", i, tag.region(), r) 191 } 192 v := tag.tag().Variants() 193 if v != "" { 194 v = v[1:] 195 } 196 if v != tt.variants { 197 t.Errorf("%d: variants was %q; want %q", i, v, tt.variants) 198 } 199 if e := strings.Join(tag.tag().Extensions(), "-"); e != tt.ext { 200 t.Errorf("%d: extensions were %q; want %q", i, e, tt.ext) 201 } 202 } 203 } 204 205 func TestParse(t *testing.T) { 206 partChecks(t, func(tt *parseTest) (id Tag, skip bool) { 207 id, _ = Raw.Parse(tt.in) 208 return id, false 209 }) 210 } 211 212 func TestErrors(t *testing.T) { 213 mkInvalid := func(s string) error { 214 return language.NewValueError([]byte(s)) 215 } 216 tests := []struct { 217 in string 218 out error 219 }{ 220 // invalid subtags. 221 {"ac", mkInvalid("ac")}, 222 {"AC", mkInvalid("ac")}, 223 {"aa-Uuuu", mkInvalid("Uuuu")}, 224 {"aa-AB", mkInvalid("AB")}, 225 // ill-formed wins over invalid. 226 {"ac-u", errSyntax}, 227 {"ac-u-ca", mkInvalid("ac")}, 228 {"ac-u-ca-co-pinyin", mkInvalid("ac")}, 229 {"noob", errSyntax}, 230 } 231 for _, tt := range tests { 232 _, err := Parse(tt.in) 233 if err != tt.out { 234 t.Errorf("%s: was %q; want %q", tt.in, err, tt.out) 235 } 236 } 237 } 238 239 func TestCompose1(t *testing.T) { 240 partChecks(t, func(tt *parseTest) (id Tag, skip bool) { 241 l, _ := ParseBase(tt.lang) 242 s, _ := ParseScript(tt.script) 243 r, _ := ParseRegion(tt.region) 244 v := []Variant{} 245 for _, x := range strings.Split(tt.variants, "-") { 246 p, _ := ParseVariant(x) 247 v = append(v, p) 248 } 249 e := []Extension{} 250 for _, x := range tt.extList { 251 p, _ := ParseExtension(x) 252 e = append(e, p) 253 } 254 id, _ = Raw.Compose(l, s, r, v, e) 255 return id, false 256 }) 257 } 258 259 func TestCompose2(t *testing.T) { 260 partChecks(t, func(tt *parseTest) (id Tag, skip bool) { 261 l, _ := ParseBase(tt.lang) 262 s, _ := ParseScript(tt.script) 263 r, _ := ParseRegion(tt.region) 264 p := []interface{}{l, s, r, s, r, l} 265 for _, x := range strings.Split(tt.variants, "-") { 266 if x != "" { 267 v, _ := ParseVariant(x) 268 p = append(p, v) 269 } 270 } 271 for _, x := range tt.extList { 272 e, _ := ParseExtension(x) 273 p = append(p, e) 274 } 275 id, _ = Raw.Compose(p...) 276 return id, false 277 }) 278 } 279 280 func TestCompose3(t *testing.T) { 281 partChecks(t, func(tt *parseTest) (id Tag, skip bool) { 282 id, _ = Raw.Parse(tt.in) 283 id, _ = Raw.Compose(id) 284 return id, false 285 }) 286 } 287 288 func mk(s string) Tag { 289 return Raw.Make(s) 290 } 291 292 func TestParseAcceptLanguage(t *testing.T) { 293 type res struct { 294 t Tag 295 q float32 296 } 297 en := []res{{mk("en"), 1.0}} 298 tests := []struct { 299 out []res 300 in string 301 ok bool 302 }{ 303 {en, "en", true}, 304 {en, " en", true}, 305 {en, "en ", true}, 306 {en, " en ", true}, 307 {en, "en,", true}, 308 {en, ",en", true}, 309 {en, ",,,en,,,", true}, 310 {en, ",en;q=1", true}, 311 312 // We allow an empty input, contrary to spec. 313 {nil, "", true}, 314 {[]res{{mk("aa"), 1}}, "aa;", true}, // allow unspecified weight 315 316 // errors 317 {nil, ";", false}, 318 {nil, "$", false}, 319 {nil, "e;", false}, 320 {nil, "x;", false}, 321 {nil, "x", false}, 322 {nil, "ac", false}, // non-existing language 323 {nil, "aa;q", false}, 324 {nil, "aa;q=", false}, 325 {nil, "aa;q=.", false}, 326 {nil, "00-t-0o", false}, 327 328 // odd fallbacks 329 { 330 []res{{mk("en"), 0.1}}, 331 " english ;q=.1", 332 true, 333 }, 334 { 335 []res{{mk("it"), 1.0}, {mk("de"), 1.0}, {mk("fr"), 1.0}}, 336 " italian, deutsch, french", 337 true, 338 }, 339 340 // lists 341 { 342 []res{{mk("en"), 0.1}}, 343 "en;q=.1", 344 true, 345 }, 346 { 347 []res{{mk("mul"), 1.0}}, 348 "*", 349 true, 350 }, 351 { 352 []res{{mk("en"), 1.0}, {mk("de"), 1.0}}, 353 "en,de", 354 true, 355 }, 356 { 357 []res{{mk("en"), 1.0}, {mk("de"), .5}}, 358 "en,de;q=0.5", 359 true, 360 }, 361 { 362 []res{{mk("de"), 0.8}, {mk("en"), 0.5}}, 363 " en ; q = 0.5 , , de;q=0.8", 364 true, 365 }, 366 { 367 []res{{mk("en"), 1.0}, {mk("de"), 1.0}, {mk("fr"), 1.0}, {mk("tlh"), 1.0}}, 368 "en,de,fr,i-klingon", 369 true, 370 }, 371 // sorting 372 { 373 []res{{mk("tlh"), 0.4}, {mk("de"), 0.2}, {mk("fr"), 0.2}, {mk("en"), 0.1}}, 374 "en;q=0.1,de;q=0.2,fr;q=0.2,i-klingon;q=0.4", 375 true, 376 }, 377 // dropping 378 { 379 []res{{mk("fr"), 0.2}, {mk("en"), 0.1}}, 380 "en;q=0.1,de;q=0,fr;q=0.2,i-klingon;q=0.0", 381 true, 382 }, 383 } 384 for i, tt := range tests { 385 tags, qs, e := ParseAcceptLanguage(tt.in) 386 if e == nil != tt.ok { 387 t.Errorf("%d:%s:err: was %v; want %v", i, tt.in, e == nil, tt.ok) 388 } 389 for j, tag := range tags { 390 if out := tt.out[j]; !tag.equalTags(out.t) || qs[j] != out.q { 391 t.Errorf("%d:%s: was %s, %1f; want %s, %1f", i, tt.in, tag, qs[j], out.t, out.q) 392 break 393 } 394 } 395 } 396 } 397 398 func TestParseAcceptLanguageTooBig(t *testing.T) { 399 s := strings.Repeat("en-x-a-", 333) 400 _, _, err := ParseAcceptLanguage(s) 401 if err != language.ErrSyntax { 402 t.Errorf("ParseAcceptLanguage() unexpected error: got %v, want %v", err, language.ErrSyntax) 403 } 404 s += "en-x-a" 405 _, _, err = ParseAcceptLanguage(s) 406 if err != errTagListTooLarge { 407 t.Errorf("ParseAcceptLanguage() unexpected error: got %v, want %v", err, errTagListTooLarge) 408 } 409 }