golang.org/x/text@v0.14.0/language/display/maketables.go (about) 1 // Copyright 2014 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 //go:build ignore 6 7 // Generator for display name tables. 8 9 package main 10 11 import ( 12 "bytes" 13 "flag" 14 "fmt" 15 "log" 16 "reflect" 17 "sort" 18 "strings" 19 20 "golang.org/x/text/internal/gen" 21 "golang.org/x/text/language" 22 "golang.org/x/text/unicode/cldr" 23 ) 24 25 var ( 26 test = flag.Bool("test", false, 27 "test existing tables; can be used to compare web data with package data.") 28 outputFile = flag.String("output", "tables.go", "output file") 29 30 stats = flag.Bool("stats", false, "prints statistics to stderr") 31 32 short = flag.Bool("short", false, `Use "short" alternatives, when available.`) 33 draft = flag.String("draft", 34 "contributed", 35 `Minimal draft requirements (approved, contributed, provisional, unconfirmed).`) 36 pkg = flag.String("package", 37 "display", 38 "the name of the package in which the generated file is to be included") 39 40 tags = newTagSet("tags", 41 []language.Tag{}, 42 "space-separated list of tags to include or empty for all") 43 dict = newTagSet("dict", 44 dictTags(), 45 "space-separated list or tags for which to include a Dictionary. "+ 46 `"" means the common list from go.text/language.`) 47 ) 48 49 func dictTags() (tag []language.Tag) { 50 // TODO: replace with language.Common.Tags() once supported. 
51 const str = "af am ar ar-001 az bg bn ca cs da de el en en-US en-GB " + 52 "es es-ES es-419 et fa fi fil fr fr-CA gu he hi hr hu hy id is it ja " + 53 "ka kk km kn ko ky lo lt lv mk ml mn mr ms my ne nl no pa pl pt pt-BR " + 54 "pt-PT ro ru si sk sl sq sr sr-Latn sv sw ta te th tr uk ur uz vi " + 55 "zh zh-Hans zh-Hant zu" 56 57 for _, s := range strings.Split(str, " ") { 58 tag = append(tag, language.MustParse(s)) 59 } 60 return tag 61 } 62 63 func main() { 64 gen.Init() 65 66 // Read the CLDR zip file. 67 r := gen.OpenCLDRCoreZip() 68 defer r.Close() 69 70 d := &cldr.Decoder{} 71 d.SetDirFilter("main", "supplemental") 72 d.SetSectionFilter("localeDisplayNames") 73 data, err := d.DecodeZip(r) 74 if err != nil { 75 log.Fatalf("DecodeZip: %v", err) 76 } 77 78 w := gen.NewCodeWriter() 79 defer w.WriteGoFile(*outputFile, "display") 80 81 gen.WriteCLDRVersion(w) 82 83 b := builder{ 84 w: w, 85 data: data, 86 group: make(map[string]*group), 87 } 88 b.generate() 89 } 90 91 const tagForm = language.All 92 93 // tagSet is used to parse command line flags of tags. It implements the 94 // flag.Value interface. 95 type tagSet map[language.Tag]bool 96 97 func newTagSet(name string, tags []language.Tag, usage string) tagSet { 98 f := tagSet(make(map[language.Tag]bool)) 99 for _, t := range tags { 100 f[t] = true 101 } 102 flag.Var(f, name, usage) 103 return f 104 } 105 106 // String implements the String method of the flag.Value interface. 107 func (f tagSet) String() string { 108 tags := []string{} 109 for t := range f { 110 tags = append(tags, t.String()) 111 } 112 sort.Strings(tags) 113 return strings.Join(tags, " ") 114 } 115 116 // Set implements Set from the flag.Value interface. 
117 func (f tagSet) Set(s string) error { 118 if s != "" { 119 for _, s := range strings.Split(s, " ") { 120 if s != "" { 121 tag, err := tagForm.Parse(s) 122 if err != nil { 123 return err 124 } 125 f[tag] = true 126 } 127 } 128 } 129 return nil 130 } 131 132 func (f tagSet) contains(t language.Tag) bool { 133 if len(f) == 0 { 134 return true 135 } 136 return f[t] 137 } 138 139 // builder is used to create all tables with display name information. 140 type builder struct { 141 w *gen.CodeWriter 142 143 data *cldr.CLDR 144 145 fromLocs []string 146 147 // destination tags for the current locale. 148 toTags []string 149 toTagIndex map[string]int 150 151 // list of supported tags 152 supported []language.Tag 153 154 // key-value pairs per group 155 group map[string]*group 156 157 // statistics 158 sizeIndex int // total size of all indexes of headers 159 sizeData int // total size of all data of headers 160 totalSize int 161 } 162 163 type group struct { 164 // Maps from a given language to the Namer data for this language. 165 lang map[language.Tag]keyValues 166 headers []header 167 168 toTags []string 169 threeStart int 170 fourPlusStart int 171 } 172 173 // set sets the typ to the name for locale loc. 174 func (g *group) set(t language.Tag, typ, name string) { 175 kv := g.lang[t] 176 if kv == nil { 177 kv = make(keyValues) 178 g.lang[t] = kv 179 } 180 if kv[typ] == "" { 181 kv[typ] = name 182 } 183 } 184 185 type keyValues map[string]string 186 187 type header struct { 188 tag language.Tag 189 data string 190 index []uint16 191 } 192 193 var versionInfo = `// Version is deprecated. Use CLDRVersion. 194 const Version = %#v 195 196 ` 197 198 var self = language.MustParse("mul") 199 200 // generate builds and writes all tables. 
201 func (b *builder) generate() { 202 fmt.Fprintf(b.w, versionInfo, cldr.Version) 203 204 b.filter() 205 b.setData("lang", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) { 206 if ldn.Languages != nil { 207 for _, v := range ldn.Languages.Language { 208 lang := v.Type 209 if lang == "root" { 210 // We prefer the data from "und" 211 // TODO: allow both the data for root and und somehow. 212 continue 213 } 214 tag := tagForm.MustParse(lang) 215 if tags.contains(tag) { 216 g.set(loc, tag.String(), v.Data()) 217 } 218 } 219 } 220 }) 221 b.setData("script", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) { 222 if ldn.Scripts != nil { 223 for _, v := range ldn.Scripts.Script { 224 code := language.MustParseScript(v.Type) 225 if code.IsPrivateUse() { // Qaaa..Qabx 226 // TODO: data currently appears to be very meager. 227 // Reconsider if we have data for English. 228 if loc == language.English { 229 log.Fatal("Consider including data for private use scripts.") 230 } 231 continue 232 } 233 g.set(loc, code.String(), v.Data()) 234 } 235 } 236 }) 237 b.setData("region", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) { 238 if ldn.Territories != nil { 239 for _, v := range ldn.Territories.Territory { 240 g.set(loc, language.MustParseRegion(v.Type).String(), v.Data()) 241 } 242 } 243 }) 244 245 b.makeSupported() 246 247 b.writeParents() 248 249 b.writeGroup("lang") 250 b.writeGroup("script") 251 b.writeGroup("region") 252 253 b.w.WriteConst("numSupported", len(b.supported)) 254 buf := bytes.Buffer{} 255 for _, tag := range b.supported { 256 fmt.Fprint(&buf, tag.String(), "|") 257 } 258 b.w.WriteConst("supported", buf.String()) 259 260 b.writeDictionaries() 261 262 b.supported = []language.Tag{self} 263 264 // Compute the names of locales in their own language. Some of these names 265 // may be specified in their parent locales. 
We iterate the maximum depth 266 // of the parent three times to match successive parents of tags until a 267 // possible match is found. 268 for i := 0; i < 4; i++ { 269 b.setData("self", func(g *group, tag language.Tag, ldn *cldr.LocaleDisplayNames) { 270 parent := tag 271 if b, s, r := tag.Raw(); i > 0 && (s != language.Script{} && r == language.Region{}) { 272 parent, _ = language.Raw.Compose(b) 273 } 274 if ldn.Languages != nil { 275 for _, v := range ldn.Languages.Language { 276 key := tagForm.MustParse(v.Type) 277 saved := key 278 if key == parent { 279 g.set(self, tag.String(), v.Data()) 280 } 281 for k := 0; k < i; k++ { 282 key = key.Parent() 283 } 284 if key == tag { 285 g.set(self, saved.String(), v.Data()) // set does not overwrite a value. 286 } 287 } 288 } 289 }) 290 } 291 292 b.writeGroup("self") 293 } 294 295 func (b *builder) setData(name string, f func(*group, language.Tag, *cldr.LocaleDisplayNames)) { 296 b.sizeIndex = 0 297 b.sizeData = 0 298 b.toTags = nil 299 b.fromLocs = nil 300 b.toTagIndex = make(map[string]int) 301 302 g := b.group[name] 303 if g == nil { 304 g = &group{lang: make(map[language.Tag]keyValues)} 305 b.group[name] = g 306 } 307 for _, loc := range b.data.Locales() { 308 // We use RawLDML instead of LDML as we are managing our own inheritance 309 // in this implementation. 310 ldml := b.data.RawLDML(loc) 311 312 // We do not support the POSIX variant (it is not a supported BCP 47 313 // variant). This locale also doesn't happen to contain any data, so 314 // we'll skip it by checking for this. 
315 tag, err := tagForm.Parse(loc) 316 if err != nil { 317 if ldml.LocaleDisplayNames != nil { 318 log.Fatalf("setData: %v", err) 319 } 320 continue 321 } 322 if ldml.LocaleDisplayNames != nil && tags.contains(tag) { 323 f(g, tag, ldml.LocaleDisplayNames) 324 } 325 } 326 } 327 328 func (b *builder) filter() { 329 filter := func(s *cldr.Slice) { 330 if *short { 331 s.SelectOnePerGroup("alt", []string{"short", ""}) 332 } else { 333 s.SelectOnePerGroup("alt", []string{"stand-alone", ""}) 334 } 335 d, err := cldr.ParseDraft(*draft) 336 if err != nil { 337 log.Fatalf("filter: %v", err) 338 } 339 s.SelectDraft(d) 340 } 341 for _, loc := range b.data.Locales() { 342 if ldn := b.data.RawLDML(loc).LocaleDisplayNames; ldn != nil { 343 if ldn.Languages != nil { 344 s := cldr.MakeSlice(&ldn.Languages.Language) 345 if filter(&s); len(ldn.Languages.Language) == 0 { 346 ldn.Languages = nil 347 } 348 } 349 if ldn.Scripts != nil { 350 s := cldr.MakeSlice(&ldn.Scripts.Script) 351 if filter(&s); len(ldn.Scripts.Script) == 0 { 352 ldn.Scripts = nil 353 } 354 } 355 if ldn.Territories != nil { 356 s := cldr.MakeSlice(&ldn.Territories.Territory) 357 if filter(&s); len(ldn.Territories.Territory) == 0 { 358 ldn.Territories = nil 359 } 360 } 361 } 362 } 363 } 364 365 // makeSupported creates a list of all supported locales. 
366 func (b *builder) makeSupported() { 367 // tags across groups 368 for _, g := range b.group { 369 for t, _ := range g.lang { 370 b.supported = append(b.supported, t) 371 } 372 } 373 b.supported = b.supported[:unique(tagsSorter(b.supported))] 374 375 } 376 377 type tagsSorter []language.Tag 378 379 func (a tagsSorter) Len() int { return len(a) } 380 func (a tagsSorter) Swap(i, j int) { a[i], a[j] = a[j], a[i] } 381 func (a tagsSorter) Less(i, j int) bool { return a[i].String() < a[j].String() } 382 383 func (b *builder) writeGroup(name string) { 384 g := b.group[name] 385 386 for _, kv := range g.lang { 387 for t, _ := range kv { 388 g.toTags = append(g.toTags, t) 389 } 390 } 391 g.toTags = g.toTags[:unique(tagsBySize(g.toTags))] 392 393 // Allocate header per supported value. 394 g.headers = make([]header, len(b.supported)) 395 for i, sup := range b.supported { 396 kv, ok := g.lang[sup] 397 if !ok { 398 g.headers[i].tag = sup 399 continue 400 } 401 data := []byte{} 402 index := make([]uint16, len(g.toTags), len(g.toTags)+1) 403 for j, t := range g.toTags { 404 index[j] = uint16(len(data)) 405 data = append(data, kv[t]...) 406 } 407 index = append(index, uint16(len(data))) 408 409 // Trim the tail of the index. 410 // TODO: indexes can be reduced in size quite a bit more. 411 n := len(index) 412 for ; n >= 2 && index[n-2] == index[n-1]; n-- { 413 } 414 index = index[:n] 415 416 // Workaround for a bug in CLDR 26. 417 // See https://unicode.org/cldr/trac/ticket/8042. 418 if cldr.Version == "26" && sup.String() == "hsb" { 419 data = bytes.Replace(data, []byte{'"'}, nil, 1) 420 } 421 g.headers[i] = header{sup, string(data), index} 422 } 423 g.writeTable(b.w, name) 424 } 425 426 type tagsBySize []string 427 428 func (l tagsBySize) Len() int { return len(l) } 429 func (l tagsBySize) Swap(i, j int) { l[i], l[j] = l[j], l[i] } 430 func (l tagsBySize) Less(i, j int) bool { 431 a, b := l[i], l[j] 432 // Sort single-tag entries based on size first. Otherwise alphabetic. 
433 if len(a) != len(b) && (len(a) <= 4 || len(b) <= 4) { 434 return len(a) < len(b) 435 } 436 return a < b 437 } 438 439 // parentIndices returns slice a of len(tags) where tags[a[i]] is the parent 440 // of tags[i]. 441 func parentIndices(tags []language.Tag) []int16 { 442 index := make(map[language.Tag]int16) 443 for i, t := range tags { 444 index[t] = int16(i) 445 } 446 447 // Construct default parents. 448 parents := make([]int16, len(tags)) 449 for i, t := range tags { 450 parents[i] = -1 451 for t = t.Parent(); t != language.Und; t = t.Parent() { 452 if j, ok := index[t]; ok { 453 parents[i] = j 454 break 455 } 456 } 457 } 458 return parents 459 } 460 461 func (b *builder) writeParents() { 462 parents := parentIndices(b.supported) 463 fmt.Fprintf(b.w, "var parents = ") 464 b.w.WriteArray(parents) 465 } 466 467 // writeKeys writes keys to a special index used by the display package. 468 // tags are assumed to be sorted by length. 469 func writeKeys(w *gen.CodeWriter, name string, keys []string) { 470 w.Size += int(3 * reflect.TypeOf("").Size()) 471 w.WriteComment("Number of keys: %d", len(keys)) 472 fmt.Fprintf(w, "var (\n\t%sIndex = tagIndex{\n", name) 473 for i := 2; i <= 4; i++ { 474 sub := []string{} 475 for _, t := range keys { 476 if len(t) != i { 477 break 478 } 479 sub = append(sub, t) 480 } 481 s := strings.Join(sub, "") 482 w.WriteString(s) 483 fmt.Fprintf(w, ",\n") 484 keys = keys[len(sub):] 485 } 486 fmt.Fprintln(w, "\t}") 487 if len(keys) > 0 { 488 w.Size += int(reflect.TypeOf([]string{}).Size()) 489 fmt.Fprintf(w, "\t%sTagsLong = ", name) 490 w.WriteSlice(keys) 491 } 492 fmt.Fprintln(w, ")\n") 493 } 494 495 // identifier creates an identifier from the given tag. 
496 func identifier(t language.Tag) string { 497 return strings.Replace(t.String(), "-", "", -1) 498 } 499 500 func (h *header) writeEntry(w *gen.CodeWriter, name string) { 501 if len(dict) > 0 && dict.contains(h.tag) { 502 fmt.Fprintf(w, "\t{ // %s\n", h.tag) 503 fmt.Fprintf(w, "\t\t%[1]s%[2]sStr,\n\t\t%[1]s%[2]sIdx,\n", identifier(h.tag), name) 504 fmt.Fprintln(w, "\t},") 505 } else if len(h.data) == 0 { 506 fmt.Fprintln(w, "\t\t{}, //", h.tag) 507 } else { 508 fmt.Fprintf(w, "\t{ // %s\n", h.tag) 509 w.WriteString(h.data) 510 fmt.Fprintln(w, ",") 511 w.WriteSlice(h.index) 512 fmt.Fprintln(w, ",\n\t},") 513 } 514 } 515 516 // write the data for the given header as single entries. The size for this data 517 // was already accounted for in writeEntry. 518 func (h *header) writeSingle(w *gen.CodeWriter, name string) { 519 if len(dict) > 0 && dict.contains(h.tag) { 520 tag := identifier(h.tag) 521 w.WriteConst(tag+name+"Str", h.data) 522 523 // Note that we create a slice instead of an array. If we use an array 524 // we need to refer to it as a[:] in other tables, which will cause the 525 // array to always be included by the linker. See Issue 7651. 526 w.WriteVar(tag+name+"Idx", h.index) 527 } 528 } 529 530 // writeTable writes an entry for a single Namer. 
531 func (g *group) writeTable(w *gen.CodeWriter, name string) { 532 start := w.Size 533 writeKeys(w, name, g.toTags) 534 w.Size += len(g.headers) * int(reflect.ValueOf(g.headers[0]).Type().Size()) 535 536 fmt.Fprintf(w, "var %sHeaders = [%d]header{\n", name, len(g.headers)) 537 538 title := strings.Title(name) 539 for _, h := range g.headers { 540 h.writeEntry(w, title) 541 } 542 fmt.Fprintln(w, "}\n") 543 544 for _, h := range g.headers { 545 h.writeSingle(w, title) 546 } 547 n := w.Size - start 548 fmt.Fprintf(w, "// Total size for %s: %d bytes (%d KB)\n\n", name, n, n/1000) 549 } 550 551 func (b *builder) writeDictionaries() { 552 fmt.Fprintln(b.w, "// Dictionary entries of frequent languages") 553 fmt.Fprintln(b.w, "var (") 554 parents := parentIndices(b.supported) 555 556 for i, t := range b.supported { 557 if dict.contains(t) { 558 ident := identifier(t) 559 fmt.Fprintf(b.w, "\t%s = Dictionary{ // %s\n", ident, t) 560 if p := parents[i]; p == -1 { 561 fmt.Fprintln(b.w, "\t\tnil,") 562 } else { 563 fmt.Fprintf(b.w, "\t\t&%s,\n", identifier(b.supported[p])) 564 } 565 fmt.Fprintf(b.w, "\t\theader{%[1]sLangStr, %[1]sLangIdx},\n", ident) 566 fmt.Fprintf(b.w, "\t\theader{%[1]sScriptStr, %[1]sScriptIdx},\n", ident) 567 fmt.Fprintf(b.w, "\t\theader{%[1]sRegionStr, %[1]sRegionIdx},\n", ident) 568 fmt.Fprintln(b.w, "\t}") 569 } 570 } 571 fmt.Fprintln(b.w, ")") 572 573 var s string 574 var a []uint16 575 sz := reflect.TypeOf(s).Size() 576 sz += reflect.TypeOf(a).Size() 577 sz *= 3 578 sz += reflect.TypeOf(&a).Size() 579 n := int(sz) * len(dict) 580 fmt.Fprintf(b.w, "// Total size for %d entries: %d bytes (%d KB)\n\n", len(dict), n, n/1000) 581 582 b.w.Size += n 583 } 584 585 // unique sorts the given lists and removes duplicate entries by swapping them 586 // past position k, where k is the number of unique values. It returns k. 
func unique(a sort.Interface) int {
	if a.Len() == 0 {
		return 0
	}
	sort.Sort(a)
	// Elements [0, k) are the distinct values found so far, in sorted order.
	k := 1
	for i := 1; i < a.Len(); i++ {
		// After sorting, element i is new exactly when it is greater than
		// the last distinct value.
		if a.Less(k-1, i) {
			if k != i {
				a.Swap(k, i)
			}
			k++
		}
	}
	return k
}