github.com/graybobo/golang.org-package-offline-cache@v0.0.0-20200626051047-6608995c132f/x/text/language/display/maketables.go (about) 1 // Copyright 2014 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // +build ignore 6 7 // Generator for display name tables. 8 9 package main 10 11 import ( 12 "bytes" 13 "flag" 14 "fmt" 15 "log" 16 "reflect" 17 "sort" 18 "strings" 19 20 "golang.org/x/text/internal/gen" 21 "golang.org/x/text/language" 22 "golang.org/x/text/unicode/cldr" 23 ) 24 25 var ( 26 test = flag.Bool("test", false, 27 "test existing tables; can be used to compare web data with package data.") 28 outputFile = flag.String("output", "tables.go", "output file") 29 30 stats = flag.Bool("stats", false, "prints statistics to stderr") 31 32 short = flag.Bool("short", false, `Use "short" alternatives, when available.`) 33 draft = flag.String("draft", 34 "contributed", 35 `Minimal draft requirements (approved, contributed, provisional, unconfirmed).`) 36 pkg = flag.String("package", 37 "display", 38 "the name of the package in which the generated file is to be included") 39 40 tags = newTagSet("tags", 41 []language.Tag{}, 42 "space-separated list of tags to include or empty for all") 43 dict = newTagSet("dict", 44 dictTags(), 45 "space-separated list or tags for which to include a Dictionary. "+ 46 `"" means the common list from go.text/language.`) 47 ) 48 49 func dictTags() (tag []language.Tag) { 50 // TODO: replace with language.Common.Tags() once supported. 
51 const str = "af am ar ar-001 az bg bn ca cs da de el en en-US en-GB " + 52 "es es-ES es-419 et fa fi fil fr fr-CA gu he hi hr hu hy id is it ja " + 53 "ka kk km kn ko ky lo lt lv mk ml mn mr ms my ne nl no pa pl pt pt-BR " + 54 "pt-PT ro ru si sk sl sq sr sr-Latn sv sw ta te th tr uk ur uz vi " + 55 "zh zh-Hans zh-Hant zu" 56 57 for _, s := range strings.Split(str, " ") { 58 tag = append(tag, language.MustParse(s)) 59 } 60 return tag 61 } 62 63 func main() { 64 gen.Init() 65 66 // Read the CLDR zip file. 67 r := gen.OpenCLDRCoreZip() 68 defer r.Close() 69 70 d := &cldr.Decoder{} 71 d.SetDirFilter("main", "supplemental") 72 d.SetSectionFilter("localeDisplayNames") 73 data, err := d.DecodeZip(r) 74 if err != nil { 75 log.Fatalf("DecodeZip: %v", err) 76 } 77 78 w := gen.NewCodeWriter() 79 defer w.WriteGoFile(*outputFile, "display") 80 81 gen.WriteCLDRVersion(w) 82 83 b := builder{ 84 w: w, 85 data: data, 86 group: make(map[string]*group), 87 } 88 b.generate() 89 } 90 91 const tagForm = language.All 92 93 // tagSet is used to parse command line flags of tags. It implements the 94 // flag.Value interface. 95 type tagSet map[language.Tag]bool 96 97 func newTagSet(name string, tags []language.Tag, usage string) tagSet { 98 f := tagSet(make(map[language.Tag]bool)) 99 for _, t := range tags { 100 f[t] = true 101 } 102 flag.Var(f, name, usage) 103 return f 104 } 105 106 // String implements the String method of the flag.Value interface. 107 func (f tagSet) String() string { 108 tags := []string{} 109 for t := range f { 110 tags = append(tags, t.String()) 111 } 112 sort.Strings(tags) 113 return strings.Join(tags, " ") 114 } 115 116 // Set implements Set from the flag.Value interface. 
117 func (f tagSet) Set(s string) error { 118 if s != "" { 119 for _, s := range strings.Split(s, " ") { 120 if s != "" { 121 tag, err := tagForm.Parse(s) 122 if err != nil { 123 return err 124 } 125 f[tag] = true 126 } 127 } 128 } 129 return nil 130 } 131 132 func (f tagSet) contains(t language.Tag) bool { 133 if len(f) == 0 { 134 return true 135 } 136 return f[t] 137 } 138 139 // builder is used to create all tables with display name information. 140 type builder struct { 141 w *gen.CodeWriter 142 143 data *cldr.CLDR 144 145 fromLocs []string 146 147 // destination tags for the current locale. 148 toTags []string 149 toTagIndex map[string]int 150 151 // list of supported tags 152 supported []language.Tag 153 154 // key-value pairs per group 155 group map[string]*group 156 157 // statistics 158 sizeIndex int // total size of all indexes of headers 159 sizeData int // total size of all data of headers 160 totalSize int 161 } 162 163 type group struct { 164 // Maps from a given language to the Namer data for this language. 165 lang map[language.Tag]keyValues 166 headers []header 167 168 toTags []string 169 threeStart int 170 fourPlusStart int 171 } 172 173 // set sets the typ to the name for locale loc. 174 func (g *group) set(t language.Tag, typ, name string) { 175 kv := g.lang[t] 176 if kv == nil { 177 kv = make(keyValues) 178 g.lang[t] = kv 179 } 180 if kv[typ] == "" { 181 kv[typ] = name 182 } 183 } 184 185 type keyValues map[string]string 186 187 type header struct { 188 tag language.Tag 189 data string 190 index []uint16 191 } 192 193 var versionInfo = `// Version is deprecated. Use CLDRVersion. 194 const Version = %#v 195 196 ` 197 198 var self = language.MustParse("mul") 199 200 // generate builds and writes all tables. 
201 func (b *builder) generate() { 202 fmt.Fprintf(b.w, versionInfo, cldr.Version) 203 204 b.filter() 205 b.setData("lang", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) { 206 if ldn.Languages != nil { 207 for _, v := range ldn.Languages.Language { 208 tag := tagForm.MustParse(v.Type) 209 if tags.contains(tag) { 210 g.set(loc, tag.String(), v.Data()) 211 } 212 } 213 } 214 }) 215 b.setData("script", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) { 216 if ldn.Scripts != nil { 217 for _, v := range ldn.Scripts.Script { 218 code := language.MustParseScript(v.Type) 219 if code.IsPrivateUse() { // Qaaa..Qabx 220 // TODO: data currently appears to be very meager. 221 // Reconsider if we have data for English. 222 if loc == language.English { 223 log.Fatal("Consider including data for private use scripts.") 224 } 225 continue 226 } 227 g.set(loc, code.String(), v.Data()) 228 } 229 } 230 }) 231 b.setData("region", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) { 232 if ldn.Territories != nil { 233 for _, v := range ldn.Territories.Territory { 234 g.set(loc, language.MustParseRegion(v.Type).String(), v.Data()) 235 } 236 } 237 }) 238 239 b.makeSupported() 240 241 b.writeParents() 242 243 b.writeGroup("lang") 244 b.writeGroup("script") 245 b.writeGroup("region") 246 247 b.w.WriteConst("numSupported", len(b.supported)) 248 buf := bytes.Buffer{} 249 for _, tag := range b.supported { 250 fmt.Fprint(&buf, tag.String(), "|") 251 } 252 b.w.WriteConst("supported", buf.String()) 253 254 b.writeDictionaries() 255 256 b.supported = []language.Tag{self} 257 258 // Compute the names of locales in their own language. Some of these names 259 // may be specified in their parent locales. We iterate the maximum depth 260 // of the parent three times to match successive parents of tags until a 261 // possible match is found. 
262 for i := 0; i < 4; i++ { 263 b.setData("self", func(g *group, tag language.Tag, ldn *cldr.LocaleDisplayNames) { 264 parent := tag 265 if b, s, r := tag.Raw(); i > 0 && (s != language.Script{} && r == language.Region{}) { 266 parent, _ = language.Raw.Compose(b) 267 } 268 if ldn.Languages != nil { 269 for _, v := range ldn.Languages.Language { 270 key := tagForm.MustParse(v.Type) 271 saved := key 272 if key == parent { 273 g.set(self, tag.String(), v.Data()) 274 } 275 for k := 0; k < i; k++ { 276 key = key.Parent() 277 } 278 if key == tag { 279 g.set(self, saved.String(), v.Data()) // set does not overwrite a value. 280 } 281 } 282 } 283 }) 284 } 285 286 b.writeGroup("self") 287 } 288 289 func (b *builder) setData(name string, f func(*group, language.Tag, *cldr.LocaleDisplayNames)) { 290 b.sizeIndex = 0 291 b.sizeData = 0 292 b.toTags = nil 293 b.fromLocs = nil 294 b.toTagIndex = make(map[string]int) 295 296 g := b.group[name] 297 if g == nil { 298 g = &group{lang: make(map[language.Tag]keyValues)} 299 b.group[name] = g 300 } 301 for _, loc := range b.data.Locales() { 302 // We use RawLDML instead of LDML as we are managing our own inheritance 303 // in this implementation. 304 ldml := b.data.RawLDML(loc) 305 306 // We do not support the POSIX variant (it is not a supported BCP 47 307 // variant). This locale also doesn't happen to contain any data, so 308 // we'll skip it by checking for this. 
309 tag, err := tagForm.Parse(loc) 310 if err != nil { 311 if ldml.LocaleDisplayNames != nil { 312 log.Fatalf("setData: %v", err) 313 } 314 continue 315 } 316 if ldml.LocaleDisplayNames != nil && tags.contains(tag) { 317 f(g, tag, ldml.LocaleDisplayNames) 318 } 319 } 320 } 321 322 func (b *builder) filter() { 323 filter := func(s *cldr.Slice) { 324 if *short { 325 s.SelectOnePerGroup("alt", []string{"short", ""}) 326 } else { 327 s.SelectOnePerGroup("alt", []string{"stand-alone", ""}) 328 } 329 d, err := cldr.ParseDraft(*draft) 330 if err != nil { 331 log.Fatalf("filter: %v", err) 332 } 333 s.SelectDraft(d) 334 } 335 for _, loc := range b.data.Locales() { 336 if ldn := b.data.RawLDML(loc).LocaleDisplayNames; ldn != nil { 337 if ldn.Languages != nil { 338 s := cldr.MakeSlice(&ldn.Languages.Language) 339 if filter(&s); len(ldn.Languages.Language) == 0 { 340 ldn.Languages = nil 341 } 342 } 343 if ldn.Scripts != nil { 344 s := cldr.MakeSlice(&ldn.Scripts.Script) 345 if filter(&s); len(ldn.Scripts.Script) == 0 { 346 ldn.Scripts = nil 347 } 348 } 349 if ldn.Territories != nil { 350 s := cldr.MakeSlice(&ldn.Territories.Territory) 351 if filter(&s); len(ldn.Territories.Territory) == 0 { 352 ldn.Territories = nil 353 } 354 } 355 } 356 } 357 } 358 359 // makeSupported creates a list of all supported locales. 
360 func (b *builder) makeSupported() { 361 // tags across groups 362 for _, g := range b.group { 363 for t, _ := range g.lang { 364 b.supported = append(b.supported, t) 365 } 366 } 367 b.supported = b.supported[:unique(tagsSorter(b.supported))] 368 369 } 370 371 type tagsSorter []language.Tag 372 373 func (a tagsSorter) Len() int { return len(a) } 374 func (a tagsSorter) Swap(i, j int) { a[i], a[j] = a[j], a[i] } 375 func (a tagsSorter) Less(i, j int) bool { return a[i].String() < a[j].String() } 376 377 func (b *builder) writeGroup(name string) { 378 g := b.group[name] 379 380 for _, kv := range g.lang { 381 for t, _ := range kv { 382 g.toTags = append(g.toTags, t) 383 } 384 } 385 g.toTags = g.toTags[:unique(tagsBySize(g.toTags))] 386 387 // Allocate header per supported value. 388 g.headers = make([]header, len(b.supported)) 389 for i, sup := range b.supported { 390 kv, ok := g.lang[sup] 391 if !ok { 392 g.headers[i].tag = sup 393 continue 394 } 395 data := []byte{} 396 index := make([]uint16, len(g.toTags), len(g.toTags)+1) 397 for j, t := range g.toTags { 398 index[j] = uint16(len(data)) 399 data = append(data, kv[t]...) 400 } 401 index = append(index, uint16(len(data))) 402 403 // Trim the tail of the index. 404 // TODO: indexes can be reduced in size quite a bit more. 405 n := len(index) 406 for ; n >= 2 && index[n-2] == index[n-1]; n-- { 407 } 408 index = index[:n] 409 410 // Workaround for a bug in CLDR 26. 411 // See http://unicode.org/cldr/trac/ticket/8042. 412 if cldr.Version == "26" && sup.String() == "hsb" { 413 data = bytes.Replace(data, []byte{'"'}, nil, 1) 414 } 415 g.headers[i] = header{sup, string(data), index} 416 } 417 g.writeTable(b.w, name) 418 } 419 420 type tagsBySize []string 421 422 func (l tagsBySize) Len() int { return len(l) } 423 func (l tagsBySize) Swap(i, j int) { l[i], l[j] = l[j], l[i] } 424 func (l tagsBySize) Less(i, j int) bool { 425 a, b := l[i], l[j] 426 // Sort single-tag entries based on size first. Otherwise alphabetic. 
427 if len(a) != len(b) && (len(a) <= 4 || len(b) <= 4) { 428 return len(a) < len(b) 429 } 430 return a < b 431 } 432 433 // parentIndices returns slice a of len(tags) where tags[a[i]] is the parent 434 // of tags[i]. 435 func parentIndices(tags []language.Tag) []int16 { 436 index := make(map[language.Tag]int16) 437 for i, t := range tags { 438 index[t] = int16(i) 439 } 440 441 // Construct default parents. 442 parents := make([]int16, len(tags)) 443 for i, t := range tags { 444 parents[i] = -1 445 for t = t.Parent(); t != language.Und; t = t.Parent() { 446 if j, ok := index[t]; ok { 447 parents[i] = j 448 break 449 } 450 } 451 } 452 return parents 453 } 454 455 func (b *builder) writeParents() { 456 parents := parentIndices(b.supported) 457 fmt.Fprintf(b.w, "var parents = ") 458 b.w.WriteArray(parents) 459 } 460 461 // writeKeys writes keys to a special index used by the display package. 462 // tags are assumed to be sorted by length. 463 func writeKeys(w *gen.CodeWriter, name string, keys []string) { 464 w.Size += int(3 * reflect.TypeOf("").Size()) 465 w.WriteComment("Number of keys: %d", len(keys)) 466 fmt.Fprintf(w, "var (\n\t%sIndex = tagIndex{\n", name) 467 for i := 2; i <= 4; i++ { 468 sub := []string{} 469 for _, t := range keys { 470 if len(t) != i { 471 break 472 } 473 sub = append(sub, t) 474 } 475 s := strings.Join(sub, "") 476 w.WriteString(s) 477 fmt.Fprintf(w, ",\n") 478 keys = keys[len(sub):] 479 } 480 fmt.Fprintln(w, "\t}") 481 if len(keys) > 0 { 482 w.Size += int(reflect.TypeOf([]string{}).Size()) 483 fmt.Fprintf(w, "\t%sTagsLong = ", name) 484 w.WriteSlice(keys) 485 } 486 fmt.Fprintln(w, ")\n") 487 } 488 489 // identifier creates an identifier from the given tag. 
490 func identifier(t language.Tag) string { 491 return strings.Replace(t.String(), "-", "", -1) 492 } 493 494 func (h *header) writeEntry(w *gen.CodeWriter, name string) { 495 if len(dict) > 0 && dict.contains(h.tag) { 496 fmt.Fprintf(w, "\t{ // %s\n", h.tag) 497 fmt.Fprintf(w, "\t\t%[1]s%[2]sStr,\n\t\t%[1]s%[2]sIdx,\n", identifier(h.tag), name) 498 fmt.Fprintln(w, "\t},") 499 } else if len(h.data) == 0 { 500 fmt.Fprintln(w, "\t\t{}, //", h.tag) 501 } else { 502 fmt.Fprintf(w, "\t{ // %s\n", h.tag) 503 w.WriteString(h.data) 504 fmt.Fprintln(w, ",") 505 w.WriteSlice(h.index) 506 fmt.Fprintln(w, ",\n\t},") 507 } 508 } 509 510 // write the data for the given header as single entries. The size for this data 511 // was already accounted for in writeEntry. 512 func (h *header) writeSingle(w *gen.CodeWriter, name string) { 513 if len(dict) > 0 && dict.contains(h.tag) { 514 tag := identifier(h.tag) 515 w.WriteConst(tag+name+"Str", h.data) 516 517 // Note that we create a slice instead of an array. If we use an array 518 // we need to refer to it as a[:] in other tables, which will cause the 519 // array to always be included by the linker. See Issue 7651. 520 w.WriteVar(tag+name+"Idx", h.index) 521 } 522 } 523 524 // WriteTable writes an entry for a single Namer. 
525 func (g *group) writeTable(w *gen.CodeWriter, name string) { 526 start := w.Size 527 writeKeys(w, name, g.toTags) 528 w.Size += len(g.headers) * int(reflect.ValueOf(g.headers[0]).Type().Size()) 529 530 fmt.Fprintf(w, "var %sHeaders = [%d]header{\n", name, len(g.headers)) 531 532 title := strings.Title(name) 533 for _, h := range g.headers { 534 h.writeEntry(w, title) 535 } 536 fmt.Fprintln(w, "}\n") 537 538 for _, h := range g.headers { 539 h.writeSingle(w, title) 540 } 541 n := w.Size - start 542 fmt.Fprintf(w, "// Total size for %s: %d bytes (%d KB)\n\n", name, n, n/1000) 543 } 544 545 func (b *builder) writeDictionaries() { 546 fmt.Fprintln(b.w, "// Dictionary entries of frequent languages") 547 fmt.Fprintln(b.w, "var (") 548 parents := parentIndices(b.supported) 549 550 for i, t := range b.supported { 551 if dict.contains(t) { 552 ident := identifier(t) 553 fmt.Fprintf(b.w, "\t%s = Dictionary{ // %s\n", ident, t) 554 if p := parents[i]; p == -1 { 555 fmt.Fprintln(b.w, "\t\tnil,") 556 } else { 557 fmt.Fprintf(b.w, "\t\t&%s,\n", identifier(b.supported[p])) 558 } 559 fmt.Fprintf(b.w, "\t\theader{%[1]sLangStr, %[1]sLangIdx},\n", ident) 560 fmt.Fprintf(b.w, "\t\theader{%[1]sScriptStr, %[1]sScriptIdx},\n", ident) 561 fmt.Fprintf(b.w, "\t\theader{%[1]sRegionStr, %[1]sRegionIdx},\n", ident) 562 fmt.Fprintln(b.w, "\t}") 563 } 564 } 565 fmt.Fprintln(b.w, ")") 566 567 var s string 568 var a []uint16 569 sz := reflect.TypeOf(s).Size() 570 sz += reflect.TypeOf(a).Size() 571 sz *= 3 572 sz += reflect.TypeOf(&a).Size() 573 n := int(sz) * len(dict) 574 fmt.Fprintf(b.w, "// Total size for %d entries: %d bytes (%d KB)\n\n", len(dict), n, n/1000) 575 576 b.w.Size += n 577 } 578 579 // unique sorts the given lists and removes duplicate entries by swapping them 580 // past position k, where k is the number of unique values. It returns k. 
func unique(a sort.Interface) int {
	n := a.Len()
	if n == 0 {
		return 0
	}
	sort.Sort(a)

	// Invariant: positions [0, k) hold the distinct values seen so far, in
	// sorted order. Elements at positions >= i have not been touched yet and
	// are therefore still sorted, so an element is new exactly when it is
	// greater than the last value kept.
	k := 1
	for i := 1; i < n; i++ {
		if !a.Less(k-1, i) {
			continue // duplicate of the last value kept
		}
		if k != i {
			a.Swap(k, i)
		}
		k++
	}
	return k
}