github.com/graybobo/golang.org-package-offline-cache@v0.0.0-20200626051047-6608995c132f/x/text/display/maketables.go (about) 1 // Copyright 2014 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // +build ignore 6 7 // Generator for display name tables. 8 9 package main 10 11 import ( 12 "bytes" 13 "flag" 14 "fmt" 15 "go/format" 16 "io" 17 "io/ioutil" 18 "log" 19 "net/http" 20 "os" 21 "path" 22 "path/filepath" 23 "reflect" 24 "sort" 25 "strings" 26 27 "golang.org/x/text/cldr" 28 "golang.org/x/text/language" 29 ) 30 31 var ( 32 url = flag.String("cldr", 33 "http://www.unicode.org/Public/cldr/"+cldr.Version+"/core.zip", 34 "URL of CLDR archive.") 35 iana = flag.String("iana", 36 "http://www.iana.org/assignments/language-subtag-registry", 37 "URL of IANA language subtag registry.") 38 test = flag.Bool("test", false, 39 "test existing tables; can be used to compare web data with package data.") 40 localDir = flag.String("local", 41 "", 42 "directory containing local data files; for debugging only.") 43 outputFile = flag.String("output", "tables.go", "output file") 44 45 stats = flag.Bool("stats", false, "prints statistics to stderr") 46 47 short = flag.Bool("short", false, `Use "short" alternatives, when available.`) 48 draft = flag.String("draft", 49 "contributed", 50 `Minimal draft requirements (approved, contributed, provisional, unconfirmed).`) 51 pkg = flag.String("package", 52 "display", 53 "the name of the package in which the generated file is to be included") 54 55 tags = newTagSet("tags", 56 []language.Tag{}, 57 "space-separated list of tags to include or empty for all") 58 dict = newTagSet("dict", 59 dictTags(), 60 "space-separated list or tags for which to include a Dictionary. "+ 61 `"" means the common list from go.text/language.`) 62 ) 63 64 func dictTags() (tag []language.Tag) { 65 // TODO: replace with language.Common.Tags() once supported. 
66 const str = "af am ar ar-001 az bg bn ca cs da de el en en-US en-GB " + 67 "es es-ES es-419 et fa fi fil fr fr-CA gu he hi hr hu hy id is it ja " + 68 "ka kk km kn ko ky lo lt lv mk ml mn mr ms my ne nl no pa pl pt pt-BR " + 69 "pt-PT ro ru si sk sl sq sr sv sw ta te th tr uk ur uz vi zh zh-Hans " + 70 "zh-Hant zu" 71 72 for _, s := range strings.Split(str, " ") { 73 tag = append(tag, language.MustParse(s)) 74 } 75 return tag 76 } 77 78 func main() { 79 flag.Parse() 80 81 // Read the CLDR zip file. 82 var r io.ReadCloser 83 if *localDir != "" { 84 dir, err := filepath.Abs(*localDir) 85 if err != nil { 86 log.Fatalf("Could not locate file: %v", err) 87 } 88 if r, err = os.Open(filepath.Join(dir, path.Base(*url))); err != nil { 89 log.Fatalf("Could not open file: %v", err) 90 } 91 } else { 92 resp, err := http.Get(*url) 93 if err != nil { 94 log.Fatalf("HTTP GET: %v", err) 95 } 96 if resp.StatusCode != 200 { 97 log.Fatalf("Bad GET status for %q: %q", *url, resp.Status) 98 } 99 r = resp.Body 100 } 101 defer r.Close() 102 103 d := &cldr.Decoder{} 104 d.SetDirFilter("main", "supplemental") 105 d.SetSectionFilter("localeDisplayNames") 106 data, err := d.DecodeZip(r) 107 if err != nil { 108 log.Fatalf("DecodeZip: %v", err) 109 } 110 111 var buf bytes.Buffer 112 b := builder{ 113 w: &buf, 114 data: data, 115 group: make(map[string]*group), 116 } 117 b.generate() 118 119 out, err := format.Source(buf.Bytes()) 120 if err != nil { 121 log.Fatalf("Could not format output: %v", err) 122 } 123 if err := ioutil.WriteFile(*outputFile, out, 0644); err != nil { 124 log.Fatalf("Could not write output: %v", err) 125 } 126 } 127 128 const tagForm = language.All 129 130 // tagSet is used to parse command line flags of tags. It implements the 131 // flag.Value interface. 
132 type tagSet map[language.Tag]bool 133 134 func newTagSet(name string, tags []language.Tag, usage string) tagSet { 135 f := tagSet(make(map[language.Tag]bool)) 136 for _, t := range tags { 137 f[t] = true 138 } 139 flag.Var(f, name, usage) 140 return f 141 } 142 143 // String implements the String method of the flag.Value interface. 144 func (f tagSet) String() string { 145 tags := []string{} 146 for t := range f { 147 tags = append(tags, t.String()) 148 } 149 sort.Strings(tags) 150 return strings.Join(tags, " ") 151 } 152 153 // Set implements Set from the flag.Value interface. 154 func (f tagSet) Set(s string) error { 155 if s != "" { 156 for _, s := range strings.Split(s, " ") { 157 if s != "" { 158 tag, err := tagForm.Parse(s) 159 if err != nil { 160 return err 161 } 162 f[tag] = true 163 } 164 } 165 } 166 return nil 167 } 168 169 func (f tagSet) contains(t language.Tag) bool { 170 if len(f) == 0 { 171 return true 172 } 173 return f[t] 174 } 175 176 // builder is used to create all tables with display name information. 177 type builder struct { 178 w io.Writer 179 180 data *cldr.CLDR 181 182 fromLocs []string 183 184 // destination tags for the current locale. 185 toTags []string 186 toTagIndex map[string]int 187 188 // list of supported tags 189 supported []language.Tag 190 191 // key-value pairs per group 192 group map[string]*group 193 194 // statistics 195 sizeIndex int // total size of all indexes of headers 196 sizeData int // total size of all data of headers 197 totalSize int 198 } 199 200 type group struct { 201 // Maps from a given language to the Namer data for this language. 202 lang map[language.Tag]keyValues 203 headers []header 204 205 toTags []string 206 threeStart int 207 fourPlusStart int 208 } 209 210 // set sets the typ to the name for locale loc. 
211 func (g *group) set(t language.Tag, typ, name string) { 212 kv := g.lang[t] 213 if kv == nil { 214 kv = make(keyValues) 215 g.lang[t] = kv 216 } 217 if kv[typ] == "" { 218 kv[typ] = name 219 } 220 } 221 222 type keyValues map[string]string 223 224 type header struct { 225 tag language.Tag 226 data string 227 index []uint16 228 } 229 230 var head = `// Generated by running 231 // maketables -url=%s 232 // automatically with go generate. 233 // DO NOT EDIT 234 235 package %s 236 237 // Version is the version of CLDR used to generate the data in this package. 238 const Version = %#v 239 240 ` 241 242 var self = language.MustParse("mul") 243 244 // generate builds and writes all tables. 245 func (b *builder) generate() { 246 fmt.Fprintf(b.w, head, *url, *pkg, cldr.Version) 247 248 b.filter() 249 b.setData("lang", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) { 250 if ldn.Languages != nil { 251 for _, v := range ldn.Languages.Language { 252 tag := tagForm.MustParse(v.Type) 253 if tags.contains(tag) { 254 g.set(loc, tag.String(), v.Data()) 255 } 256 } 257 } 258 }) 259 b.setData("script", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) { 260 if ldn.Scripts != nil { 261 for _, v := range ldn.Scripts.Script { 262 g.set(loc, language.MustParseScript(v.Type).String(), v.Data()) 263 } 264 } 265 }) 266 b.setData("region", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) { 267 if ldn.Territories != nil { 268 for _, v := range ldn.Territories.Territory { 269 g.set(loc, language.MustParseRegion(v.Type).String(), v.Data()) 270 } 271 } 272 }) 273 274 b.makeSupported() 275 276 n := b.writeParents() 277 278 n += b.writeGroup("lang") 279 n += b.writeGroup("script") 280 n += b.writeGroup("region") 281 282 b.writeSupported() 283 284 n += b.writeDictionaries() 285 286 b.supported = []language.Tag{self} 287 288 // Compute the names of locales in their own language. Some of these names 289 // may be specified in their parent locales. 
We iterate the maximum depth 290 // of the parent three times to match successive parents of tags until a 291 // possible match is found. 292 for i := 0; i < 4; i++ { 293 b.setData("self", func(g *group, tag language.Tag, ldn *cldr.LocaleDisplayNames) { 294 parent := tag 295 if b, s, r := tag.Raw(); i > 0 && (s != language.Script{} && r == language.Region{}) { 296 parent, _ = language.Raw.Compose(b) 297 } 298 if ldn.Languages != nil { 299 for _, v := range ldn.Languages.Language { 300 key := tagForm.MustParse(v.Type) 301 saved := key 302 if key == parent { 303 g.set(self, tag.String(), v.Data()) 304 } 305 for k := 0; k < i; k++ { 306 key = key.Parent() 307 } 308 if key == tag { 309 g.set(self, saved.String(), v.Data()) // set does not overwrite a value. 310 } 311 } 312 } 313 }) 314 } 315 316 n += b.writeGroup("self") 317 318 fmt.Fprintf(b.w, "// TOTAL %d Bytes (%d KB)", n, n/1000) 319 } 320 321 func (b *builder) setData(name string, f func(*group, language.Tag, *cldr.LocaleDisplayNames)) { 322 b.sizeIndex = 0 323 b.sizeData = 0 324 b.toTags = nil 325 b.fromLocs = nil 326 b.toTagIndex = make(map[string]int) 327 328 g := b.group[name] 329 if g == nil { 330 g = &group{lang: make(map[language.Tag]keyValues)} 331 b.group[name] = g 332 } 333 for _, loc := range b.data.Locales() { 334 // We use RawLDML instead of LDML as we are managing our own inheritance 335 // in this implementation. 336 ldml := b.data.RawLDML(loc) 337 338 // We do not support the POSIX variant (it is not a supported BCP 47 339 // variant). This locale also doesn't happen to contain any data, so 340 // we'll skip it by checking for this. 
341 tag, err := tagForm.Parse(loc) 342 if err != nil { 343 if ldml.LocaleDisplayNames != nil { 344 log.Fatalf("setData: %v", err) 345 } 346 continue 347 } 348 if ldml.LocaleDisplayNames != nil && tags.contains(tag) { 349 f(g, tag, ldml.LocaleDisplayNames) 350 } 351 } 352 } 353 354 func (b *builder) filter() { 355 filter := func(s *cldr.Slice) { 356 if *short { 357 s.SelectOnePerGroup("alt", []string{"short", ""}) 358 } else { 359 s.SelectOnePerGroup("alt", []string{"stand-alone", ""}) 360 } 361 d, err := cldr.ParseDraft(*draft) 362 if err != nil { 363 log.Fatalf("filter: %v", err) 364 } 365 s.SelectDraft(d) 366 } 367 for _, loc := range b.data.Locales() { 368 if ldn := b.data.RawLDML(loc).LocaleDisplayNames; ldn != nil { 369 if ldn.Languages != nil { 370 s := cldr.MakeSlice(&ldn.Languages.Language) 371 if filter(&s); len(ldn.Languages.Language) == 0 { 372 ldn.Languages = nil 373 } 374 } 375 if ldn.Scripts != nil { 376 s := cldr.MakeSlice(&ldn.Scripts.Script) 377 if filter(&s); len(ldn.Scripts.Script) == 0 { 378 ldn.Scripts = nil 379 } 380 } 381 if ldn.Territories != nil { 382 s := cldr.MakeSlice(&ldn.Territories.Territory) 383 if filter(&s); len(ldn.Territories.Territory) == 0 { 384 ldn.Territories = nil 385 } 386 } 387 } 388 } 389 } 390 391 // makeSupported creates a list of all supported locales. 
392 func (b *builder) makeSupported() { 393 // tags across groups 394 for _, g := range b.group { 395 for t, _ := range g.lang { 396 b.supported = append(b.supported, t) 397 } 398 } 399 b.supported = b.supported[:unique(tagsSorter(b.supported))] 400 401 } 402 403 type tagsSorter []language.Tag 404 405 func (a tagsSorter) Len() int { return len(a) } 406 func (a tagsSorter) Swap(i, j int) { a[i], a[j] = a[j], a[i] } 407 func (a tagsSorter) Less(i, j int) bool { return a[i].String() < a[j].String() } 408 409 func (b *builder) writeGroup(name string) int { 410 g := b.group[name] 411 412 for _, kv := range g.lang { 413 for t, _ := range kv { 414 g.toTags = append(g.toTags, t) 415 } 416 } 417 g.toTags = g.toTags[:unique(tagsBySize(g.toTags))] 418 419 // Allocate header per supported value. 420 g.headers = make([]header, len(b.supported)) 421 for i, sup := range b.supported { 422 kv, ok := g.lang[sup] 423 if !ok { 424 g.headers[i].tag = sup 425 continue 426 } 427 data := []byte{} 428 index := make([]uint16, len(g.toTags), len(g.toTags)+1) 429 for j, t := range g.toTags { 430 index[j] = uint16(len(data)) 431 data = append(data, kv[t]...) 432 } 433 index = append(index, uint16(len(data))) 434 435 // Trim the tail of the index. 436 // TODO: indexes can be reduced in size quite a bit more. 437 n := len(index) 438 for ; n >= 2 && index[n-2] == index[n-1]; n-- { 439 } 440 index = index[:n] 441 442 // Workaround for a bug in CLDR 26. 443 // See http://unicode.org/cldr/trac/ticket/8042. 444 if cldr.Version == "26" && sup.String() == "hsb" { 445 data = bytes.Replace(data, []byte{'"'}, nil, 1) 446 } 447 g.headers[i] = header{sup, string(data), index} 448 } 449 return g.writeTable(b.w, name) 450 } 451 452 type tagsBySize []string 453 454 func (l tagsBySize) Len() int { return len(l) } 455 func (l tagsBySize) Swap(i, j int) { l[i], l[j] = l[j], l[i] } 456 func (l tagsBySize) Less(i, j int) bool { 457 a, b := l[i], l[j] 458 // Sort single-tag entries based on size first. 
Otherwise alphabetic. 459 if len(a) != len(b) && (len(a) <= 4 || len(b) <= 4) { 460 return len(a) < len(b) 461 } 462 return a < b 463 } 464 465 func (b *builder) writeSupported() { 466 fmt.Fprintf(b.w, "const numSupported = %d\n", len(b.supported)) 467 fmt.Fprint(b.w, "const supported = \"\" +\n\t\"") 468 n := 0 469 for _, t := range b.supported { 470 s := t.String() 471 if n += len(s) + 1; n > 80 { 472 n = len(s) + 1 473 fmt.Fprint(b.w, "\" + \n\t\"") 474 } 475 fmt.Fprintf(b.w, "%s|", s) 476 } 477 fmt.Fprintln(b.w, "\"\n") 478 } 479 480 // parentIndices returns slice a of len(tags) where tags[a[i]] is the parent 481 // of tags[i]. 482 func parentIndices(tags []language.Tag) []int { 483 index := make(map[language.Tag]int) 484 for i, t := range tags { 485 index[t] = int(i) 486 } 487 488 // Construct default parents. 489 parents := make([]int, len(tags)) 490 for i, t := range tags { 491 parents[i] = -1 492 for t = t.Parent(); t != language.Und; t = t.Parent() { 493 if j, ok := index[t]; ok { 494 parents[i] = j 495 break 496 } 497 } 498 } 499 return parents 500 } 501 502 func (b *builder) writeParents() int { 503 parents := parentIndices(b.supported) 504 505 fmt.Fprintf(b.w, "// parent relationship: %d entries\n", len(parents)) 506 fmt.Fprintf(b.w, "var parents = [%d]int16{", len(parents)) 507 for i, v := range parents { 508 if i%12 == 0 { 509 fmt.Fprint(b.w, "\n\t") 510 } 511 fmt.Fprintf(b.w, "%d, ", v) 512 } 513 fmt.Fprintln(b.w, "}\n") 514 return len(parents) * 2 515 } 516 517 // writeKeys writes keys to a special index used by the display package. 518 // tags are assumed to be sorted by length. 
func writeKeys(w io.Writer, name string, keys []string) (n int) {
	strSize := int(reflect.TypeOf("").Size())

	// The index literal written below always holds three strings.
	n = 3 * strSize
	fmt.Fprintf(w, "// Number of keys: %d\n", len(keys))
	fmt.Fprintf(w, "var (\n\t%sIndex = tagIndex{\n", name)
	for size := 2; size <= 4; size++ {
		// Take the leading run of keys that are exactly size bytes long and
		// write them as one concatenated string.
		run := 0
		for run < len(keys) && len(keys[run]) == size {
			run++
		}
		s := strings.Join(keys[:run], "")
		n += len(s)
		fmt.Fprintf(w, "\t\t%+q,\n", s)
		keys = keys[run:]
	}
	fmt.Fprintln(w, "\t}")
	// Whatever is left did not fit the 2-, 3- and 4-byte runs and is written
	// as a separate string slice.
	if len(keys) > 0 {
		fmt.Fprintf(w, "\t%sTagsLong = %#v\n", name, keys)
		n += len(keys) * strSize
		n += len(strings.Join(keys, ""))
		n += int(reflect.TypeOf([]string{}).Size())
	}
	fmt.Fprint(w, ")\n\n")
	return n
}

// writeString writes s to w as a Go string expression indented by two tabs.
// After every 80 runes the literal is closed and continued on the next line
// with a + operator.
//
// Each rune is escaped as in a %q-quoted string, so that data containing a
// double quote, a backslash or a control character still yields valid Go
// source. Printable characters are written unchanged.
func writeString(w io.Writer, s string) {
	const runesPerLine = 80

	k := 0
	fmt.Fprint(w, "\t\t\"")
	for _, r := range s {
		// %q yields a complete quoted literal for the single rune; dropping
		// the surrounding quotes leaves just its escaped form.
		q := fmt.Sprintf("%q", string(r))
		fmt.Fprint(w, q[1:len(q)-1])
		if k++; k == runesPerLine {
			fmt.Fprint(w, "\" +\n\t\t\"")
			k = 0
		}
	}
	fmt.Fprint(w, `"`)
}

// writeUint16Body writes the elements of a to w as hexadecimal literals, each
// followed by ", ", with at most 12 elements per line and every line indented
// by three tabs. Nothing is written for an empty slice.
func writeUint16Body(w io.Writer, a []uint16) {
	const nPerLine = 12

	for len(a) > 0 {
		line := a
		if len(line) > nPerLine {
			line = a[:nPerLine]
		}
		a = a[len(line):]

		fmt.Fprint(w, "\t\t\t")
		for _, x := range line {
			fmt.Fprintf(w, "0x%x, ", x)
		}
		fmt.Fprintln(w)
	}
}

// identifier creates an identifier from the given tag.
579 func identifier(t language.Tag) string { 580 return strings.Replace(t.String(), "-", "", -1) 581 } 582 583 func (h *header) writeEntry(w io.Writer, name string) int { 584 n := int(reflect.TypeOf(h.data).Size()) 585 n += int(reflect.TypeOf(h.index).Size()) 586 n += len(h.data) 587 n += len(h.index) * 2 588 589 if len(dict) > 0 && dict.contains(h.tag) { 590 fmt.Fprintf(w, "\t{ // %s\n", h.tag) 591 fmt.Fprintf(w, "\t\t%[1]s%[2]sStr,\n\t\t%[1]s%[2]sIdx,\n", identifier(h.tag), name) 592 n += int(reflect.TypeOf(h.index).Size()) 593 fmt.Fprintln(w, "\t},") 594 } else if len(h.data) == 0 { 595 fmt.Fprintln(w, "\t\t{}, //", h.tag) 596 } else { 597 fmt.Fprintf(w, "\t{ // %s\n", h.tag) 598 writeString(w, h.data) 599 fmt.Fprintln(w, ",") 600 601 fmt.Fprintf(w, "\t\t[]uint16{ // %d entries\n", len(h.index)) 602 writeUint16Body(w, h.index) 603 fmt.Fprintln(w, "\t\t},") 604 fmt.Fprintln(w, "\t},") 605 } 606 607 return n 608 } 609 610 // write the data for the given header as single entries. The size for this data 611 // was already accounted for in writeEntry. 612 func (h *header) writeSingle(w io.Writer, name string) { 613 if len(dict) > 0 && dict.contains(h.tag) { 614 tag := identifier(h.tag) 615 fmt.Fprintf(w, "const %s%sStr = \"\" +\n", tag, name) 616 writeString(w, h.data) 617 fmt.Fprintln(w, "\n") 618 619 // Note that we create a slice instead of an array. If we use an array 620 // we need to refer to it as a[:] in other tables, which will cause the 621 // array to always be included by the linker. See Issue 7651. 622 fmt.Fprintf(w, "var %s%sIdx = []uint16{ // %d entries\n", tag, name, len(h.index)) 623 writeUint16Body(w, h.index) 624 fmt.Fprintln(w, "}\n") 625 } 626 } 627 628 // WriteTable writes an entry for a single Namer. 
629 func (g *group) writeTable(w io.Writer, name string) int { 630 n := writeKeys(w, name, g.toTags) 631 fmt.Fprintf(w, "var %sHeaders = [%d]header{\n", name, len(g.headers)) 632 633 title := strings.Title(name) 634 for _, h := range g.headers { 635 n += h.writeEntry(w, title) 636 } 637 fmt.Fprintln(w, "}\n") 638 639 for _, h := range g.headers { 640 h.writeSingle(w, title) 641 } 642 643 fmt.Fprintf(w, "// Total size for %s: %d bytes (%d KB)\n\n", name, n, n/1000) 644 return n 645 } 646 647 func (b *builder) writeDictionaries() int { 648 fmt.Fprintln(b.w, "// Dictionary entries of frequent languages") 649 fmt.Fprintln(b.w, "var (") 650 parents := parentIndices(b.supported) 651 652 for i, t := range b.supported { 653 if dict.contains(t) { 654 ident := identifier(t) 655 fmt.Fprintf(b.w, "\t%s = Dictionary{ // %s\n", ident, t) 656 if p := parents[i]; p == -1 { 657 fmt.Fprintln(b.w, "\t\tnil,") 658 } else { 659 fmt.Fprintf(b.w, "\t\t&%s,\n", identifier(b.supported[p])) 660 } 661 fmt.Fprintf(b.w, "\t\theader{%[1]sLangStr, %[1]sLangIdx},\n", ident) 662 fmt.Fprintf(b.w, "\t\theader{%[1]sScriptStr, %[1]sScriptIdx},\n", ident) 663 fmt.Fprintf(b.w, "\t\theader{%[1]sRegionStr, %[1]sRegionIdx},\n", ident) 664 fmt.Fprintln(b.w, "\t}") 665 } 666 } 667 fmt.Fprintln(b.w, ")") 668 669 var s string 670 var a []uint16 671 sz := reflect.TypeOf(s).Size() 672 sz += reflect.TypeOf(a).Size() 673 sz *= 3 674 sz += reflect.TypeOf(&a).Size() 675 n := int(sz) * len(dict) 676 fmt.Fprintf(b.w, "// Total size for %d entries: %d bytes (%d KB)\n\n", len(dict), n, n/1000) 677 678 return n 679 } 680 681 // unique sorts the given lists and removes duplicate entries by swapping them 682 // past position k, where k is the number of unique values. It returns k. 683 func unique(a sort.Interface) int { 684 if a.Len() == 0 { 685 return 0 686 } 687 sort.Sort(a) 688 k := 1 689 for i := 1; i < a.Len(); i++ { 690 if a.Less(k-1, i) { 691 if k != i { 692 a.Swap(k, i) 693 } 694 k++ 695 } 696 } 697 return k 698 }