github.com/graybobo/golang.org-package-offline-cache@v0.0.0-20200626051047-6608995c132f/x/text/language/display/maketables.go (about)

     1  // Copyright 2014 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build ignore
     6  
     7  // Generator for display name tables.
     8  
     9  package main
    10  
    11  import (
    12  	"bytes"
    13  	"flag"
    14  	"fmt"
    15  	"log"
    16  	"reflect"
    17  	"sort"
    18  	"strings"
    19  
    20  	"golang.org/x/text/internal/gen"
    21  	"golang.org/x/text/language"
    22  	"golang.org/x/text/unicode/cldr"
    23  )
    24  
    25  var (
    26  	test = flag.Bool("test", false,
    27  		"test existing tables; can be used to compare web data with package data.")
    28  	outputFile = flag.String("output", "tables.go", "output file")
    29  
    30  	stats = flag.Bool("stats", false, "prints statistics to stderr")
    31  
    32  	short = flag.Bool("short", false, `Use "short" alternatives, when available.`)
    33  	draft = flag.String("draft",
    34  		"contributed",
    35  		`Minimal draft requirements (approved, contributed, provisional, unconfirmed).`)
    36  	pkg = flag.String("package",
    37  		"display",
    38  		"the name of the package in which the generated file is to be included")
    39  
    40  	tags = newTagSet("tags",
    41  		[]language.Tag{},
    42  		"space-separated list of tags to include or empty for all")
    43  	dict = newTagSet("dict",
    44  		dictTags(),
    45  		"space-separated list or tags for which to include a Dictionary. "+
    46  			`"" means the common list from go.text/language.`)
    47  )
    48  
    49  func dictTags() (tag []language.Tag) {
    50  	// TODO: replace with language.Common.Tags() once supported.
    51  	const str = "af am ar ar-001 az bg bn ca cs da de el en en-US en-GB " +
    52  		"es es-ES es-419 et fa fi fil fr fr-CA gu he hi hr hu hy id is it ja " +
    53  		"ka kk km kn ko ky lo lt lv mk ml mn mr ms my ne nl no pa pl pt pt-BR " +
    54  		"pt-PT ro ru si sk sl sq sr sr-Latn sv sw ta te th tr uk ur uz vi " +
    55  		"zh zh-Hans zh-Hant zu"
    56  
    57  	for _, s := range strings.Split(str, " ") {
    58  		tag = append(tag, language.MustParse(s))
    59  	}
    60  	return tag
    61  }
    62  
    63  func main() {
    64  	gen.Init()
    65  
    66  	// Read the CLDR zip file.
    67  	r := gen.OpenCLDRCoreZip()
    68  	defer r.Close()
    69  
    70  	d := &cldr.Decoder{}
    71  	d.SetDirFilter("main", "supplemental")
    72  	d.SetSectionFilter("localeDisplayNames")
    73  	data, err := d.DecodeZip(r)
    74  	if err != nil {
    75  		log.Fatalf("DecodeZip: %v", err)
    76  	}
    77  
    78  	w := gen.NewCodeWriter()
    79  	defer w.WriteGoFile(*outputFile, "display")
    80  
    81  	gen.WriteCLDRVersion(w)
    82  
    83  	b := builder{
    84  		w:     w,
    85  		data:  data,
    86  		group: make(map[string]*group),
    87  	}
    88  	b.generate()
    89  }
    90  
// tagForm is the language value whose Parse and MustParse methods are used to
// parse every tag handled by this generator.
const tagForm = language.All

// tagSet is used to parse command line flags of tags. It implements the
// flag.Value interface.
// An empty set is treated by contains as matching every tag.
type tagSet map[language.Tag]bool
    96  
    97  func newTagSet(name string, tags []language.Tag, usage string) tagSet {
    98  	f := tagSet(make(map[language.Tag]bool))
    99  	for _, t := range tags {
   100  		f[t] = true
   101  	}
   102  	flag.Var(f, name, usage)
   103  	return f
   104  }
   105  
   106  // String implements the String method of the flag.Value interface.
   107  func (f tagSet) String() string {
   108  	tags := []string{}
   109  	for t := range f {
   110  		tags = append(tags, t.String())
   111  	}
   112  	sort.Strings(tags)
   113  	return strings.Join(tags, " ")
   114  }
   115  
   116  // Set implements Set from the flag.Value interface.
   117  func (f tagSet) Set(s string) error {
   118  	if s != "" {
   119  		for _, s := range strings.Split(s, " ") {
   120  			if s != "" {
   121  				tag, err := tagForm.Parse(s)
   122  				if err != nil {
   123  					return err
   124  				}
   125  				f[tag] = true
   126  			}
   127  		}
   128  	}
   129  	return nil
   130  }
   131  
   132  func (f tagSet) contains(t language.Tag) bool {
   133  	if len(f) == 0 {
   134  		return true
   135  	}
   136  	return f[t]
   137  }
   138  
// builder is used to create all tables with display name information.
type builder struct {
	// w receives the generated Go source.
	w *gen.CodeWriter

	// data is the decoded CLDR data from which the tables are built.
	data *cldr.CLDR

	// fromLocs is cleared at the start of every setData call.
	// NOTE(review): no other use is visible in this file.
	fromLocs []string

	// destination tags for the current locale.
	// Both fields are reset at the start of every setData call.
	toTags     []string
	toTagIndex map[string]int

	// list of supported tags
	// Filled by makeSupported; generate replaces it with just the self tag
	// before building the "self" group.
	supported []language.Tag

	// key-value pairs per group
	// Keyed by group name; generate uses "lang", "script", "region" and "self".
	group map[string]*group

	// statistics
	// sizeIndex and sizeData are zeroed by setData.
	// NOTE(review): totalSize is not referenced in the visible code.
	sizeIndex int // total size of all indexes of headers
	sizeData  int // total size of all data of headers
	totalSize int
}
   162  
// group holds the names collected for one kind of key, together with the
// headers that writeGroup derives from them.
type group struct {
	// Maps from a given language to the Namer data for this language.
	lang    map[language.Tag]keyValues
	// headers has one entry per supported tag; it is built by writeGroup.
	headers []header

	// toTags is the sorted, de-duplicated list of keys found across all
	// languages of the group; it is computed by writeGroup.
	toTags        []string
	// NOTE(review): threeStart and fourPlusStart are not referenced in the
	// visible code.
	threeStart    int
	fourPlusStart int
}
   172  
   173  // set sets the typ to the name for locale loc.
   174  func (g *group) set(t language.Tag, typ, name string) {
   175  	kv := g.lang[t]
   176  	if kv == nil {
   177  		kv = make(keyValues)
   178  		g.lang[t] = kv
   179  	}
   180  	if kv[typ] == "" {
   181  		kv[typ] = name
   182  	}
   183  }
   184  
// keyValues maps a key (the string form of a language tag, script or region,
// as passed to group.set) to its display name.
type keyValues map[string]string

// header holds the packed display names of one supported tag.
type header struct {
	// tag is the supported tag this header belongs to.
	tag   language.Tag
	// data is the concatenation of all names, in the order of group.toTags.
	data  string
	// index holds the byte offsets into data at which each name starts,
	// followed by the end offset; writeGroup trims repeated trailing offsets.
	index []uint16
}
   192  
// versionInfo is the format string generate uses to emit the deprecated
// Version constant; its single verb is filled in with cldr.Version.
var versionInfo = `// Version is deprecated. Use CLDRVersion.
const Version = %#v

`

// self is the tag under which generate stores the names of languages in their
// own language (the "self" group). It is also the only supported tag while
// that group is written.
var self = language.MustParse("mul")
   199  
   200  // generate builds and writes all tables.
   201  func (b *builder) generate() {
   202  	fmt.Fprintf(b.w, versionInfo, cldr.Version)
   203  
   204  	b.filter()
   205  	b.setData("lang", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) {
   206  		if ldn.Languages != nil {
   207  			for _, v := range ldn.Languages.Language {
   208  				tag := tagForm.MustParse(v.Type)
   209  				if tags.contains(tag) {
   210  					g.set(loc, tag.String(), v.Data())
   211  				}
   212  			}
   213  		}
   214  	})
   215  	b.setData("script", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) {
   216  		if ldn.Scripts != nil {
   217  			for _, v := range ldn.Scripts.Script {
   218  				code := language.MustParseScript(v.Type)
   219  				if code.IsPrivateUse() { // Qaaa..Qabx
   220  					// TODO: data currently appears to be very meager.
   221  					// Reconsider if we have data for English.
   222  					if loc == language.English {
   223  						log.Fatal("Consider including data for private use scripts.")
   224  					}
   225  					continue
   226  				}
   227  				g.set(loc, code.String(), v.Data())
   228  			}
   229  		}
   230  	})
   231  	b.setData("region", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) {
   232  		if ldn.Territories != nil {
   233  			for _, v := range ldn.Territories.Territory {
   234  				g.set(loc, language.MustParseRegion(v.Type).String(), v.Data())
   235  			}
   236  		}
   237  	})
   238  
   239  	b.makeSupported()
   240  
   241  	b.writeParents()
   242  
   243  	b.writeGroup("lang")
   244  	b.writeGroup("script")
   245  	b.writeGroup("region")
   246  
   247  	b.w.WriteConst("numSupported", len(b.supported))
   248  	buf := bytes.Buffer{}
   249  	for _, tag := range b.supported {
   250  		fmt.Fprint(&buf, tag.String(), "|")
   251  	}
   252  	b.w.WriteConst("supported", buf.String())
   253  
   254  	b.writeDictionaries()
   255  
   256  	b.supported = []language.Tag{self}
   257  
   258  	// Compute the names of locales in their own language. Some of these names
   259  	// may be specified in their parent locales. We iterate the maximum depth
   260  	// of the parent three times to match successive parents of tags until a
   261  	// possible match is found.
   262  	for i := 0; i < 4; i++ {
   263  		b.setData("self", func(g *group, tag language.Tag, ldn *cldr.LocaleDisplayNames) {
   264  			parent := tag
   265  			if b, s, r := tag.Raw(); i > 0 && (s != language.Script{} && r == language.Region{}) {
   266  				parent, _ = language.Raw.Compose(b)
   267  			}
   268  			if ldn.Languages != nil {
   269  				for _, v := range ldn.Languages.Language {
   270  					key := tagForm.MustParse(v.Type)
   271  					saved := key
   272  					if key == parent {
   273  						g.set(self, tag.String(), v.Data())
   274  					}
   275  					for k := 0; k < i; k++ {
   276  						key = key.Parent()
   277  					}
   278  					if key == tag {
   279  						g.set(self, saved.String(), v.Data()) // set does not overwrite a value.
   280  					}
   281  				}
   282  			}
   283  		})
   284  	}
   285  
   286  	b.writeGroup("self")
   287  }
   288  
   289  func (b *builder) setData(name string, f func(*group, language.Tag, *cldr.LocaleDisplayNames)) {
   290  	b.sizeIndex = 0
   291  	b.sizeData = 0
   292  	b.toTags = nil
   293  	b.fromLocs = nil
   294  	b.toTagIndex = make(map[string]int)
   295  
   296  	g := b.group[name]
   297  	if g == nil {
   298  		g = &group{lang: make(map[language.Tag]keyValues)}
   299  		b.group[name] = g
   300  	}
   301  	for _, loc := range b.data.Locales() {
   302  		// We use RawLDML instead of LDML as we are managing our own inheritance
   303  		// in this implementation.
   304  		ldml := b.data.RawLDML(loc)
   305  
   306  		// We do not support the POSIX variant (it is not a supported BCP 47
   307  		// variant). This locale also doesn't happen to contain any data, so
   308  		// we'll skip it by checking for this.
   309  		tag, err := tagForm.Parse(loc)
   310  		if err != nil {
   311  			if ldml.LocaleDisplayNames != nil {
   312  				log.Fatalf("setData: %v", err)
   313  			}
   314  			continue
   315  		}
   316  		if ldml.LocaleDisplayNames != nil && tags.contains(tag) {
   317  			f(g, tag, ldml.LocaleDisplayNames)
   318  		}
   319  	}
   320  }
   321  
   322  func (b *builder) filter() {
   323  	filter := func(s *cldr.Slice) {
   324  		if *short {
   325  			s.SelectOnePerGroup("alt", []string{"short", ""})
   326  		} else {
   327  			s.SelectOnePerGroup("alt", []string{"stand-alone", ""})
   328  		}
   329  		d, err := cldr.ParseDraft(*draft)
   330  		if err != nil {
   331  			log.Fatalf("filter: %v", err)
   332  		}
   333  		s.SelectDraft(d)
   334  	}
   335  	for _, loc := range b.data.Locales() {
   336  		if ldn := b.data.RawLDML(loc).LocaleDisplayNames; ldn != nil {
   337  			if ldn.Languages != nil {
   338  				s := cldr.MakeSlice(&ldn.Languages.Language)
   339  				if filter(&s); len(ldn.Languages.Language) == 0 {
   340  					ldn.Languages = nil
   341  				}
   342  			}
   343  			if ldn.Scripts != nil {
   344  				s := cldr.MakeSlice(&ldn.Scripts.Script)
   345  				if filter(&s); len(ldn.Scripts.Script) == 0 {
   346  					ldn.Scripts = nil
   347  				}
   348  			}
   349  			if ldn.Territories != nil {
   350  				s := cldr.MakeSlice(&ldn.Territories.Territory)
   351  				if filter(&s); len(ldn.Territories.Territory) == 0 {
   352  					ldn.Territories = nil
   353  				}
   354  			}
   355  		}
   356  	}
   357  }
   358  
   359  // makeSupported creates a list of all supported locales.
   360  func (b *builder) makeSupported() {
   361  	// tags across groups
   362  	for _, g := range b.group {
   363  		for t, _ := range g.lang {
   364  			b.supported = append(b.supported, t)
   365  		}
   366  	}
   367  	b.supported = b.supported[:unique(tagsSorter(b.supported))]
   368  
   369  }
   370  
   371  type tagsSorter []language.Tag
   372  
   373  func (a tagsSorter) Len() int           { return len(a) }
   374  func (a tagsSorter) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
   375  func (a tagsSorter) Less(i, j int) bool { return a[i].String() < a[j].String() }
   376  
   377  func (b *builder) writeGroup(name string) {
   378  	g := b.group[name]
   379  
   380  	for _, kv := range g.lang {
   381  		for t, _ := range kv {
   382  			g.toTags = append(g.toTags, t)
   383  		}
   384  	}
   385  	g.toTags = g.toTags[:unique(tagsBySize(g.toTags))]
   386  
   387  	// Allocate header per supported value.
   388  	g.headers = make([]header, len(b.supported))
   389  	for i, sup := range b.supported {
   390  		kv, ok := g.lang[sup]
   391  		if !ok {
   392  			g.headers[i].tag = sup
   393  			continue
   394  		}
   395  		data := []byte{}
   396  		index := make([]uint16, len(g.toTags), len(g.toTags)+1)
   397  		for j, t := range g.toTags {
   398  			index[j] = uint16(len(data))
   399  			data = append(data, kv[t]...)
   400  		}
   401  		index = append(index, uint16(len(data)))
   402  
   403  		// Trim the tail of the index.
   404  		// TODO: indexes can be reduced in size quite a bit more.
   405  		n := len(index)
   406  		for ; n >= 2 && index[n-2] == index[n-1]; n-- {
   407  		}
   408  		index = index[:n]
   409  
   410  		// Workaround for a bug in CLDR 26.
   411  		// See http://unicode.org/cldr/trac/ticket/8042.
   412  		if cldr.Version == "26" && sup.String() == "hsb" {
   413  			data = bytes.Replace(data, []byte{'"'}, nil, 1)
   414  		}
   415  		g.headers[i] = header{sup, string(data), index}
   416  	}
   417  	g.writeTable(b.w, name)
   418  }
   419  
   420  type tagsBySize []string
   421  
   422  func (l tagsBySize) Len() int      { return len(l) }
   423  func (l tagsBySize) Swap(i, j int) { l[i], l[j] = l[j], l[i] }
   424  func (l tagsBySize) Less(i, j int) bool {
   425  	a, b := l[i], l[j]
   426  	// Sort single-tag entries based on size first. Otherwise alphabetic.
   427  	if len(a) != len(b) && (len(a) <= 4 || len(b) <= 4) {
   428  		return len(a) < len(b)
   429  	}
   430  	return a < b
   431  }
   432  
   433  // parentIndices returns slice a of len(tags) where tags[a[i]] is the parent
   434  // of tags[i].
   435  func parentIndices(tags []language.Tag) []int16 {
   436  	index := make(map[language.Tag]int16)
   437  	for i, t := range tags {
   438  		index[t] = int16(i)
   439  	}
   440  
   441  	// Construct default parents.
   442  	parents := make([]int16, len(tags))
   443  	for i, t := range tags {
   444  		parents[i] = -1
   445  		for t = t.Parent(); t != language.Und; t = t.Parent() {
   446  			if j, ok := index[t]; ok {
   447  				parents[i] = j
   448  				break
   449  			}
   450  		}
   451  	}
   452  	return parents
   453  }
   454  
   455  func (b *builder) writeParents() {
   456  	parents := parentIndices(b.supported)
   457  	fmt.Fprintf(b.w, "var parents = ")
   458  	b.w.WriteArray(parents)
   459  }
   460  
   461  // writeKeys writes keys to a special index used by the display package.
   462  // tags are assumed to be sorted by length.
   463  func writeKeys(w *gen.CodeWriter, name string, keys []string) {
   464  	w.Size += int(3 * reflect.TypeOf("").Size())
   465  	w.WriteComment("Number of keys: %d", len(keys))
   466  	fmt.Fprintf(w, "var (\n\t%sIndex = tagIndex{\n", name)
   467  	for i := 2; i <= 4; i++ {
   468  		sub := []string{}
   469  		for _, t := range keys {
   470  			if len(t) != i {
   471  				break
   472  			}
   473  			sub = append(sub, t)
   474  		}
   475  		s := strings.Join(sub, "")
   476  		w.WriteString(s)
   477  		fmt.Fprintf(w, ",\n")
   478  		keys = keys[len(sub):]
   479  	}
   480  	fmt.Fprintln(w, "\t}")
   481  	if len(keys) > 0 {
   482  		w.Size += int(reflect.TypeOf([]string{}).Size())
   483  		fmt.Fprintf(w, "\t%sTagsLong = ", name)
   484  		w.WriteSlice(keys)
   485  	}
   486  	fmt.Fprintln(w, ")\n")
   487  }
   488  
   489  // identifier creates an identifier from the given tag.
   490  func identifier(t language.Tag) string {
   491  	return strings.Replace(t.String(), "-", "", -1)
   492  }
   493  
   494  func (h *header) writeEntry(w *gen.CodeWriter, name string) {
   495  	if len(dict) > 0 && dict.contains(h.tag) {
   496  		fmt.Fprintf(w, "\t{ // %s\n", h.tag)
   497  		fmt.Fprintf(w, "\t\t%[1]s%[2]sStr,\n\t\t%[1]s%[2]sIdx,\n", identifier(h.tag), name)
   498  		fmt.Fprintln(w, "\t},")
   499  	} else if len(h.data) == 0 {
   500  		fmt.Fprintln(w, "\t\t{}, //", h.tag)
   501  	} else {
   502  		fmt.Fprintf(w, "\t{ // %s\n", h.tag)
   503  		w.WriteString(h.data)
   504  		fmt.Fprintln(w, ",")
   505  		w.WriteSlice(h.index)
   506  		fmt.Fprintln(w, ",\n\t},")
   507  	}
   508  }
   509  
   510  // write the data for the given header as single entries. The size for this data
   511  // was already accounted for in writeEntry.
   512  func (h *header) writeSingle(w *gen.CodeWriter, name string) {
   513  	if len(dict) > 0 && dict.contains(h.tag) {
   514  		tag := identifier(h.tag)
   515  		w.WriteConst(tag+name+"Str", h.data)
   516  
   517  		// Note that we create a slice instead of an array. If we use an array
   518  		// we need to refer to it as a[:] in other tables, which will cause the
   519  		// array to always be included by the linker. See Issue 7651.
   520  		w.WriteVar(tag+name+"Idx", h.index)
   521  	}
   522  }
   523  
   524  // WriteTable writes an entry for a single Namer.
   525  func (g *group) writeTable(w *gen.CodeWriter, name string) {
   526  	start := w.Size
   527  	writeKeys(w, name, g.toTags)
   528  	w.Size += len(g.headers) * int(reflect.ValueOf(g.headers[0]).Type().Size())
   529  
   530  	fmt.Fprintf(w, "var %sHeaders = [%d]header{\n", name, len(g.headers))
   531  
   532  	title := strings.Title(name)
   533  	for _, h := range g.headers {
   534  		h.writeEntry(w, title)
   535  	}
   536  	fmt.Fprintln(w, "}\n")
   537  
   538  	for _, h := range g.headers {
   539  		h.writeSingle(w, title)
   540  	}
   541  	n := w.Size - start
   542  	fmt.Fprintf(w, "// Total size for %s: %d bytes (%d KB)\n\n", name, n, n/1000)
   543  }
   544  
   545  func (b *builder) writeDictionaries() {
   546  	fmt.Fprintln(b.w, "// Dictionary entries of frequent languages")
   547  	fmt.Fprintln(b.w, "var (")
   548  	parents := parentIndices(b.supported)
   549  
   550  	for i, t := range b.supported {
   551  		if dict.contains(t) {
   552  			ident := identifier(t)
   553  			fmt.Fprintf(b.w, "\t%s = Dictionary{ // %s\n", ident, t)
   554  			if p := parents[i]; p == -1 {
   555  				fmt.Fprintln(b.w, "\t\tnil,")
   556  			} else {
   557  				fmt.Fprintf(b.w, "\t\t&%s,\n", identifier(b.supported[p]))
   558  			}
   559  			fmt.Fprintf(b.w, "\t\theader{%[1]sLangStr, %[1]sLangIdx},\n", ident)
   560  			fmt.Fprintf(b.w, "\t\theader{%[1]sScriptStr, %[1]sScriptIdx},\n", ident)
   561  			fmt.Fprintf(b.w, "\t\theader{%[1]sRegionStr, %[1]sRegionIdx},\n", ident)
   562  			fmt.Fprintln(b.w, "\t}")
   563  		}
   564  	}
   565  	fmt.Fprintln(b.w, ")")
   566  
   567  	var s string
   568  	var a []uint16
   569  	sz := reflect.TypeOf(s).Size()
   570  	sz += reflect.TypeOf(a).Size()
   571  	sz *= 3
   572  	sz += reflect.TypeOf(&a).Size()
   573  	n := int(sz) * len(dict)
   574  	fmt.Fprintf(b.w, "// Total size for %d entries: %d bytes (%d KB)\n\n", len(dict), n, n/1000)
   575  
   576  	b.w.Size += n
   577  }
   578  
   579  // unique sorts the given lists and removes duplicate entries by swapping them
   580  // past position k, where k is the number of unique values. It returns k.
   581  func unique(a sort.Interface) int {
   582  	if a.Len() == 0 {
   583  		return 0
   584  	}
   585  	sort.Sort(a)
   586  	k := 1
   587  	for i := 1; i < a.Len(); i++ {
   588  		if a.Less(k-1, i) {
   589  			if k != i {
   590  				a.Swap(k, i)
   591  			}
   592  			k++
   593  		}
   594  	}
   595  	return k
   596  }