github.com/go-enjin/golang-org-x-text@v0.12.1-enjin.2/internal/number/gen.go (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build ignore
     6  // +build ignore
     7  
     8  package main
     9  
    10  import (
    11  	"flag"
    12  	"fmt"
    13  	"log"
    14  	"reflect"
    15  	"strings"
    16  	"unicode/utf8"
    17  
    18  	"github.com/go-enjin/golang-org-x-text/internal/gen"
    19  	"github.com/go-enjin/golang-org-x-text/internal/language"
    20  	"github.com/go-enjin/golang-org-x-text/internal/language/compact"
    21  	"github.com/go-enjin/golang-org-x-text/internal/number"
    22  	"github.com/go-enjin/golang-org-x-text/internal/stringset"
    23  	"github.com/go-enjin/golang-org-x-text/unicode/cldr"
    24  )
    25  
    26  var (
    27  	test = flag.Bool("test", false,
    28  		"test existing tables; can be used to compare web data with package data.")
    29  	outputFile     = flag.String("output", "tables.go", "output file")
    30  	outputTestFile = flag.String("testoutput", "data_test.go", "output file")
    31  
    32  	draft = flag.String("draft",
    33  		"contributed",
    34  		`Minimal draft requirements (approved, contributed, provisional, unconfirmed).`)
    35  )
    36  
    37  func main() {
    38  	gen.Init()
    39  
    40  	const pkg = "number"
    41  
    42  	gen.Repackage("gen_common.go", "common.go", pkg)
    43  	// Read the CLDR zip file.
    44  	r := gen.OpenCLDRCoreZip()
    45  	defer r.Close()
    46  
    47  	d := &cldr.Decoder{}
    48  	d.SetDirFilter("supplemental", "main")
    49  	d.SetSectionFilter("numbers", "numberingSystem")
    50  	data, err := d.DecodeZip(r)
    51  	if err != nil {
    52  		log.Fatalf("DecodeZip: %v", err)
    53  	}
    54  
    55  	w := gen.NewCodeWriter()
    56  	defer w.WriteGoFile(*outputFile, pkg)
    57  
    58  	fmt.Fprintln(w, `import "github.com/go-enjin/golang-org-x-text/internal/stringset"`)
    59  
    60  	gen.WriteCLDRVersion(w)
    61  
    62  	genNumSystem(w, data)
    63  	genSymbols(w, data)
    64  	genFormats(w, data)
    65  }
    66  
    67  var systemMap = map[string]system{"latn": 0}
    68  
    69  func getNumberSystem(str string) system {
    70  	ns, ok := systemMap[str]
    71  	if !ok {
    72  		log.Fatalf("No index for numbering system %q", str)
    73  	}
    74  	return ns
    75  }
    76  
    77  func genNumSystem(w *gen.CodeWriter, data *cldr.CLDR) {
    78  	numSysData := []systemData{
    79  		{digitSize: 1, zero: [4]byte{'0'}},
    80  	}
    81  
    82  	for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem {
    83  		if len(ns.Digits) == 0 {
    84  			continue
    85  		}
    86  		switch ns.Id {
    87  		case "latn":
    88  			// hard-wired
    89  			continue
    90  		case "hanidec":
    91  			// non-consecutive digits: treat as "algorithmic"
    92  			continue
    93  		}
    94  
    95  		zero, sz := utf8.DecodeRuneInString(ns.Digits)
    96  		if ns.Digits[sz-1]+9 > 0xBF { // 1011 1111: highest continuation byte
    97  			log.Fatalf("Last byte of zero value overflows for %s", ns.Id)
    98  		}
    99  
   100  		i := rune(0)
   101  		for _, r := range ns.Digits {
   102  			// Verify that we can do simple math on the UTF-8 byte sequence
   103  			// of zero to get the digit.
   104  			if zero+i != r {
   105  				// Runes not consecutive.
   106  				log.Fatalf("Digit %d of %s (%U) is not offset correctly from zero value", i, ns.Id, r)
   107  			}
   108  			i++
   109  		}
   110  		var x [utf8.UTFMax]byte
   111  		utf8.EncodeRune(x[:], zero)
   112  		id := system(len(numSysData))
   113  		systemMap[ns.Id] = id
   114  		numSysData = append(numSysData, systemData{
   115  			id:        id,
   116  			digitSize: byte(sz),
   117  			zero:      x,
   118  		})
   119  	}
   120  	w.WriteVar("numSysData", numSysData)
   121  
   122  	algoID := system(len(numSysData))
   123  	fmt.Fprintln(w, "const (")
   124  	for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem {
   125  		id, ok := systemMap[ns.Id]
   126  		if !ok {
   127  			id = algoID
   128  			systemMap[ns.Id] = id
   129  			algoID++
   130  		}
   131  		fmt.Fprintf(w, "num%s = %#x\n", strings.Title(ns.Id), id)
   132  	}
   133  	fmt.Fprintln(w, "numNumberSystems")
   134  	fmt.Fprintln(w, ")")
   135  
   136  	fmt.Fprintln(w, "var systemMap = map[string]system{")
   137  	for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem {
   138  		fmt.Fprintf(w, "%q: num%s,\n", ns.Id, strings.Title(ns.Id))
   139  		w.Size += len(ns.Id) + 16 + 1 // very coarse approximation
   140  	}
   141  	fmt.Fprintln(w, "}")
   142  }
   143  
   144  func genSymbols(w *gen.CodeWriter, data *cldr.CLDR) {
   145  	d, err := cldr.ParseDraft(*draft)
   146  	if err != nil {
   147  		log.Fatalf("invalid draft level: %v", err)
   148  	}
   149  
   150  	nNumberSystems := system(len(systemMap))
   151  
   152  	type symbols [NumSymbolTypes]string
   153  
   154  	type key struct {
   155  		tag    compact.ID
   156  		system system
   157  	}
   158  	symbolMap := map[key]*symbols{}
   159  
   160  	defaults := map[compact.ID]system{}
   161  
   162  	for _, lang := range data.Locales() {
   163  		ldml := data.RawLDML(lang)
   164  		if ldml.Numbers == nil {
   165  			continue
   166  		}
   167  		langIndex, ok := compact.FromTag(language.MustParse(lang))
   168  		if !ok {
   169  			log.Fatalf("No compact index for language %s", lang)
   170  		}
   171  		if d := ldml.Numbers.DefaultNumberingSystem; len(d) > 0 {
   172  			defaults[langIndex] = getNumberSystem(d[0].Data())
   173  		}
   174  
   175  		syms := cldr.MakeSlice(&ldml.Numbers.Symbols)
   176  		syms.SelectDraft(d)
   177  
   178  		getFirst := func(name string, x interface{}) string {
   179  			v := reflect.ValueOf(x)
   180  			slice := cldr.MakeSlice(x)
   181  			slice.SelectAnyOf("alt", "", "alt")
   182  			if reflect.Indirect(v).Len() == 0 {
   183  				return ""
   184  			} else if reflect.Indirect(v).Len() > 1 {
   185  				log.Fatalf("%s: multiple values of %q within single symbol not supported.", lang, name)
   186  			}
   187  			return reflect.Indirect(v).Index(0).MethodByName("Data").Call(nil)[0].String()
   188  		}
   189  
   190  		for _, sym := range ldml.Numbers.Symbols {
   191  			if sym.NumberSystem == "" {
   192  				// This is just linking the default of root to "latn".
   193  				continue
   194  			}
   195  			symbolMap[key{langIndex, getNumberSystem(sym.NumberSystem)}] = &symbols{
   196  				SymDecimal:                getFirst("decimal", &sym.Decimal),
   197  				SymGroup:                  getFirst("group", &sym.Group),
   198  				SymList:                   getFirst("list", &sym.List),
   199  				SymPercentSign:            getFirst("percentSign", &sym.PercentSign),
   200  				SymPlusSign:               getFirst("plusSign", &sym.PlusSign),
   201  				SymMinusSign:              getFirst("minusSign", &sym.MinusSign),
   202  				SymExponential:            getFirst("exponential", &sym.Exponential),
   203  				SymSuperscriptingExponent: getFirst("superscriptingExponent", &sym.SuperscriptingExponent),
   204  				SymPerMille:               getFirst("perMille", &sym.PerMille),
   205  				SymInfinity:               getFirst("infinity", &sym.Infinity),
   206  				SymNan:                    getFirst("nan", &sym.Nan),
   207  				SymTimeSeparator:          getFirst("timeSeparator", &sym.TimeSeparator),
   208  			}
   209  		}
   210  	}
   211  
   212  	// Expand all values.
   213  	for k, syms := range symbolMap {
   214  		for t := SymDecimal; t < NumSymbolTypes; t++ {
   215  			p := k.tag
   216  			for syms[t] == "" {
   217  				p = p.Parent()
   218  				if pSyms, ok := symbolMap[key{p, k.system}]; ok && (*pSyms)[t] != "" {
   219  					syms[t] = (*pSyms)[t]
   220  					break
   221  				}
   222  				if p == 0 /* und */ {
   223  					// Default to root, latn.
   224  					syms[t] = (*symbolMap[key{}])[t]
   225  				}
   226  			}
   227  		}
   228  	}
   229  
   230  	// Unique the symbol sets and write the string data.
   231  	m := map[symbols]int{}
   232  	sb := stringset.NewBuilder()
   233  
   234  	symIndex := [][NumSymbolTypes]byte{}
   235  
   236  	for ns := system(0); ns < nNumberSystems; ns++ {
   237  		for _, l := range data.Locales() {
   238  			langIndex, _ := compact.FromTag(language.MustParse(l))
   239  			s := symbolMap[key{langIndex, ns}]
   240  			if s == nil {
   241  				continue
   242  			}
   243  			if _, ok := m[*s]; !ok {
   244  				m[*s] = len(symIndex)
   245  				sb.Add(s[:]...)
   246  				var x [NumSymbolTypes]byte
   247  				for i := SymDecimal; i < NumSymbolTypes; i++ {
   248  					x[i] = byte(sb.Index((*s)[i]))
   249  				}
   250  				symIndex = append(symIndex, x)
   251  			}
   252  		}
   253  	}
   254  	w.WriteVar("symIndex", symIndex)
   255  	w.WriteVar("symData", sb.Set())
   256  
   257  	// resolveSymbolIndex gets the index from the closest matching locale,
   258  	// including the locale itself.
   259  	resolveSymbolIndex := func(langIndex compact.ID, ns system) symOffset {
   260  		for {
   261  			if sym := symbolMap[key{langIndex, ns}]; sym != nil {
   262  				return symOffset(m[*sym])
   263  			}
   264  			if langIndex == 0 {
   265  				return 0 // und, latn
   266  			}
   267  			langIndex = langIndex.Parent()
   268  		}
   269  	}
   270  
   271  	// Create an index with the symbols for each locale for the latn numbering
   272  	// system. If this is not the default, or the only one, for a locale, we
   273  	// will overwrite the value later.
   274  	var langToDefaults [compact.NumCompactTags]symOffset
   275  	for _, l := range data.Locales() {
   276  		langIndex, _ := compact.FromTag(language.MustParse(l))
   277  		langToDefaults[langIndex] = resolveSymbolIndex(langIndex, 0)
   278  	}
   279  
   280  	// Delete redundant entries.
   281  	for _, l := range data.Locales() {
   282  		langIndex, _ := compact.FromTag(language.MustParse(l))
   283  		def := defaults[langIndex]
   284  		syms := symbolMap[key{langIndex, def}]
   285  		if syms == nil {
   286  			continue
   287  		}
   288  		for ns := system(0); ns < nNumberSystems; ns++ {
   289  			if ns == def {
   290  				continue
   291  			}
   292  			if altSyms, ok := symbolMap[key{langIndex, ns}]; ok && *altSyms == *syms {
   293  				delete(symbolMap, key{langIndex, ns})
   294  			}
   295  		}
   296  	}
   297  
   298  	// Create a sorted list of alternatives per language. This will only need to
   299  	// be referenced if a user specified an alternative numbering system.
   300  	var langToAlt []altSymData
   301  	for _, l := range data.Locales() {
   302  		langIndex, _ := compact.FromTag(language.MustParse(l))
   303  		start := len(langToAlt)
   304  		if start >= hasNonLatnMask {
   305  			log.Fatalf("Number of alternative assignments >= %x", hasNonLatnMask)
   306  		}
   307  		// Create the entry for the default value.
   308  		def := defaults[langIndex]
   309  		langToAlt = append(langToAlt, altSymData{
   310  			compactTag: langIndex,
   311  			system:     def,
   312  			symIndex:   resolveSymbolIndex(langIndex, def),
   313  		})
   314  
   315  		for ns := system(0); ns < nNumberSystems; ns++ {
   316  			if def == ns {
   317  				continue
   318  			}
   319  			if sym := symbolMap[key{langIndex, ns}]; sym != nil {
   320  				langToAlt = append(langToAlt, altSymData{
   321  					compactTag: langIndex,
   322  					system:     ns,
   323  					symIndex:   resolveSymbolIndex(langIndex, ns),
   324  				})
   325  			}
   326  		}
   327  		if def == 0 && len(langToAlt) == start+1 {
   328  			// No additional data: erase the entry.
   329  			langToAlt = langToAlt[:start]
   330  		} else {
   331  			// Overwrite the entry in langToDefaults.
   332  			langToDefaults[langIndex] = hasNonLatnMask | symOffset(start)
   333  		}
   334  	}
   335  	w.WriteComment(`
   336  langToDefaults maps a compact language index to the default numbering system
   337  and default symbol set`)
   338  	w.WriteVar("langToDefaults", langToDefaults)
   339  
   340  	w.WriteComment(`
   341  langToAlt is a list of numbering system and symbol set pairs, sorted and
   342  marked by compact language index.`)
   343  	w.WriteVar("langToAlt", langToAlt)
   344  }
   345  
   346  // genFormats generates the lookup table for decimal, scientific and percent
   347  // patterns.
   348  //
   349  // CLDR allows for patterns to be different per language for different numbering
   350  // systems. In practice the patterns are set to be consistent for a language
   351  // independent of the numbering system. genFormats verifies that no language
   352  // deviates from this.
   353  func genFormats(w *gen.CodeWriter, data *cldr.CLDR) {
   354  	d, err := cldr.ParseDraft(*draft)
   355  	if err != nil {
   356  		log.Fatalf("invalid draft level: %v", err)
   357  	}
   358  
   359  	// Fill the first slot with a dummy so we can identify unspecified tags.
   360  	formats := []number.Pattern{{}}
   361  	patterns := map[string]int{}
   362  
   363  	// TODO: It would be possible to eliminate two of these slices by having
   364  	// another indirection and store a reference to the combination of patterns.
   365  	decimal := make([]byte, compact.NumCompactTags)
   366  	scientific := make([]byte, compact.NumCompactTags)
   367  	percent := make([]byte, compact.NumCompactTags)
   368  
   369  	for _, lang := range data.Locales() {
   370  		ldml := data.RawLDML(lang)
   371  		if ldml.Numbers == nil {
   372  			continue
   373  		}
   374  		langIndex, ok := compact.FromTag(language.MustParse(lang))
   375  		if !ok {
   376  			log.Fatalf("No compact index for language %s", lang)
   377  		}
   378  		type patternSlice []*struct {
   379  			cldr.Common
   380  			Numbers string `xml:"numbers,attr"`
   381  			Count   string `xml:"count,attr"`
   382  		}
   383  
   384  		add := func(name string, tags []byte, ps patternSlice) {
   385  			sl := cldr.MakeSlice(&ps)
   386  			sl.SelectDraft(d)
   387  			if len(ps) == 0 {
   388  				return
   389  			}
   390  			if len(ps) > 2 || len(ps) == 2 && ps[0] != ps[1] {
   391  				log.Fatalf("Inconsistent %d patterns for language %s", name, lang)
   392  			}
   393  			s := ps[0].Data()
   394  
   395  			index, ok := patterns[s]
   396  			if !ok {
   397  				nf, err := number.ParsePattern(s)
   398  				if err != nil {
   399  					log.Fatal(err)
   400  				}
   401  				index = len(formats)
   402  				patterns[s] = index
   403  				formats = append(formats, *nf)
   404  			}
   405  			tags[langIndex] = byte(index)
   406  		}
   407  
   408  		for _, df := range ldml.Numbers.DecimalFormats {
   409  			for _, l := range df.DecimalFormatLength {
   410  				if l.Type != "" {
   411  					continue
   412  				}
   413  				for _, f := range l.DecimalFormat {
   414  					add("decimal", decimal, f.Pattern)
   415  				}
   416  			}
   417  		}
   418  		for _, df := range ldml.Numbers.ScientificFormats {
   419  			for _, l := range df.ScientificFormatLength {
   420  				if l.Type != "" {
   421  					continue
   422  				}
   423  				for _, f := range l.ScientificFormat {
   424  					add("scientific", scientific, f.Pattern)
   425  				}
   426  			}
   427  		}
   428  		for _, df := range ldml.Numbers.PercentFormats {
   429  			for _, l := range df.PercentFormatLength {
   430  				if l.Type != "" {
   431  					continue
   432  				}
   433  				for _, f := range l.PercentFormat {
   434  					add("percent", percent, f.Pattern)
   435  				}
   436  			}
   437  		}
   438  	}
   439  
   440  	// Complete the parent tag array to reflect inheritance. An index of 0
   441  	// indicates an unspecified value.
   442  	for _, data := range [][]byte{decimal, scientific, percent} {
   443  		for i := range data {
   444  			p := compact.ID(i)
   445  			for ; data[p] == 0; p = p.Parent() {
   446  			}
   447  			data[i] = data[p]
   448  		}
   449  	}
   450  	w.WriteVar("tagToDecimal", decimal)
   451  	w.WriteVar("tagToScientific", scientific)
   452  	w.WriteVar("tagToPercent", percent)
   453  
   454  	value := strings.Replace(fmt.Sprintf("%#v", formats), "number.", "", -1)
   455  	// Break up the lines. This won't give ideal perfect formatting, but it is
   456  	// better than one huge line.
   457  	value = strings.Replace(value, ", ", ",\n", -1)
   458  	fmt.Fprintf(w, "var formats = %s\n", value)
   459  }