github.com/go-enjin/golang-org-x-text@v0.12.1-enjin.2/internal/number/gen.go (about) 1 // Copyright 2016 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 //go:build ignore 6 // +build ignore 7 8 package main 9 10 import ( 11 "flag" 12 "fmt" 13 "log" 14 "reflect" 15 "strings" 16 "unicode/utf8" 17 18 "github.com/go-enjin/golang-org-x-text/internal/gen" 19 "github.com/go-enjin/golang-org-x-text/internal/language" 20 "github.com/go-enjin/golang-org-x-text/internal/language/compact" 21 "github.com/go-enjin/golang-org-x-text/internal/number" 22 "github.com/go-enjin/golang-org-x-text/internal/stringset" 23 "github.com/go-enjin/golang-org-x-text/unicode/cldr" 24 ) 25 26 var ( 27 test = flag.Bool("test", false, 28 "test existing tables; can be used to compare web data with package data.") 29 outputFile = flag.String("output", "tables.go", "output file") 30 outputTestFile = flag.String("testoutput", "data_test.go", "output file") 31 32 draft = flag.String("draft", 33 "contributed", 34 `Minimal draft requirements (approved, contributed, provisional, unconfirmed).`) 35 ) 36 37 func main() { 38 gen.Init() 39 40 const pkg = "number" 41 42 gen.Repackage("gen_common.go", "common.go", pkg) 43 // Read the CLDR zip file. 44 r := gen.OpenCLDRCoreZip() 45 defer r.Close() 46 47 d := &cldr.Decoder{} 48 d.SetDirFilter("supplemental", "main") 49 d.SetSectionFilter("numbers", "numberingSystem") 50 data, err := d.DecodeZip(r) 51 if err != nil { 52 log.Fatalf("DecodeZip: %v", err) 53 } 54 55 w := gen.NewCodeWriter() 56 defer w.WriteGoFile(*outputFile, pkg) 57 58 fmt.Fprintln(w, `import "github.com/go-enjin/golang-org-x-text/internal/stringset"`) 59 60 gen.WriteCLDRVersion(w) 61 62 genNumSystem(w, data) 63 genSymbols(w, data) 64 genFormats(w, data) 65 } 66 67 var systemMap = map[string]system{"latn": 0} 68 69 func getNumberSystem(str string) system { 70 ns, ok := systemMap[str] 71 if !ok { 72 log.Fatalf("No index for numbering system %q", str) 73 } 74 return ns 75 } 76 77 func genNumSystem(w *gen.CodeWriter, data *cldr.CLDR) { 78 numSysData := []systemData{ 79 {digitSize: 1, zero: [4]byte{'0'}}, 80 } 81 82 for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem { 83 if len(ns.Digits) == 0 { 84 continue 85 } 86 switch ns.Id { 87 case "latn": 88 // hard-wired 89 continue 90 case "hanidec": 91 // non-consecutive digits: treat as "algorithmic" 92 continue 93 } 94 95 zero, sz := utf8.DecodeRuneInString(ns.Digits) 96 if ns.Digits[sz-1]+9 > 0xBF { // 1011 1111: highest continuation byte 97 log.Fatalf("Last byte of zero value overflows for %s", ns.Id) 98 } 99 100 i := rune(0) 101 for _, r := range ns.Digits { 102 // Verify that we can do simple math on the UTF-8 byte sequence 103 // of zero to get the digit. 104 if zero+i != r { 105 // Runes not consecutive. 106 log.Fatalf("Digit %d of %s (%U) is not offset correctly from zero value", i, ns.Id, r) 107 } 108 i++ 109 } 110 var x [utf8.UTFMax]byte 111 utf8.EncodeRune(x[:], zero) 112 id := system(len(numSysData)) 113 systemMap[ns.Id] = id 114 numSysData = append(numSysData, systemData{ 115 id: id, 116 digitSize: byte(sz), 117 zero: x, 118 }) 119 } 120 w.WriteVar("numSysData", numSysData) 121 122 algoID := system(len(numSysData)) 123 fmt.Fprintln(w, "const (") 124 for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem { 125 id, ok := systemMap[ns.Id] 126 if !ok { 127 id = algoID 128 systemMap[ns.Id] = id 129 algoID++ 130 } 131 fmt.Fprintf(w, "num%s = %#x\n", strings.Title(ns.Id), id) 132 } 133 fmt.Fprintln(w, "numNumberSystems") 134 fmt.Fprintln(w, ")") 135 136 fmt.Fprintln(w, "var systemMap = map[string]system{") 137 for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem { 138 fmt.Fprintf(w, "%q: num%s,\n", ns.Id, strings.Title(ns.Id)) 139 w.Size += len(ns.Id) + 16 + 1 // very coarse approximation 140 } 141 fmt.Fprintln(w, "}") 142 } 143 144 func genSymbols(w *gen.CodeWriter, data *cldr.CLDR) { 145 d, err := cldr.ParseDraft(*draft) 146 if err != nil { 147 log.Fatalf("invalid draft level: %v", err) 148 } 149 150 nNumberSystems := system(len(systemMap)) 151 152 type symbols [NumSymbolTypes]string 153 154 type key struct { 155 tag compact.ID 156 system system 157 } 158 symbolMap := map[key]*symbols{} 159 160 defaults := map[compact.ID]system{} 161 162 for _, lang := range data.Locales() { 163 ldml := data.RawLDML(lang) 164 if ldml.Numbers == nil { 165 continue 166 } 167 langIndex, ok := compact.FromTag(language.MustParse(lang)) 168 if !ok { 169 log.Fatalf("No compact index for language %s", lang) 170 } 171 if d := ldml.Numbers.DefaultNumberingSystem; len(d) > 0 { 172 defaults[langIndex] = getNumberSystem(d[0].Data()) 173 } 174 175 syms := cldr.MakeSlice(&ldml.Numbers.Symbols) 176 syms.SelectDraft(d) 177 178 getFirst := func(name string, x interface{}) string { 179 v := reflect.ValueOf(x) 180 slice := cldr.MakeSlice(x) 181 slice.SelectAnyOf("alt", "", "alt") 182 if reflect.Indirect(v).Len() == 0 { 183 return "" 184 } else if reflect.Indirect(v).Len() > 1 { 185 log.Fatalf("%s: multiple values of %q within single symbol not supported.", lang, name) 186 } 187 return reflect.Indirect(v).Index(0).MethodByName("Data").Call(nil)[0].String() 188 } 189 190 for _, sym := range ldml.Numbers.Symbols { 191 if sym.NumberSystem == "" { 192 // This is just linking the default of root to "latn". 193 continue 194 } 195 symbolMap[key{langIndex, getNumberSystem(sym.NumberSystem)}] = &symbols{ 196 SymDecimal: getFirst("decimal", &sym.Decimal), 197 SymGroup: getFirst("group", &sym.Group), 198 SymList: getFirst("list", &sym.List), 199 SymPercentSign: getFirst("percentSign", &sym.PercentSign), 200 SymPlusSign: getFirst("plusSign", &sym.PlusSign), 201 SymMinusSign: getFirst("minusSign", &sym.MinusSign), 202 SymExponential: getFirst("exponential", &sym.Exponential), 203 SymSuperscriptingExponent: getFirst("superscriptingExponent", &sym.SuperscriptingExponent), 204 SymPerMille: getFirst("perMille", &sym.PerMille), 205 SymInfinity: getFirst("infinity", &sym.Infinity), 206 SymNan: getFirst("nan", &sym.Nan), 207 SymTimeSeparator: getFirst("timeSeparator", &sym.TimeSeparator), 208 } 209 } 210 } 211 212 // Expand all values. 213 for k, syms := range symbolMap { 214 for t := SymDecimal; t < NumSymbolTypes; t++ { 215 p := k.tag 216 for syms[t] == "" { 217 p = p.Parent() 218 if pSyms, ok := symbolMap[key{p, k.system}]; ok && (*pSyms)[t] != "" { 219 syms[t] = (*pSyms)[t] 220 break 221 } 222 if p == 0 /* und */ { 223 // Default to root, latn. 224 syms[t] = (*symbolMap[key{}])[t] 225 } 226 } 227 } 228 } 229 230 // Unique the symbol sets and write the string data. 231 m := map[symbols]int{} 232 sb := stringset.NewBuilder() 233 234 symIndex := [][NumSymbolTypes]byte{} 235 236 for ns := system(0); ns < nNumberSystems; ns++ { 237 for _, l := range data.Locales() { 238 langIndex, _ := compact.FromTag(language.MustParse(l)) 239 s := symbolMap[key{langIndex, ns}] 240 if s == nil { 241 continue 242 } 243 if _, ok := m[*s]; !ok { 244 m[*s] = len(symIndex) 245 sb.Add(s[:]...) 246 var x [NumSymbolTypes]byte 247 for i := SymDecimal; i < NumSymbolTypes; i++ { 248 x[i] = byte(sb.Index((*s)[i])) 249 } 250 symIndex = append(symIndex, x) 251 } 252 } 253 } 254 w.WriteVar("symIndex", symIndex) 255 w.WriteVar("symData", sb.Set()) 256 257 // resolveSymbolIndex gets the index from the closest matching locale, 258 // including the locale itself. 259 resolveSymbolIndex := func(langIndex compact.ID, ns system) symOffset { 260 for { 261 if sym := symbolMap[key{langIndex, ns}]; sym != nil { 262 return symOffset(m[*sym]) 263 } 264 if langIndex == 0 { 265 return 0 // und, latn 266 } 267 langIndex = langIndex.Parent() 268 } 269 } 270 271 // Create an index with the symbols for each locale for the latn numbering 272 // system. If this is not the default, or the only one, for a locale, we 273 // will overwrite the value later. 274 var langToDefaults [compact.NumCompactTags]symOffset 275 for _, l := range data.Locales() { 276 langIndex, _ := compact.FromTag(language.MustParse(l)) 277 langToDefaults[langIndex] = resolveSymbolIndex(langIndex, 0) 278 } 279 280 // Delete redundant entries. 281 for _, l := range data.Locales() { 282 langIndex, _ := compact.FromTag(language.MustParse(l)) 283 def := defaults[langIndex] 284 syms := symbolMap[key{langIndex, def}] 285 if syms == nil { 286 continue 287 } 288 for ns := system(0); ns < nNumberSystems; ns++ { 289 if ns == def { 290 continue 291 } 292 if altSyms, ok := symbolMap[key{langIndex, ns}]; ok && *altSyms == *syms { 293 delete(symbolMap, key{langIndex, ns}) 294 } 295 } 296 } 297 298 // Create a sorted list of alternatives per language. This will only need to 299 // be referenced if a user specified an alternative numbering system. 300 var langToAlt []altSymData 301 for _, l := range data.Locales() { 302 langIndex, _ := compact.FromTag(language.MustParse(l)) 303 start := len(langToAlt) 304 if start >= hasNonLatnMask { 305 log.Fatalf("Number of alternative assignments >= %x", hasNonLatnMask) 306 } 307 // Create the entry for the default value. 308 def := defaults[langIndex] 309 langToAlt = append(langToAlt, altSymData{ 310 compactTag: langIndex, 311 system: def, 312 symIndex: resolveSymbolIndex(langIndex, def), 313 }) 314 315 for ns := system(0); ns < nNumberSystems; ns++ { 316 if def == ns { 317 continue 318 } 319 if sym := symbolMap[key{langIndex, ns}]; sym != nil { 320 langToAlt = append(langToAlt, altSymData{ 321 compactTag: langIndex, 322 system: ns, 323 symIndex: resolveSymbolIndex(langIndex, ns), 324 }) 325 } 326 } 327 if def == 0 && len(langToAlt) == start+1 { 328 // No additional data: erase the entry. 329 langToAlt = langToAlt[:start] 330 } else { 331 // Overwrite the entry in langToDefaults. 332 langToDefaults[langIndex] = hasNonLatnMask | symOffset(start) 333 } 334 } 335 w.WriteComment(` 336 langToDefaults maps a compact language index to the default numbering system 337 and default symbol set`) 338 w.WriteVar("langToDefaults", langToDefaults) 339 340 w.WriteComment(` 341 langToAlt is a list of numbering system and symbol set pairs, sorted and 342 marked by compact language index.`) 343 w.WriteVar("langToAlt", langToAlt) 344 } 345 346 // genFormats generates the lookup table for decimal, scientific and percent 347 // patterns. 348 // 349 // CLDR allows for patterns to be different per language for different numbering 350 // systems. In practice the patterns are set to be consistent for a language 351 // independent of the numbering system. genFormats verifies that no language 352 // deviates from this. 353 func genFormats(w *gen.CodeWriter, data *cldr.CLDR) { 354 d, err := cldr.ParseDraft(*draft) 355 if err != nil { 356 log.Fatalf("invalid draft level: %v", err) 357 } 358 359 // Fill the first slot with a dummy so we can identify unspecified tags. 360 formats := []number.Pattern{{}} 361 patterns := map[string]int{} 362 363 // TODO: It would be possible to eliminate two of these slices by having 364 // another indirection and store a reference to the combination of patterns. 365 decimal := make([]byte, compact.NumCompactTags) 366 scientific := make([]byte, compact.NumCompactTags) 367 percent := make([]byte, compact.NumCompactTags) 368 369 for _, lang := range data.Locales() { 370 ldml := data.RawLDML(lang) 371 if ldml.Numbers == nil { 372 continue 373 } 374 langIndex, ok := compact.FromTag(language.MustParse(lang)) 375 if !ok { 376 log.Fatalf("No compact index for language %s", lang) 377 } 378 type patternSlice []*struct { 379 cldr.Common 380 Numbers string `xml:"numbers,attr"` 381 Count string `xml:"count,attr"` 382 } 383 384 add := func(name string, tags []byte, ps patternSlice) { 385 sl := cldr.MakeSlice(&ps) 386 sl.SelectDraft(d) 387 if len(ps) == 0 { 388 return 389 } 390 if len(ps) > 2 || len(ps) == 2 && ps[0] != ps[1] { 391 log.Fatalf("Inconsistent %d patterns for language %s", name, lang) 392 } 393 s := ps[0].Data() 394 395 index, ok := patterns[s] 396 if !ok { 397 nf, err := number.ParsePattern(s) 398 if err != nil { 399 log.Fatal(err) 400 } 401 index = len(formats) 402 patterns[s] = index 403 formats = append(formats, *nf) 404 } 405 tags[langIndex] = byte(index) 406 } 407 408 for _, df := range ldml.Numbers.DecimalFormats { 409 for _, l := range df.DecimalFormatLength { 410 if l.Type != "" { 411 continue 412 } 413 for _, f := range l.DecimalFormat { 414 add("decimal", decimal, f.Pattern) 415 } 416 } 417 } 418 for _, df := range ldml.Numbers.ScientificFormats { 419 for _, l := range df.ScientificFormatLength { 420 if l.Type != "" { 421 continue 422 } 423 for _, f := range l.ScientificFormat { 424 add("scientific", scientific, f.Pattern) 425 } 426 } 427 } 428 for _, df := range ldml.Numbers.PercentFormats { 429 for _, l := range df.PercentFormatLength { 430 if l.Type != "" { 431 continue 432 } 433 for _, f := range l.PercentFormat { 434 add("percent", percent, f.Pattern) 435 } 436 } 437 } 438 } 439 440 // Complete the parent tag array to reflect inheritance. An index of 0 441 // indicates an unspecified value. 442 for _, data := range [][]byte{decimal, scientific, percent} { 443 for i := range data { 444 p := compact.ID(i) 445 for ; data[p] == 0; p = p.Parent() { 446 } 447 data[i] = data[p] 448 } 449 } 450 w.WriteVar("tagToDecimal", decimal) 451 w.WriteVar("tagToScientific", scientific) 452 w.WriteVar("tagToPercent", percent) 453 454 value := strings.Replace(fmt.Sprintf("%#v", formats), "number.", "", -1) 455 // Break up the lines. This won't give ideal perfect formatting, but it is 456 // better than one huge line. 457 value = strings.Replace(value, ", ", ",\n", -1) 458 fmt.Fprintf(w, "var formats = %s\n", value) 459 }