github.com/go-xe2/third@v1.0.3/golang.org/x/text/internal/number/gen.go (about) 1 // Copyright 2016 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // +build ignore 6 7 package main 8 9 import ( 10 "flag" 11 "fmt" 12 "log" 13 "reflect" 14 "strings" 15 "unicode/utf8" 16 17 "github.com/go-xe2/third/golang.org/x/text/internal" 18 "github.com/go-xe2/third/golang.org/x/text/internal/gen" 19 "github.com/go-xe2/third/golang.org/x/text/internal/number" 20 "github.com/go-xe2/third/golang.org/x/text/internal/stringset" 21 "github.com/go-xe2/third/golang.org/x/text/language" 22 "github.com/go-xe2/third/golang.org/x/text/unicode/cldr" 23 ) 24 25 var ( 26 test = flag.Bool("test", false, 27 "test existing tables; can be used to compare web data with package data.") 28 outputFile = flag.String("output", "tables.go", "output file") 29 outputTestFile = flag.String("testoutput", "data_test.go", "output file") 30 31 draft = flag.String("draft", 32 "contributed", 33 `Minimal draft requirements (approved, contributed, provisional, unconfirmed).`) 34 ) 35 36 func main() { 37 gen.Init() 38 39 const pkg = "number" 40 41 gen.Repackage("gen_common.go", "common.go", pkg) 42 // Read the CLDR zip file. 43 r := gen.OpenCLDRCoreZip() 44 defer r.Close() 45 46 d := &cldr.Decoder{} 47 d.SetDirFilter("supplemental", "main") 48 d.SetSectionFilter("numbers", "numberingSystem") 49 data, err := d.DecodeZip(r) 50 if err != nil { 51 log.Fatalf("DecodeZip: %v", err) 52 } 53 54 w := gen.NewCodeWriter() 55 defer w.WriteGoFile(*outputFile, pkg) 56 57 fmt.Fprintln(w, `import "github.com/go-xe2/third/golang.org/x/text/internal/stringset"`) 58 59 gen.WriteCLDRVersion(w) 60 61 genNumSystem(w, data) 62 genSymbols(w, data) 63 genFormats(w, data) 64 } 65 66 var systemMap = map[string]system{"latn": 0} 67 68 func getNumberSystem(str string) system { 69 ns, ok := systemMap[str] 70 if !ok { 71 log.Fatalf("No index for numbering system %q", str) 72 } 73 return ns 74 } 75 76 func genNumSystem(w *gen.CodeWriter, data *cldr.CLDR) { 77 numSysData := []systemData{ 78 {digitSize: 1, zero: [4]byte{'0'}}, 79 } 80 81 for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem { 82 if len(ns.Digits) == 0 { 83 continue 84 } 85 switch ns.Id { 86 case "latn": 87 // hard-wired 88 continue 89 case "hanidec": 90 // non-consecutive digits: treat as "algorithmic" 91 continue 92 } 93 94 zero, sz := utf8.DecodeRuneInString(ns.Digits) 95 if ns.Digits[sz-1]+9 > 0xBF { // 1011 1111: highest continuation byte 96 log.Fatalf("Last byte of zero value overflows for %s", ns.Id) 97 } 98 99 i := rune(0) 100 for _, r := range ns.Digits { 101 // Verify that we can do simple math on the UTF-8 byte sequence 102 // of zero to get the digit. 103 if zero+i != r { 104 // Runes not consecutive. 105 log.Fatalf("Digit %d of %s (%U) is not offset correctly from zero value", i, ns.Id, r) 106 } 107 i++ 108 } 109 var x [utf8.UTFMax]byte 110 utf8.EncodeRune(x[:], zero) 111 id := system(len(numSysData)) 112 systemMap[ns.Id] = id 113 numSysData = append(numSysData, systemData{ 114 id: id, 115 digitSize: byte(sz), 116 zero: x, 117 }) 118 } 119 w.WriteVar("numSysData", numSysData) 120 121 algoID := system(len(numSysData)) 122 fmt.Fprintln(w, "const (") 123 for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem { 124 id, ok := systemMap[ns.Id] 125 if !ok { 126 id = algoID 127 systemMap[ns.Id] = id 128 algoID++ 129 } 130 fmt.Fprintf(w, "num%s = %#x\n", strings.Title(ns.Id), id) 131 } 132 fmt.Fprintln(w, "numNumberSystems") 133 fmt.Fprintln(w, ")") 134 135 fmt.Fprintln(w, "var systemMap = map[string]system{") 136 for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem { 137 fmt.Fprintf(w, "%q: num%s,\n", ns.Id, strings.Title(ns.Id)) 138 w.Size += len(ns.Id) + 16 + 1 // very coarse approximation 139 } 140 fmt.Fprintln(w, "}") 141 } 142 143 func genSymbols(w *gen.CodeWriter, data *cldr.CLDR) { 144 d, err := cldr.ParseDraft(*draft) 145 if err != nil { 146 log.Fatalf("invalid draft level: %v", err) 147 } 148 149 nNumberSystems := system(len(systemMap)) 150 151 type symbols [NumSymbolTypes]string 152 153 type key struct { 154 tag int // from language.CompactIndex 155 system system 156 } 157 symbolMap := map[key]*symbols{} 158 159 defaults := map[int]system{} 160 161 for _, lang := range data.Locales() { 162 ldml := data.RawLDML(lang) 163 if ldml.Numbers == nil { 164 continue 165 } 166 langIndex, ok := language.CompactIndex(language.MustParse(lang)) 167 if !ok { 168 log.Fatalf("No compact index for language %s", lang) 169 } 170 if d := ldml.Numbers.DefaultNumberingSystem; len(d) > 0 { 171 defaults[langIndex] = getNumberSystem(d[0].Data()) 172 } 173 174 syms := cldr.MakeSlice(&ldml.Numbers.Symbols) 175 syms.SelectDraft(d) 176 177 getFirst := func(name string, x interface{}) string { 178 v := reflect.ValueOf(x) 179 slice := cldr.MakeSlice(x) 180 slice.SelectAnyOf("alt", "", "alt") 181 if reflect.Indirect(v).Len() == 0 { 182 return "" 183 } else if reflect.Indirect(v).Len() > 1 { 184 log.Fatalf("%s: multiple values of %q within single symbol not supported.", lang, name) 185 } 186 return reflect.Indirect(v).Index(0).MethodByName("Data").Call(nil)[0].String() 187 } 188 189 for _, sym := range ldml.Numbers.Symbols { 190 if sym.NumberSystem == "" { 191 // This is just linking the default of root to "latn". 192 continue 193 } 194 symbolMap[key{langIndex, getNumberSystem(sym.NumberSystem)}] = &symbols{ 195 SymDecimal: getFirst("decimal", &sym.Decimal), 196 SymGroup: getFirst("group", &sym.Group), 197 SymList: getFirst("list", &sym.List), 198 SymPercentSign: getFirst("percentSign", &sym.PercentSign), 199 SymPlusSign: getFirst("plusSign", &sym.PlusSign), 200 SymMinusSign: getFirst("minusSign", &sym.MinusSign), 201 SymExponential: getFirst("exponential", &sym.Exponential), 202 SymSuperscriptingExponent: getFirst("superscriptingExponent", &sym.SuperscriptingExponent), 203 SymPerMille: getFirst("perMille", &sym.PerMille), 204 SymInfinity: getFirst("infinity", &sym.Infinity), 205 SymNan: getFirst("nan", &sym.Nan), 206 SymTimeSeparator: getFirst("timeSeparator", &sym.TimeSeparator), 207 } 208 } 209 } 210 211 // Expand all values. 212 for k, syms := range symbolMap { 213 for t := SymDecimal; t < NumSymbolTypes; t++ { 214 p := k.tag 215 for syms[t] == "" { 216 p = int(internal.Parent[p]) 217 if pSyms, ok := symbolMap[key{p, k.system}]; ok && (*pSyms)[t] != "" { 218 syms[t] = (*pSyms)[t] 219 break 220 } 221 if p == 0 /* und */ { 222 // Default to root, latn. 223 syms[t] = (*symbolMap[key{}])[t] 224 } 225 } 226 } 227 } 228 229 // Unique the symbol sets and write the string data. 230 m := map[symbols]int{} 231 sb := stringset.NewBuilder() 232 233 symIndex := [][NumSymbolTypes]byte{} 234 235 for ns := system(0); ns < nNumberSystems; ns++ { 236 for _, l := range data.Locales() { 237 langIndex, _ := language.CompactIndex(language.MustParse(l)) 238 s := symbolMap[key{langIndex, ns}] 239 if s == nil { 240 continue 241 } 242 if _, ok := m[*s]; !ok { 243 m[*s] = len(symIndex) 244 sb.Add(s[:]...) 245 var x [NumSymbolTypes]byte 246 for i := SymDecimal; i < NumSymbolTypes; i++ { 247 x[i] = byte(sb.Index((*s)[i])) 248 } 249 symIndex = append(symIndex, x) 250 } 251 } 252 } 253 w.WriteVar("symIndex", symIndex) 254 w.WriteVar("symData", sb.Set()) 255 256 // resolveSymbolIndex gets the index from the closest matching locale, 257 // including the locale itself. 258 resolveSymbolIndex := func(langIndex int, ns system) byte { 259 for { 260 if sym := symbolMap[key{langIndex, ns}]; sym != nil { 261 return byte(m[*sym]) 262 } 263 if langIndex == 0 { 264 return 0 // und, latn 265 } 266 langIndex = int(internal.Parent[langIndex]) 267 } 268 } 269 270 // Create an index with the symbols for each locale for the latn numbering 271 // system. If this is not the default, or the only one, for a locale, we 272 // will overwrite the value later. 273 var langToDefaults [language.NumCompactTags]byte 274 for _, l := range data.Locales() { 275 langIndex, _ := language.CompactIndex(language.MustParse(l)) 276 langToDefaults[langIndex] = resolveSymbolIndex(langIndex, 0) 277 } 278 279 // Delete redundant entries. 280 for _, l := range data.Locales() { 281 langIndex, _ := language.CompactIndex(language.MustParse(l)) 282 def := defaults[langIndex] 283 syms := symbolMap[key{langIndex, def}] 284 if syms == nil { 285 continue 286 } 287 for ns := system(0); ns < nNumberSystems; ns++ { 288 if ns == def { 289 continue 290 } 291 if altSyms, ok := symbolMap[key{langIndex, ns}]; ok && *altSyms == *syms { 292 delete(symbolMap, key{langIndex, ns}) 293 } 294 } 295 } 296 297 // Create a sorted list of alternatives per language. This will only need to 298 // be referenced if a user specified an alternative numbering system. 299 var langToAlt []altSymData 300 for _, l := range data.Locales() { 301 langIndex, _ := language.CompactIndex(language.MustParse(l)) 302 start := len(langToAlt) 303 if start > 0x7F { 304 log.Fatal("Number of alternative assignments > 0x7F") 305 } 306 // Create the entry for the default value. 307 def := defaults[langIndex] 308 langToAlt = append(langToAlt, altSymData{ 309 compactTag: uint16(langIndex), 310 system: def, 311 symIndex: resolveSymbolIndex(langIndex, def), 312 }) 313 314 for ns := system(0); ns < nNumberSystems; ns++ { 315 if def == ns { 316 continue 317 } 318 if sym := symbolMap[key{langIndex, ns}]; sym != nil { 319 langToAlt = append(langToAlt, altSymData{ 320 compactTag: uint16(langIndex), 321 system: ns, 322 symIndex: resolveSymbolIndex(langIndex, ns), 323 }) 324 } 325 } 326 if def == 0 && len(langToAlt) == start+1 { 327 // No additional data: erase the entry. 328 langToAlt = langToAlt[:start] 329 } else { 330 // Overwrite the entry in langToDefaults. 331 langToDefaults[langIndex] = 0x80 | byte(start) 332 } 333 } 334 w.WriteComment(` 335 langToDefaults maps a compact language index to the default numbering system 336 and default symbol set`) 337 w.WriteVar("langToDefaults", langToDefaults) 338 339 w.WriteComment(` 340 langToAlt is a list of numbering system and symbol set pairs, sorted and 341 marked by compact language index.`) 342 w.WriteVar("langToAlt", langToAlt) 343 } 344 345 // genFormats generates the lookup table for decimal, scientific and percent 346 // patterns. 347 // 348 // CLDR allows for patterns to be different per language for different numbering 349 // systems. In practice the patterns are set to be consistent for a language 350 // independent of the numbering system. genFormats verifies that no language 351 // deviates from this. 352 func genFormats(w *gen.CodeWriter, data *cldr.CLDR) { 353 d, err := cldr.ParseDraft(*draft) 354 if err != nil { 355 log.Fatalf("invalid draft level: %v", err) 356 } 357 358 // Fill the first slot with a dummy so we can identify unspecified tags. 359 formats := []number.Pattern{{}} 360 patterns := map[string]int{} 361 362 // TODO: It would be possible to eliminate two of these slices by having 363 // another indirection and store a reference to the combination of patterns. 364 decimal := make([]byte, language.NumCompactTags) 365 scientific := make([]byte, language.NumCompactTags) 366 percent := make([]byte, language.NumCompactTags) 367 368 for _, lang := range data.Locales() { 369 ldml := data.RawLDML(lang) 370 if ldml.Numbers == nil { 371 continue 372 } 373 langIndex, ok := language.CompactIndex(language.MustParse(lang)) 374 if !ok { 375 log.Fatalf("No compact index for language %s", lang) 376 } 377 type patternSlice []*struct { 378 cldr.Common 379 Numbers string `xml:"numbers,attr"` 380 Count string `xml:"count,attr"` 381 } 382 383 add := func(name string, tags []byte, ps patternSlice) { 384 sl := cldr.MakeSlice(&ps) 385 sl.SelectDraft(d) 386 if len(ps) == 0 { 387 return 388 } 389 if len(ps) > 2 || len(ps) == 2 && ps[0] != ps[1] { 390 log.Fatalf("Inconsistent %d patterns for language %s", name, lang) 391 } 392 s := ps[0].Data() 393 394 index, ok := patterns[s] 395 if !ok { 396 nf, err := number.ParsePattern(s) 397 if err != nil { 398 log.Fatal(err) 399 } 400 index = len(formats) 401 patterns[s] = index 402 formats = append(formats, *nf) 403 } 404 tags[langIndex] = byte(index) 405 } 406 407 for _, df := range ldml.Numbers.DecimalFormats { 408 for _, l := range df.DecimalFormatLength { 409 if l.Type != "" { 410 continue 411 } 412 for _, f := range l.DecimalFormat { 413 add("decimal", decimal, f.Pattern) 414 } 415 } 416 } 417 for _, df := range ldml.Numbers.ScientificFormats { 418 for _, l := range df.ScientificFormatLength { 419 if l.Type != "" { 420 continue 421 } 422 for _, f := range l.ScientificFormat { 423 add("scientific", scientific, f.Pattern) 424 } 425 } 426 } 427 for _, df := range ldml.Numbers.PercentFormats { 428 for _, l := range df.PercentFormatLength { 429 if l.Type != "" { 430 continue 431 } 432 for _, f := range l.PercentFormat { 433 add("percent", percent, f.Pattern) 434 } 435 } 436 } 437 } 438 439 // Complete the parent tag array to reflect inheritance. An index of 0 440 // indicates an unspecified value. 441 for _, data := range [][]byte{decimal, scientific, percent} { 442 for i := range data { 443 p := uint16(i) 444 for ; data[p] == 0; p = internal.Parent[p] { 445 } 446 data[i] = data[p] 447 } 448 } 449 w.WriteVar("tagToDecimal", decimal) 450 w.WriteVar("tagToScientific", scientific) 451 w.WriteVar("tagToPercent", percent) 452 453 value := strings.Replace(fmt.Sprintf("%#v", formats), "number.", "", -1) 454 // Break up the lines. This won't give ideal perfect formatting, but it is 455 // better than one huge line. 456 value = strings.Replace(value, ", ", ",\n", -1) 457 fmt.Fprintf(w, "var formats = %s\n", value) 458 }