github.com/graybobo/golang.org-package-offline-cache@v0.0.0-20200626051047-6608995c132f/x/text/cldr/makexml.go (about) 1 // Copyright 2013 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // +build ignore 6 7 // This tool generates types for the various XML formats of CLDR. 8 package main 9 10 import ( 11 "archive/zip" 12 "bytes" 13 "encoding/xml" 14 "flag" 15 "fmt" 16 "go/format" 17 "io" 18 "io/ioutil" 19 "log" 20 "net/http" 21 "os" 22 "path" 23 "path/filepath" 24 "regexp" 25 "strconv" 26 "strings" 27 ) 28 29 var ( 30 url = flag.String("url", 31 "http://www.unicode.org/Public/cldr/26/core.zip", 32 "Path to CLDR directory or zip archive.") 33 localDir = flag.String("local", 34 "", 35 "directory containing local data files; for debugging only.") 36 outputFile = flag.String("output", "xml.go", "output file name") 37 ) 38 39 func main() { 40 flag.Parse() 41 42 z := openArchive(url) 43 44 var buf bytes.Buffer 45 46 fmt.Fprintf(&buf, header, *url) 47 48 var version uint64 49 50 for _, dtd := range files { 51 for _, f := range z.File { 52 if strings.HasSuffix(f.Name, dtd.file+".dtd") { 53 r, err := f.Open() 54 failOnError(err) 55 56 b := makeBuilder(&buf, dtd) 57 b.parseDTD(r) 58 b.resolve(b.index[dtd.top[0]]) 59 b.write() 60 if version == 0 { 61 version = b.version 62 } else if b.version != 0 && version != b.version { 63 log.Fatalf("main: inconsistent versions: found %d; want %d", b.version, version) 64 } 65 break 66 } 67 } 68 } 69 fmt.Fprintf(&buf, "\nconst Version = \"%d\"\n", version) 70 71 data, err := format.Source(buf.Bytes()) 72 failOnError(err) 73 failOnError(ioutil.WriteFile(*outputFile, data, 0644)) 74 } 75 76 const header = `// Generated by running 77 // makexml --url=%s 78 // automatically with go generate. 79 // DO NOT EDIT 80 81 package cldr 82 ` 83 84 func failOnError(err error) { 85 if err != nil { 86 log.New(os.Stderr, "", log.Lshortfile).Output(2, err.Error()) 87 os.Exit(1) 88 } 89 } 90 91 // configuration data per DTD type 92 type dtd struct { 93 file string // base file name 94 root string // Go name of the root XML element 95 top []string // create a different type for this section 96 97 skipElem []string // hard-coded or deprecated elements 98 skipAttr []string // attributes to exclude 99 predefined []string // hard-coded elements exist of the form <name>Elem 100 forceRepeat []string // elements to make slices despite DTD 101 } 102 103 var files = []dtd{ 104 { 105 file: "ldmlBCP47", 106 root: "LDMLBCP47", 107 top: []string{"ldmlBCP47"}, 108 skipElem: []string{ 109 "cldrVersion", // deprecated, not used 110 }, 111 }, 112 { 113 file: "ldmlSupplemental", 114 root: "SupplementalData", 115 top: []string{"supplementalData"}, 116 skipElem: []string{ 117 "cldrVersion", // deprecated, not used 118 }, 119 forceRepeat: []string{ 120 "plurals", // data defined in plurals.xml and ordinals.xml 121 }, 122 }, 123 { 124 file: "ldml", 125 root: "LDML", 126 top: []string{ 127 "ldml", "collation", "calendar", "timeZoneNames", "localeDisplayNames", "numbers", 128 }, 129 skipElem: []string{ 130 "cp", // not used anywhere 131 "special", // not used anywhere 132 "fallback", // deprecated, not used 133 "alias", // in Common 134 "default", // in Common 135 }, 136 skipAttr: []string{ 137 "hiraganaQuarternary", // typo in DTD, correct version included as well 138 }, 139 predefined: []string{"rules"}, 140 }, 141 } 142 143 var comments = map[string]string{ 144 "ldmlBCP47": ` 145 // LDMLBCP47 holds information on allowable values for various variables in LDML. 146 `, 147 "supplementalData": ` 148 // SupplementalData holds information relevant for internationalization 149 // and proper use of CLDR, but that is not contained in the locale hierarchy. 150 `, 151 "ldml": ` 152 // LDML is the top-level type for locale-specific data. 153 `, 154 "collation": ` 155 // Collation contains rules that specify a certain sort-order, 156 // as a tailoring of the root order. 157 // The parsed rules are obtained by passing a RuleProcessor to Collation's 158 // Process method. 159 `, 160 "calendar": ` 161 // Calendar specifies the fields used for formatting and parsing dates and times. 162 // The month and quarter names are identified numerically, starting at 1. 163 // The day (of the week) names are identified with short strings, since there is 164 // no universally-accepted numeric designation. 165 `, 166 "dates": ` 167 // Dates contains information regarding the format and parsing of dates and times. 168 `, 169 "localeDisplayNames": ` 170 // LocaleDisplayNames specifies localized display names for for scripts, languages, 171 // countries, currencies, and variants. 172 `, 173 "numbers": ` 174 // Numbers supplies information for formatting and parsing numbers and currencies. 175 `, 176 } 177 178 type element struct { 179 name string // XML element name 180 category string // elements contained by this element 181 signature string // category + attrKey* 182 183 attr []*attribute // attributes supported by this element. 184 sub []struct { // parsed and evaluated sub elements of this element. 185 e *element 186 repeat bool // true if the element needs to be a slice 187 } 188 189 resolved bool // prevent multiple resolutions of this element. 190 } 191 192 type attribute struct { 193 name string 194 key string 195 list []string 196 197 tag string // Go tag 198 } 199 200 var ( 201 reHead = regexp.MustCompile(` *(\w+) +([\w\-]+)`) 202 reAttr = regexp.MustCompile(` *(\w+) *(?:(\w+)|\(([\w\- \|]+)\)) *(?:#([A-Z]*) *(?:\"(\d+)[\.\d]*\")?)? *("[\w\-:]*")?`) 203 reElem = regexp.MustCompile(`^ *(EMPTY|ANY|\(.*\)[\*\+\?]?) *$`) 204 reToken = regexp.MustCompile(`\w\-`) 205 ) 206 207 // builder is used to read in the DTD files from CLDR and generate Go code 208 // to be used with the encoding/xml package. 209 type builder struct { 210 w io.Writer 211 index map[string]*element 212 elem []*element 213 info dtd 214 version uint64 215 } 216 217 func makeBuilder(w io.Writer, d dtd) builder { 218 return builder{ 219 w: w, 220 index: make(map[string]*element), 221 elem: []*element{}, 222 info: d, 223 } 224 } 225 226 // parseDTD parses a DTD file. 227 func (b *builder) parseDTD(r io.Reader) { 228 for d := xml.NewDecoder(r); ; { 229 t, err := d.Token() 230 if t == nil { 231 break 232 } 233 failOnError(err) 234 dir, ok := t.(xml.Directive) 235 if !ok { 236 continue 237 } 238 m := reHead.FindSubmatch(dir) 239 dir = dir[len(m[0]):] 240 ename := string(m[2]) 241 el, elementFound := b.index[ename] 242 switch string(m[1]) { 243 case "ELEMENT": 244 if elementFound { 245 log.Fatal("parseDTD: duplicate entry for element %q", ename) 246 } 247 m := reElem.FindSubmatch(dir) 248 if m == nil { 249 log.Fatalf("parseDTD: invalid element %q", string(dir)) 250 } 251 if len(m[0]) != len(dir) { 252 log.Fatal("parseDTD: invalid element %q", string(dir), len(dir), len(m[0]), string(m[0])) 253 } 254 s := string(m[1]) 255 el = &element{ 256 name: ename, 257 category: s, 258 } 259 b.index[ename] = el 260 case "ATTLIST": 261 if !elementFound { 262 log.Fatalf("parseDTD: unknown element %q", ename) 263 } 264 s := string(dir) 265 m := reAttr.FindStringSubmatch(s) 266 if m == nil { 267 log.Fatal(fmt.Errorf("parseDTD: invalid attribute %q", string(dir))) 268 } 269 if m[4] == "FIXED" { 270 b.version, err = strconv.ParseUint(m[5], 10, 16) 271 failOnError(err) 272 } else { 273 switch m[1] { 274 case "draft", "references", "alt", "validSubLocales", "standard" /* in Common */ : 275 case "type", "choice": 276 default: 277 el.attr = append(el.attr, &attribute{ 278 name: m[1], 279 key: s, 280 list: reToken.FindAllString(m[3], -1), 281 }) 282 el.signature = fmt.Sprintf("%s=%s+%s", el.signature, m[1], m[2]) 283 } 284 } 285 } 286 } 287 } 288 289 var reCat = regexp.MustCompile(`[ ,\|]*(?:(\(|\)|\#?[\w_-]+)([\*\+\?]?))?`) 290 291 // resolve takes a parsed element and converts it into structured data 292 // that can be used to generate the XML code. 293 func (b *builder) resolve(e *element) { 294 if e.resolved { 295 return 296 } 297 b.elem = append(b.elem, e) 298 e.resolved = true 299 s := e.category 300 found := make(map[string]bool) 301 sequenceStart := []int{} 302 for len(s) > 0 { 303 m := reCat.FindStringSubmatch(s) 304 if m == nil { 305 log.Fatalf("%s: invalid category string %q", e.name, s) 306 } 307 repeat := m[2] == "*" || m[2] == "+" || in(b.info.forceRepeat, m[1]) 308 switch m[1] { 309 case "": 310 case "(": 311 sequenceStart = append(sequenceStart, len(e.sub)) 312 case ")": 313 if len(sequenceStart) == 0 { 314 log.Fatalf("%s: unmatched closing parenthesis", e.name) 315 } 316 for i := sequenceStart[len(sequenceStart)-1]; i < len(e.sub); i++ { 317 e.sub[i].repeat = e.sub[i].repeat || repeat 318 } 319 sequenceStart = sequenceStart[:len(sequenceStart)-1] 320 default: 321 if in(b.info.skipElem, m[1]) { 322 } else if sub, ok := b.index[m[1]]; ok { 323 if !found[sub.name] { 324 e.sub = append(e.sub, struct { 325 e *element 326 repeat bool 327 }{sub, repeat}) 328 found[sub.name] = true 329 b.resolve(sub) 330 } 331 } else if m[1] == "#PCDATA" || m[1] == "ANY" { 332 } else if m[1] != "EMPTY" { 333 log.Fatalf("resolve:%s: element %q not found", e.name, m[1]) 334 } 335 } 336 s = s[len(m[0]):] 337 } 338 } 339 340 // return true if s is contained in set. 341 func in(set []string, s string) bool { 342 for _, v := range set { 343 if v == s { 344 return true 345 } 346 } 347 return false 348 } 349 350 var repl = strings.NewReplacer("-", " ", "_", " ") 351 352 // title puts the first character or each character following '_' in title case and 353 // removes all occurrences of '_'. 354 func title(s string) string { 355 return strings.Replace(strings.Title(repl.Replace(s)), " ", "", -1) 356 } 357 358 // writeElem generates Go code for a single element, recursively. 359 func (b *builder) writeElem(tab int, e *element) { 360 p := func(f string, x ...interface{}) { 361 f = strings.Replace(f, "\n", "\n"+strings.Repeat("\t", tab), -1) 362 fmt.Fprintf(b.w, f, x...) 363 } 364 if len(e.sub) == 0 && len(e.attr) == 0 { 365 p("Common") 366 return 367 } 368 p("struct {") 369 tab++ 370 p("\nCommon") 371 for _, attr := range e.attr { 372 if !in(b.info.skipAttr, attr.name) { 373 p("\n%s string `xml:\"%s,attr\"`", title(attr.name), attr.name) 374 } 375 } 376 for _, sub := range e.sub { 377 if in(b.info.predefined, sub.e.name) { 378 p("\n%sElem", sub.e.name) 379 continue 380 } 381 if in(b.info.skipElem, sub.e.name) { 382 continue 383 } 384 p("\n%s ", title(sub.e.name)) 385 if sub.repeat { 386 p("[]") 387 } 388 p("*") 389 if in(b.info.top, sub.e.name) { 390 p(title(sub.e.name)) 391 } else { 392 b.writeElem(tab, sub.e) 393 } 394 p(" `xml:\"%s\"`", sub.e.name) 395 } 396 tab-- 397 p("\n}") 398 } 399 400 // write generates the Go XML code. 401 func (b *builder) write() { 402 for i, name := range b.info.top { 403 e := b.index[name] 404 if e != nil { 405 fmt.Fprintf(b.w, comments[name]) 406 name := title(e.name) 407 if i == 0 { 408 name = b.info.root 409 } 410 fmt.Fprintf(b.w, "type %s ", name) 411 b.writeElem(0, e) 412 fmt.Fprint(b.w, "\n") 413 } 414 } 415 } 416 417 // openArchive gets the file for the given url and opens it as a Zip archive. 418 func openArchive(url *string) *zip.Reader { 419 var r io.ReadCloser 420 if *localDir != "" { 421 dir, err := filepath.Abs(*localDir) 422 failOnError(err) 423 r, err = os.Open(filepath.Join(dir, path.Base(*url))) 424 failOnError(err) 425 } else { 426 resp, err := http.Get(*url) 427 if err != nil { 428 log.Fatalf("HTTP GET: %v", err) 429 } 430 if resp.StatusCode != 200 { 431 log.Fatalf(`bad GET status for "%s": %s`, *url, resp.Status) 432 } 433 r = resp.Body 434 } 435 buffer, err := ioutil.ReadAll(r) 436 r.Close() 437 failOnError(err) 438 archive, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer))) 439 failOnError(err) 440 return archive 441 }