github.com/go-xe2/third@v1.0.3/golang.org/x/text/unicode/cldr/makexml.go (about) 1 // Copyright 2013 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // +build ignore 6 7 // This tool generates types for the various XML formats of CLDR. 8 package main 9 10 import ( 11 "archive/zip" 12 "bytes" 13 "encoding/xml" 14 "flag" 15 "fmt" 16 "io" 17 "io/ioutil" 18 "log" 19 "os" 20 "regexp" 21 "strings" 22 23 "github.com/go-xe2/third/golang.org/x/text/internal/gen" 24 ) 25 26 var outputFile = flag.String("output", "xml.go", "output file name") 27 28 func main() { 29 flag.Parse() 30 31 r := gen.OpenCLDRCoreZip() 32 buffer, err := ioutil.ReadAll(r) 33 if err != nil { 34 log.Fatal("Could not read zip file") 35 } 36 r.Close() 37 z, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer))) 38 if err != nil { 39 log.Fatalf("Could not read zip archive: %v", err) 40 } 41 42 var buf bytes.Buffer 43 44 version := gen.CLDRVersion() 45 46 for _, dtd := range files { 47 for _, f := range z.File { 48 if strings.HasSuffix(f.Name, dtd.file+".dtd") { 49 r, err := f.Open() 50 failOnError(err) 51 52 b := makeBuilder(&buf, dtd) 53 b.parseDTD(r) 54 b.resolve(b.index[dtd.top[0]]) 55 b.write() 56 if b.version != "" && version != b.version { 57 println(f.Name) 58 log.Fatalf("main: inconsistent versions: found %s; want %s", b.version, version) 59 } 60 break 61 } 62 } 63 } 64 fmt.Fprintln(&buf, "// Version is the version of CLDR from which the XML definitions are generated.") 65 fmt.Fprintf(&buf, "const Version = %q\n", version) 66 67 gen.WriteGoFile(*outputFile, "cldr", buf.Bytes()) 68 } 69 70 func failOnError(err error) { 71 if err != nil { 72 log.New(os.Stderr, "", log.Lshortfile).Output(2, err.Error()) 73 os.Exit(1) 74 } 75 } 76 77 // configuration data per DTD type 78 type dtd struct { 79 file string // base file name 80 root string // Go name of the root XML element 81 top []string // create a different type for this section 82 83 skipElem []string // hard-coded or deprecated elements 84 skipAttr []string // attributes to exclude 85 predefined []string // hard-coded elements exist of the form <name>Elem 86 forceRepeat []string // elements to make slices despite DTD 87 } 88 89 var files = []dtd{ 90 { 91 file: "ldmlBCP47", 92 root: "LDMLBCP47", 93 top: []string{"ldmlBCP47"}, 94 skipElem: []string{ 95 "cldrVersion", // deprecated, not used 96 }, 97 }, 98 { 99 file: "ldmlSupplemental", 100 root: "SupplementalData", 101 top: []string{"supplementalData"}, 102 skipElem: []string{ 103 "cldrVersion", // deprecated, not used 104 }, 105 forceRepeat: []string{ 106 "plurals", // data defined in plurals.xml and ordinals.xml 107 }, 108 }, 109 { 110 file: "ldml", 111 root: "LDML", 112 top: []string{ 113 "ldml", "collation", "calendar", "timeZoneNames", "localeDisplayNames", "numbers", 114 }, 115 skipElem: []string{ 116 "cp", // not used anywhere 117 "special", // not used anywhere 118 "fallback", // deprecated, not used 119 "alias", // in Common 120 "default", // in Common 121 }, 122 skipAttr: []string{ 123 "hiraganaQuarternary", // typo in DTD, correct version included as well 124 }, 125 predefined: []string{"rules"}, 126 }, 127 } 128 129 var comments = map[string]string{ 130 "ldmlBCP47": ` 131 // LDMLBCP47 holds information on allowable values for various variables in LDML. 132 `, 133 "supplementalData": ` 134 // SupplementalData holds information relevant for internationalization 135 // and proper use of CLDR, but that is not contained in the locale hierarchy. 136 `, 137 "ldml": ` 138 // LDML is the top-level type for locale-specific data. 139 `, 140 "collation": ` 141 // Collation contains rules that specify a certain sort-order, 142 // as a tailoring of the root order. 143 // The parsed rules are obtained by passing a RuleProcessor to Collation's 144 // Process method. 145 `, 146 "calendar": ` 147 // Calendar specifies the fields used for formatting and parsing dates and times. 148 // The month and quarter names are identified numerically, starting at 1. 149 // The day (of the week) names are identified with short strings, since there is 150 // no universally-accepted numeric designation. 151 `, 152 "dates": ` 153 // Dates contains information regarding the format and parsing of dates and times. 154 `, 155 "localeDisplayNames": ` 156 // LocaleDisplayNames specifies localized display names for for scripts, languages, 157 // countries, currencies, and variants. 158 `, 159 "numbers": ` 160 // Numbers supplies information for formatting and parsing numbers and currencies. 161 `, 162 } 163 164 type element struct { 165 name string // XML element name 166 category string // elements contained by this element 167 signature string // category + attrKey* 168 169 attr []*attribute // attributes supported by this element. 170 sub []struct { // parsed and evaluated sub elements of this element. 171 e *element 172 repeat bool // true if the element needs to be a slice 173 } 174 175 resolved bool // prevent multiple resolutions of this element. 176 } 177 178 type attribute struct { 179 name string 180 key string 181 list []string 182 183 tag string // Go tag 184 } 185 186 var ( 187 reHead = regexp.MustCompile(` *(\w+) +([\w\-]+)`) 188 reAttr = regexp.MustCompile(` *(\w+) *(?:(\w+)|\(([\w\- \|]+)\)) *(?:#([A-Z]*) *(?:\"([\.\d+])\")?)? *("[\w\-:]*")?`) 189 reElem = regexp.MustCompile(`^ *(EMPTY|ANY|\(.*\)[\*\+\?]?) *$`) 190 reToken = regexp.MustCompile(`\w\-`) 191 ) 192 193 // builder is used to read in the DTD files from CLDR and generate Go code 194 // to be used with the encoding/xml package. 195 type builder struct { 196 w io.Writer 197 index map[string]*element 198 elem []*element 199 info dtd 200 version string 201 } 202 203 func makeBuilder(w io.Writer, d dtd) builder { 204 return builder{ 205 w: w, 206 index: make(map[string]*element), 207 elem: []*element{}, 208 info: d, 209 } 210 } 211 212 // parseDTD parses a DTD file. 213 func (b *builder) parseDTD(r io.Reader) { 214 for d := xml.NewDecoder(r); ; { 215 t, err := d.Token() 216 if t == nil { 217 break 218 } 219 failOnError(err) 220 dir, ok := t.(xml.Directive) 221 if !ok { 222 continue 223 } 224 m := reHead.FindSubmatch(dir) 225 dir = dir[len(m[0]):] 226 ename := string(m[2]) 227 el, elementFound := b.index[ename] 228 switch string(m[1]) { 229 case "ELEMENT": 230 if elementFound { 231 log.Fatal("parseDTD: duplicate entry for element %q", ename) 232 } 233 m := reElem.FindSubmatch(dir) 234 if m == nil { 235 log.Fatalf("parseDTD: invalid element %q", string(dir)) 236 } 237 if len(m[0]) != len(dir) { 238 log.Fatal("parseDTD: invalid element %q", string(dir), len(dir), len(m[0]), string(m[0])) 239 } 240 s := string(m[1]) 241 el = &element{ 242 name: ename, 243 category: s, 244 } 245 b.index[ename] = el 246 case "ATTLIST": 247 if !elementFound { 248 log.Fatalf("parseDTD: unknown element %q", ename) 249 } 250 s := string(dir) 251 m := reAttr.FindStringSubmatch(s) 252 if m == nil { 253 log.Fatal(fmt.Errorf("parseDTD: invalid attribute %q", string(dir))) 254 } 255 if m[4] == "FIXED" { 256 b.version = m[5] 257 } else { 258 switch m[1] { 259 case "draft", "references", "alt", "validSubLocales", "standard" /* in Common */ : 260 case "type", "choice": 261 default: 262 el.attr = append(el.attr, &attribute{ 263 name: m[1], 264 key: s, 265 list: reToken.FindAllString(m[3], -1), 266 }) 267 el.signature = fmt.Sprintf("%s=%s+%s", el.signature, m[1], m[2]) 268 } 269 } 270 } 271 } 272 } 273 274 var reCat = regexp.MustCompile(`[ ,\|]*(?:(\(|\)|\#?[\w_-]+)([\*\+\?]?))?`) 275 276 // resolve takes a parsed element and converts it into structured data 277 // that can be used to generate the XML code. 278 func (b *builder) resolve(e *element) { 279 if e.resolved { 280 return 281 } 282 b.elem = append(b.elem, e) 283 e.resolved = true 284 s := e.category 285 found := make(map[string]bool) 286 sequenceStart := []int{} 287 for len(s) > 0 { 288 m := reCat.FindStringSubmatch(s) 289 if m == nil { 290 log.Fatalf("%s: invalid category string %q", e.name, s) 291 } 292 repeat := m[2] == "*" || m[2] == "+" || in(b.info.forceRepeat, m[1]) 293 switch m[1] { 294 case "": 295 case "(": 296 sequenceStart = append(sequenceStart, len(e.sub)) 297 case ")": 298 if len(sequenceStart) == 0 { 299 log.Fatalf("%s: unmatched closing parenthesis", e.name) 300 } 301 for i := sequenceStart[len(sequenceStart)-1]; i < len(e.sub); i++ { 302 e.sub[i].repeat = e.sub[i].repeat || repeat 303 } 304 sequenceStart = sequenceStart[:len(sequenceStart)-1] 305 default: 306 if in(b.info.skipElem, m[1]) { 307 } else if sub, ok := b.index[m[1]]; ok { 308 if !found[sub.name] { 309 e.sub = append(e.sub, struct { 310 e *element 311 repeat bool 312 }{sub, repeat}) 313 found[sub.name] = true 314 b.resolve(sub) 315 } 316 } else if m[1] == "#PCDATA" || m[1] == "ANY" { 317 } else if m[1] != "EMPTY" { 318 log.Fatalf("resolve:%s: element %q not found", e.name, m[1]) 319 } 320 } 321 s = s[len(m[0]):] 322 } 323 } 324 325 // return true if s is contained in set. 326 func in(set []string, s string) bool { 327 for _, v := range set { 328 if v == s { 329 return true 330 } 331 } 332 return false 333 } 334 335 var repl = strings.NewReplacer("-", " ", "_", " ") 336 337 // title puts the first character or each character following '_' in title case and 338 // removes all occurrences of '_'. 339 func title(s string) string { 340 return strings.Replace(strings.Title(repl.Replace(s)), " ", "", -1) 341 } 342 343 // writeElem generates Go code for a single element, recursively. 344 func (b *builder) writeElem(tab int, e *element) { 345 p := func(f string, x ...interface{}) { 346 f = strings.Replace(f, "\n", "\n"+strings.Repeat("\t", tab), -1) 347 fmt.Fprintf(b.w, f, x...) 348 } 349 if len(e.sub) == 0 && len(e.attr) == 0 { 350 p("Common") 351 return 352 } 353 p("struct {") 354 tab++ 355 p("\nCommon") 356 for _, attr := range e.attr { 357 if !in(b.info.skipAttr, attr.name) { 358 p("\n%s string `xml:\"%s,attr\"`", title(attr.name), attr.name) 359 } 360 } 361 for _, sub := range e.sub { 362 if in(b.info.predefined, sub.e.name) { 363 p("\n%sElem", sub.e.name) 364 continue 365 } 366 if in(b.info.skipElem, sub.e.name) { 367 continue 368 } 369 p("\n%s ", title(sub.e.name)) 370 if sub.repeat { 371 p("[]") 372 } 373 p("*") 374 if in(b.info.top, sub.e.name) { 375 p(title(sub.e.name)) 376 } else { 377 b.writeElem(tab, sub.e) 378 } 379 p(" `xml:\"%s\"`", sub.e.name) 380 } 381 tab-- 382 p("\n}") 383 } 384 385 // write generates the Go XML code. 386 func (b *builder) write() { 387 for i, name := range b.info.top { 388 e := b.index[name] 389 if e != nil { 390 fmt.Fprintf(b.w, comments[name]) 391 name := title(e.name) 392 if i == 0 { 393 name = b.info.root 394 } 395 fmt.Fprintf(b.w, "type %s ", name) 396 b.writeElem(0, e) 397 fmt.Fprint(b.w, "\n") 398 } 399 } 400 }