golang.org/x/text@v0.14.0/unicode/cldr/makexml.go (about) 1 // Copyright 2013 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 //go:build ignore 6 7 // This tool generates types for the various XML formats of CLDR. 8 package main 9 10 import ( 11 "archive/zip" 12 "bytes" 13 "encoding/xml" 14 "flag" 15 "fmt" 16 "io" 17 "log" 18 "os" 19 "regexp" 20 "strings" 21 22 "golang.org/x/text/internal/gen" 23 ) 24 25 var outputFile = flag.String("output", "xml.go", "output file name") 26 27 func main() { 28 flag.Parse() 29 30 r := gen.OpenCLDRCoreZip() 31 buffer, err := io.ReadAll(r) 32 if err != nil { 33 log.Fatal("Could not read zip file") 34 } 35 r.Close() 36 z, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer))) 37 if err != nil { 38 log.Fatalf("Could not read zip archive: %v", err) 39 } 40 41 var buf bytes.Buffer 42 43 version := gen.CLDRVersion() 44 45 for _, dtd := range files { 46 for _, f := range z.File { 47 if strings.HasSuffix(f.Name, dtd.file+".dtd") { 48 r, err := f.Open() 49 failOnError(err) 50 51 b := makeBuilder(&buf, dtd) 52 b.parseDTD(r) 53 b.resolve(b.index[dtd.top[0]]) 54 b.write() 55 if b.version != "" && version != b.version { 56 println(f.Name) 57 log.Fatalf("main: inconsistent versions: found %s; want %s", b.version, version) 58 } 59 break 60 } 61 } 62 } 63 fmt.Fprintln(&buf, "// Version is the version of CLDR from which the XML definitions are generated.") 64 fmt.Fprintf(&buf, "const Version = %q\n", version) 65 66 gen.WriteGoFile(*outputFile, "cldr", buf.Bytes()) 67 } 68 69 func failOnError(err error) { 70 if err != nil { 71 log.New(os.Stderr, "", log.Lshortfile).Output(2, err.Error()) 72 os.Exit(1) 73 } 74 } 75 76 // configuration data per DTD type 77 type dtd struct { 78 file string // base file name 79 root string // Go name of the root XML element 80 top []string // create a different type for this section 81 82 skipElem []string // hard-coded or deprecated elements 83 skipAttr []string // attributes to exclude 84 predefined []string // hard-coded elements exist of the form <name>Elem 85 forceRepeat []string // elements to make slices despite DTD 86 } 87 88 var files = []dtd{ 89 { 90 file: "ldmlBCP47", 91 root: "LDMLBCP47", 92 top: []string{"ldmlBCP47"}, 93 skipElem: []string{ 94 "cldrVersion", // deprecated, not used 95 }, 96 }, 97 { 98 file: "ldmlSupplemental", 99 root: "SupplementalData", 100 top: []string{"supplementalData"}, 101 skipElem: []string{ 102 "cldrVersion", // deprecated, not used 103 }, 104 forceRepeat: []string{ 105 "plurals", // data defined in plurals.xml and ordinals.xml 106 }, 107 }, 108 { 109 file: "ldml", 110 root: "LDML", 111 top: []string{ 112 "ldml", "collation", "calendar", "timeZoneNames", "localeDisplayNames", "numbers", 113 }, 114 skipElem: []string{ 115 "cp", // not used anywhere 116 "special", // not used anywhere 117 "fallback", // deprecated, not used 118 "alias", // in Common 119 "default", // in Common 120 }, 121 skipAttr: []string{ 122 "hiraganaQuarternary", // typo in DTD, correct version included as well 123 }, 124 predefined: []string{"rules"}, 125 }, 126 } 127 128 var comments = map[string]string{ 129 "ldmlBCP47": ` 130 // LDMLBCP47 holds information on allowable values for various variables in LDML. 131 `, 132 "supplementalData": ` 133 // SupplementalData holds information relevant for internationalization 134 // and proper use of CLDR, but that is not contained in the locale hierarchy. 135 `, 136 "ldml": ` 137 // LDML is the top-level type for locale-specific data. 138 `, 139 "collation": ` 140 // Collation contains rules that specify a certain sort-order, 141 // as a tailoring of the root order. 142 // The parsed rules are obtained by passing a RuleProcessor to Collation's 143 // Process method. 144 `, 145 "calendar": ` 146 // Calendar specifies the fields used for formatting and parsing dates and times. 147 // The month and quarter names are identified numerically, starting at 1. 148 // The day (of the week) names are identified with short strings, since there is 149 // no universally-accepted numeric designation. 150 `, 151 "dates": ` 152 // Dates contains information regarding the format and parsing of dates and times. 153 `, 154 "localeDisplayNames": ` 155 // LocaleDisplayNames specifies localized display names for scripts, languages, 156 // countries, currencies, and variants. 157 `, 158 "numbers": ` 159 // Numbers supplies information for formatting and parsing numbers and currencies. 160 `, 161 } 162 163 type element struct { 164 name string // XML element name 165 category string // elements contained by this element 166 signature string // category + attrKey* 167 168 attr []*attribute // attributes supported by this element. 169 sub []struct { // parsed and evaluated sub elements of this element. 170 e *element 171 repeat bool // true if the element needs to be a slice 172 } 173 174 resolved bool // prevent multiple resolutions of this element. 175 } 176 177 type attribute struct { 178 name string 179 key string 180 list []string 181 182 tag string // Go tag 183 } 184 185 var ( 186 reHead = regexp.MustCompile(` *(\w+) +([\w\-]+)`) 187 reAttr = regexp.MustCompile(` *(\w+) *(?:(\w+)|\(([\w\- \|]+)\)) *(?:#([A-Z]*) *(?:\"([\.\d+])\")?)? *("[\w\-:]*")?`) 188 reElem = regexp.MustCompile(`^ *(EMPTY|ANY|\(.*\)[\*\+\?]?) *$`) 189 reToken = regexp.MustCompile(`\w\-`) 190 ) 191 192 // builder is used to read in the DTD files from CLDR and generate Go code 193 // to be used with the encoding/xml package. 194 type builder struct { 195 w io.Writer 196 index map[string]*element 197 elem []*element 198 info dtd 199 version string 200 } 201 202 func makeBuilder(w io.Writer, d dtd) builder { 203 return builder{ 204 w: w, 205 index: make(map[string]*element), 206 elem: []*element{}, 207 info: d, 208 } 209 } 210 211 // parseDTD parses a DTD file. 212 func (b *builder) parseDTD(r io.Reader) { 213 for d := xml.NewDecoder(r); ; { 214 t, err := d.Token() 215 if t == nil { 216 break 217 } 218 failOnError(err) 219 dir, ok := t.(xml.Directive) 220 if !ok { 221 continue 222 } 223 m := reHead.FindSubmatch(dir) 224 dir = dir[len(m[0]):] 225 ename := string(m[2]) 226 el, elementFound := b.index[ename] 227 switch string(m[1]) { 228 case "ELEMENT": 229 if elementFound { 230 log.Fatal("parseDTD: duplicate entry for element %q", ename) 231 } 232 m := reElem.FindSubmatch(dir) 233 if m == nil { 234 log.Fatalf("parseDTD: invalid element %q", string(dir)) 235 } 236 if len(m[0]) != len(dir) { 237 log.Fatal("parseDTD: invalid element %q", string(dir), len(dir), len(m[0]), string(m[0])) 238 } 239 s := string(m[1]) 240 el = &element{ 241 name: ename, 242 category: s, 243 } 244 b.index[ename] = el 245 case "ATTLIST": 246 if !elementFound { 247 log.Fatalf("parseDTD: unknown element %q", ename) 248 } 249 s := string(dir) 250 m := reAttr.FindStringSubmatch(s) 251 if m == nil { 252 log.Fatal(fmt.Errorf("parseDTD: invalid attribute %q", string(dir))) 253 } 254 if m[4] == "FIXED" { 255 b.version = m[5] 256 } else { 257 switch m[1] { 258 case "draft", "references", "alt", "validSubLocales", "standard" /* in Common */ : 259 case "type", "choice": 260 default: 261 el.attr = append(el.attr, &attribute{ 262 name: m[1], 263 key: s, 264 list: reToken.FindAllString(m[3], -1), 265 }) 266 el.signature = fmt.Sprintf("%s=%s+%s", el.signature, m[1], m[2]) 267 } 268 } 269 } 270 } 271 } 272 273 var reCat = regexp.MustCompile(`[ ,\|]*(?:(\(|\)|\#?[\w_-]+)([\*\+\?]?))?`) 274 275 // resolve takes a parsed element and converts it into structured data 276 // that can be used to generate the XML code. 277 func (b *builder) resolve(e *element) { 278 if e.resolved { 279 return 280 } 281 b.elem = append(b.elem, e) 282 e.resolved = true 283 s := e.category 284 found := make(map[string]bool) 285 sequenceStart := []int{} 286 for len(s) > 0 { 287 m := reCat.FindStringSubmatch(s) 288 if m == nil { 289 log.Fatalf("%s: invalid category string %q", e.name, s) 290 } 291 repeat := m[2] == "*" || m[2] == "+" || in(b.info.forceRepeat, m[1]) 292 switch m[1] { 293 case "": 294 case "(": 295 sequenceStart = append(sequenceStart, len(e.sub)) 296 case ")": 297 if len(sequenceStart) == 0 { 298 log.Fatalf("%s: unmatched closing parenthesis", e.name) 299 } 300 for i := sequenceStart[len(sequenceStart)-1]; i < len(e.sub); i++ { 301 e.sub[i].repeat = e.sub[i].repeat || repeat 302 } 303 sequenceStart = sequenceStart[:len(sequenceStart)-1] 304 default: 305 if in(b.info.skipElem, m[1]) { 306 } else if sub, ok := b.index[m[1]]; ok { 307 if !found[sub.name] { 308 e.sub = append(e.sub, struct { 309 e *element 310 repeat bool 311 }{sub, repeat}) 312 found[sub.name] = true 313 b.resolve(sub) 314 } 315 } else if m[1] == "#PCDATA" || m[1] == "ANY" { 316 } else if m[1] != "EMPTY" { 317 log.Fatalf("resolve:%s: element %q not found", e.name, m[1]) 318 } 319 } 320 s = s[len(m[0]):] 321 } 322 } 323 324 // return true if s is contained in set. 325 func in(set []string, s string) bool { 326 for _, v := range set { 327 if v == s { 328 return true 329 } 330 } 331 return false 332 } 333 334 var repl = strings.NewReplacer("-", " ", "_", " ") 335 336 // title puts the first character or each character following '_' in title case and 337 // removes all occurrences of '_'. 338 func title(s string) string { 339 return strings.Replace(strings.Title(repl.Replace(s)), " ", "", -1) 340 } 341 342 // writeElem generates Go code for a single element, recursively. 343 func (b *builder) writeElem(tab int, e *element) { 344 p := func(f string, x ...interface{}) { 345 f = strings.Replace(f, "\n", "\n"+strings.Repeat("\t", tab), -1) 346 fmt.Fprintf(b.w, f, x...) 347 } 348 if len(e.sub) == 0 && len(e.attr) == 0 { 349 p("Common") 350 return 351 } 352 p("struct {") 353 tab++ 354 p("\nCommon") 355 for _, attr := range e.attr { 356 if !in(b.info.skipAttr, attr.name) { 357 p("\n%s string `xml:\"%s,attr\"`", title(attr.name), attr.name) 358 } 359 } 360 for _, sub := range e.sub { 361 if in(b.info.predefined, sub.e.name) { 362 p("\n%sElem", sub.e.name) 363 continue 364 } 365 if in(b.info.skipElem, sub.e.name) { 366 continue 367 } 368 p("\n%s ", title(sub.e.name)) 369 if sub.repeat { 370 p("[]") 371 } 372 p("*") 373 if in(b.info.top, sub.e.name) { 374 p(title(sub.e.name)) 375 } else { 376 b.writeElem(tab, sub.e) 377 } 378 p(" `xml:\"%s\"`", sub.e.name) 379 } 380 tab-- 381 p("\n}") 382 } 383 384 // write generates the Go XML code. 385 func (b *builder) write() { 386 for i, name := range b.info.top { 387 e := b.index[name] 388 if e != nil { 389 fmt.Fprintf(b.w, comments[name]) 390 name := title(e.name) 391 if i == 0 { 392 name = b.info.root 393 } 394 fmt.Fprintf(b.w, "type %s ", name) 395 b.writeElem(0, e) 396 fmt.Fprint(b.w, "\n") 397 } 398 } 399 }