github.com/go-xe2/third@v1.0.3/golang.org/x/text/unicode/cldr/makexml.go (about)

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build ignore
     6  
     7  // This tool generates types for the various XML formats of CLDR.
     8  package main
     9  
    10  import (
    11  	"archive/zip"
    12  	"bytes"
    13  	"encoding/xml"
    14  	"flag"
    15  	"fmt"
    16  	"io"
    17  	"io/ioutil"
    18  	"log"
    19  	"os"
    20  	"regexp"
    21  	"strings"
    22  
    23  	"github.com/go-xe2/third/golang.org/x/text/internal/gen"
    24  )
    25  
// outputFile is the name of the Go file that main writes the generated
// definitions to; it is set with the -output flag and defaults to "xml.go".
var outputFile = flag.String("output", "xml.go", "output file name")
    27  
    28  func main() {
    29  	flag.Parse()
    30  
    31  	r := gen.OpenCLDRCoreZip()
    32  	buffer, err := ioutil.ReadAll(r)
    33  	if err != nil {
    34  		log.Fatal("Could not read zip file")
    35  	}
    36  	r.Close()
    37  	z, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer)))
    38  	if err != nil {
    39  		log.Fatalf("Could not read zip archive: %v", err)
    40  	}
    41  
    42  	var buf bytes.Buffer
    43  
    44  	version := gen.CLDRVersion()
    45  
    46  	for _, dtd := range files {
    47  		for _, f := range z.File {
    48  			if strings.HasSuffix(f.Name, dtd.file+".dtd") {
    49  				r, err := f.Open()
    50  				failOnError(err)
    51  
    52  				b := makeBuilder(&buf, dtd)
    53  				b.parseDTD(r)
    54  				b.resolve(b.index[dtd.top[0]])
    55  				b.write()
    56  				if b.version != "" && version != b.version {
    57  					println(f.Name)
    58  					log.Fatalf("main: inconsistent versions: found %s; want %s", b.version, version)
    59  				}
    60  				break
    61  			}
    62  		}
    63  	}
    64  	fmt.Fprintln(&buf, "// Version is the version of CLDR from which the XML definitions are generated.")
    65  	fmt.Fprintf(&buf, "const Version = %q\n", version)
    66  
    67  	gen.WriteGoFile(*outputFile, "cldr", buf.Bytes())
    68  }
    69  
    70  func failOnError(err error) {
    71  	if err != nil {
    72  		log.New(os.Stderr, "", log.Lshortfile).Output(2, err.Error())
    73  		os.Exit(1)
    74  	}
    75  }
    76  
// dtd holds the configuration data for one DTD type: which file in the CLDR
// archive to read and how the Go types generated from it are shaped.
type dtd struct {
	file string   // base file name; main looks for an archive entry ending in file+".dtd"
	root string   // Go name of the root XML element (used for the type of top[0])
	top  []string // XML elements that get their own named type; top[0] is the root element

	skipElem    []string // hard-coded or deprecated elements; left out of the generated types
	skipAttr    []string // attributes to exclude
	predefined  []string // hard-coded elements exist of the form <name>Elem
	forceRepeat []string // elements to make slices despite DTD
}
    88  
// files lists the DTDs to process. main handles them in this order and
// appends the types generated for each one to the same output buffer.
var files = []dtd{
	{
		file: "ldmlBCP47",
		root: "LDMLBCP47",
		top:  []string{"ldmlBCP47"},
		skipElem: []string{
			"cldrVersion", // deprecated, not used
		},
	},
	{
		file: "ldmlSupplemental",
		root: "SupplementalData",
		top:  []string{"supplementalData"},
		skipElem: []string{
			"cldrVersion", // deprecated, not used
		},
		forceRepeat: []string{
			"plurals", // data defined in plurals.xml and ordinals.xml
		},
	},
	{
		file: "ldml",
		root: "LDML",
		top: []string{
			"ldml", "collation", "calendar", "timeZoneNames", "localeDisplayNames", "numbers",
		},
		skipElem: []string{
			"cp",       // not used anywhere
			"special",  // not used anywhere
			"fallback", // deprecated, not used
			"alias",    // in Common
			"default",  // in Common
		},
		skipAttr: []string{
			"hiraganaQuarternary", // typo in DTD, correct version included as well
		},
		predefined: []string{"rules"},
	},
}
   128  
   129  var comments = map[string]string{
   130  	"ldmlBCP47": `
   131  // LDMLBCP47 holds information on allowable values for various variables in LDML.
   132  `,
   133  	"supplementalData": `
   134  // SupplementalData holds information relevant for internationalization
   135  // and proper use of CLDR, but that is not contained in the locale hierarchy.
   136  `,
   137  	"ldml": `
   138  // LDML is the top-level type for locale-specific data.
   139  `,
   140  	"collation": `
   141  // Collation contains rules that specify a certain sort-order,
   142  // as a tailoring of the root order. 
   143  // The parsed rules are obtained by passing a RuleProcessor to Collation's
   144  // Process method.
   145  `,
   146  	"calendar": `
   147  // Calendar specifies the fields used for formatting and parsing dates and times.
   148  // The month and quarter names are identified numerically, starting at 1.
   149  // The day (of the week) names are identified with short strings, since there is
   150  // no universally-accepted numeric designation.
   151  `,
   152  	"dates": `
   153  // Dates contains information regarding the format and parsing of dates and times.
   154  `,
   155  	"localeDisplayNames": `
   156  // LocaleDisplayNames specifies localized display names for for scripts, languages,
   157  // countries, currencies, and variants.
   158  `,
   159  	"numbers": `
   160  // Numbers supplies information for formatting and parsing numbers and currencies.
   161  `,
   162  }
   163  
// element describes one XML element declared in a DTD. parseDTD fills in
// name, category, attr and signature; resolve fills in sub and resolved.
type element struct {
	name      string // XML element name
	category  string // content model text of the ELEMENT declaration, e.g. "EMPTY" or "(a|b)*"
	signature string // parseDTD appends "=<attr name>+<attr type>" for every attribute it records

	attr []*attribute // attributes supported by this element.
	sub  []struct {   // parsed and evaluated sub elements of this element.
		e      *element
		repeat bool // true if the element needs to be a slice
	}

	resolved bool // prevent multiple resolutions of this element.
}
   177  
// attribute describes one XML attribute recorded by parseDTD from an ATTLIST
// declaration.
type attribute struct {
	name string   // XML attribute name
	key  string   // text of the ATTLIST declaration that follows the element name
	list []string // reToken matches found in the enumerated-values part of the declaration

	tag string // Go tag; NOTE(review): not assigned anywhere in the visible code
}
   185  
// Regular expressions used by parseDTD to take DTD declarations apart.
//
// reHead matches the start of a directive: a keyword followed by a name, as
// in "ELEMENT ldml" or "ATTLIST ldml".
//
// reAttr matches the remainder of an ATTLIST declaration. Its submatches are:
// 1 the attribute name, 2 a single-word type, 3 the text inside a
// parenthesized enumeration, 4 the upper-case keyword following '#',
// 5 a quoted value following that keyword, 6 a trailing quoted value
// (including its quotes).
// NOTE(review): group 5 is written `[\.\d+]`, a character class that matches
// exactly one character ('.', a digit or '+'), so only one-character values
// are captured there. `[\.\d]+` may have been intended; confirm before
// changing, because parseDTD stores group 5 as the builder's version.
//
// reElem matches a complete element content model: EMPTY, ANY, or a
// parenthesized group optionally followed by '*', '+' or '?'.
//
// reToken is used to collect the values of an attribute enumeration.
// NOTE(review): as written it matches one word character followed by '-',
// not a whole token; `[\w\-]+` may have been intended. Confirm.
var (
	reHead  = regexp.MustCompile(` *(\w+) +([\w\-]+)`)
	reAttr  = regexp.MustCompile(` *(\w+) *(?:(\w+)|\(([\w\- \|]+)\)) *(?:#([A-Z]*) *(?:\"([\.\d+])\")?)? *("[\w\-:]*")?`)
	reElem  = regexp.MustCompile(`^ *(EMPTY|ANY|\(.*\)[\*\+\?]?) *$`)
	reToken = regexp.MustCompile(`\w\-`)
)
   192  
// builder is used to read in the DTD files from CLDR and generate Go code
// to be used with the encoding/xml package.
type builder struct {
	w       io.Writer           // destination of the generated Go code
	index   map[string]*element // elements declared in the DTD, keyed by XML name
	elem    []*element          // elements in the order in which resolve first reached them
	info    dtd                 // configuration of the DTD being processed
	version string              // value parseDTD captured from a #FIXED attribute; "" if none
}
   202  
   203  func makeBuilder(w io.Writer, d dtd) builder {
   204  	return builder{
   205  		w:     w,
   206  		index: make(map[string]*element),
   207  		elem:  []*element{},
   208  		info:  d,
   209  	}
   210  }
   211  
   212  // parseDTD parses a DTD file.
   213  func (b *builder) parseDTD(r io.Reader) {
   214  	for d := xml.NewDecoder(r); ; {
   215  		t, err := d.Token()
   216  		if t == nil {
   217  			break
   218  		}
   219  		failOnError(err)
   220  		dir, ok := t.(xml.Directive)
   221  		if !ok {
   222  			continue
   223  		}
   224  		m := reHead.FindSubmatch(dir)
   225  		dir = dir[len(m[0]):]
   226  		ename := string(m[2])
   227  		el, elementFound := b.index[ename]
   228  		switch string(m[1]) {
   229  		case "ELEMENT":
   230  			if elementFound {
   231  				log.Fatal("parseDTD: duplicate entry for element %q", ename)
   232  			}
   233  			m := reElem.FindSubmatch(dir)
   234  			if m == nil {
   235  				log.Fatalf("parseDTD: invalid element %q", string(dir))
   236  			}
   237  			if len(m[0]) != len(dir) {
   238  				log.Fatal("parseDTD: invalid element %q", string(dir), len(dir), len(m[0]), string(m[0]))
   239  			}
   240  			s := string(m[1])
   241  			el = &element{
   242  				name:     ename,
   243  				category: s,
   244  			}
   245  			b.index[ename] = el
   246  		case "ATTLIST":
   247  			if !elementFound {
   248  				log.Fatalf("parseDTD: unknown element %q", ename)
   249  			}
   250  			s := string(dir)
   251  			m := reAttr.FindStringSubmatch(s)
   252  			if m == nil {
   253  				log.Fatal(fmt.Errorf("parseDTD: invalid attribute %q", string(dir)))
   254  			}
   255  			if m[4] == "FIXED" {
   256  				b.version = m[5]
   257  			} else {
   258  				switch m[1] {
   259  				case "draft", "references", "alt", "validSubLocales", "standard" /* in Common */ :
   260  				case "type", "choice":
   261  				default:
   262  					el.attr = append(el.attr, &attribute{
   263  						name: m[1],
   264  						key:  s,
   265  						list: reToken.FindAllString(m[3], -1),
   266  					})
   267  					el.signature = fmt.Sprintf("%s=%s+%s", el.signature, m[1], m[2])
   268  				}
   269  			}
   270  		}
   271  	}
   272  }
   273  
   274  var reCat = regexp.MustCompile(`[ ,\|]*(?:(\(|\)|\#?[\w_-]+)([\*\+\?]?))?`)
   275  
   276  // resolve takes a parsed element and converts it into structured data
   277  // that can be used to generate the XML code.
   278  func (b *builder) resolve(e *element) {
   279  	if e.resolved {
   280  		return
   281  	}
   282  	b.elem = append(b.elem, e)
   283  	e.resolved = true
   284  	s := e.category
   285  	found := make(map[string]bool)
   286  	sequenceStart := []int{}
   287  	for len(s) > 0 {
   288  		m := reCat.FindStringSubmatch(s)
   289  		if m == nil {
   290  			log.Fatalf("%s: invalid category string %q", e.name, s)
   291  		}
   292  		repeat := m[2] == "*" || m[2] == "+" || in(b.info.forceRepeat, m[1])
   293  		switch m[1] {
   294  		case "":
   295  		case "(":
   296  			sequenceStart = append(sequenceStart, len(e.sub))
   297  		case ")":
   298  			if len(sequenceStart) == 0 {
   299  				log.Fatalf("%s: unmatched closing parenthesis", e.name)
   300  			}
   301  			for i := sequenceStart[len(sequenceStart)-1]; i < len(e.sub); i++ {
   302  				e.sub[i].repeat = e.sub[i].repeat || repeat
   303  			}
   304  			sequenceStart = sequenceStart[:len(sequenceStart)-1]
   305  		default:
   306  			if in(b.info.skipElem, m[1]) {
   307  			} else if sub, ok := b.index[m[1]]; ok {
   308  				if !found[sub.name] {
   309  					e.sub = append(e.sub, struct {
   310  						e      *element
   311  						repeat bool
   312  					}{sub, repeat})
   313  					found[sub.name] = true
   314  					b.resolve(sub)
   315  				}
   316  			} else if m[1] == "#PCDATA" || m[1] == "ANY" {
   317  			} else if m[1] != "EMPTY" {
   318  				log.Fatalf("resolve:%s: element %q not found", e.name, m[1])
   319  			}
   320  		}
   321  		s = s[len(m[0]):]
   322  	}
   323  }
   324  
   325  // return true if s is contained in set.
   326  func in(set []string, s string) bool {
   327  	for _, v := range set {
   328  		if v == s {
   329  			return true
   330  		}
   331  	}
   332  	return false
   333  }
   334  
   335  var repl = strings.NewReplacer("-", " ", "_", " ")
   336  
   337  // title puts the first character or each character following '_' in title case and
   338  // removes all occurrences of '_'.
   339  func title(s string) string {
   340  	return strings.Replace(strings.Title(repl.Replace(s)), " ", "", -1)
   341  }
   342  
   343  // writeElem generates Go code for a single element, recursively.
   344  func (b *builder) writeElem(tab int, e *element) {
   345  	p := func(f string, x ...interface{}) {
   346  		f = strings.Replace(f, "\n", "\n"+strings.Repeat("\t", tab), -1)
   347  		fmt.Fprintf(b.w, f, x...)
   348  	}
   349  	if len(e.sub) == 0 && len(e.attr) == 0 {
   350  		p("Common")
   351  		return
   352  	}
   353  	p("struct {")
   354  	tab++
   355  	p("\nCommon")
   356  	for _, attr := range e.attr {
   357  		if !in(b.info.skipAttr, attr.name) {
   358  			p("\n%s string `xml:\"%s,attr\"`", title(attr.name), attr.name)
   359  		}
   360  	}
   361  	for _, sub := range e.sub {
   362  		if in(b.info.predefined, sub.e.name) {
   363  			p("\n%sElem", sub.e.name)
   364  			continue
   365  		}
   366  		if in(b.info.skipElem, sub.e.name) {
   367  			continue
   368  		}
   369  		p("\n%s ", title(sub.e.name))
   370  		if sub.repeat {
   371  			p("[]")
   372  		}
   373  		p("*")
   374  		if in(b.info.top, sub.e.name) {
   375  			p(title(sub.e.name))
   376  		} else {
   377  			b.writeElem(tab, sub.e)
   378  		}
   379  		p(" `xml:\"%s\"`", sub.e.name)
   380  	}
   381  	tab--
   382  	p("\n}")
   383  }
   384  
   385  // write generates the Go XML code.
   386  func (b *builder) write() {
   387  	for i, name := range b.info.top {
   388  		e := b.index[name]
   389  		if e != nil {
   390  			fmt.Fprintf(b.w, comments[name])
   391  			name := title(e.name)
   392  			if i == 0 {
   393  				name = b.info.root
   394  			}
   395  			fmt.Fprintf(b.w, "type %s ", name)
   396  			b.writeElem(0, e)
   397  			fmt.Fprint(b.w, "\n")
   398  		}
   399  	}
   400  }