github.com/graybobo/golang.org-package-offline-cache@v0.0.0-20200626051047-6608995c132f/x/text/cldr/makexml.go (about)

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build ignore
     6  
     7  // This tool generates types for the various XML formats of CLDR.
     8  package main
     9  
    10  import (
    11  	"archive/zip"
    12  	"bytes"
    13  	"encoding/xml"
    14  	"flag"
    15  	"fmt"
    16  	"go/format"
    17  	"io"
    18  	"io/ioutil"
    19  	"log"
    20  	"net/http"
    21  	"os"
    22  	"path"
    23  	"path/filepath"
    24  	"regexp"
    25  	"strconv"
    26  	"strings"
    27  )
    28  
// Command-line flags.
var (
	// url locates the CLDR core archive. main passes it to openArchive and
	// echoes its value into the header of the generated file.
	url = flag.String("url",
		"http://www.unicode.org/Public/cldr/26/core.zip",
		"Path to CLDR directory or zip archive.")
	// localDir, when non-empty, makes openArchive read the archive from this
	// directory (using the base name of url) instead of fetching it over HTTP.
	localDir = flag.String("local",
		"",
		"directory containing local data files; for debugging only.")
	// outputFile is the path the formatted Go source is written to.
	outputFile = flag.String("output", "xml.go", "output file name")
)
    38  
    39  func main() {
    40  	flag.Parse()
    41  
    42  	z := openArchive(url)
    43  
    44  	var buf bytes.Buffer
    45  
    46  	fmt.Fprintf(&buf, header, *url)
    47  
    48  	var version uint64
    49  
    50  	for _, dtd := range files {
    51  		for _, f := range z.File {
    52  			if strings.HasSuffix(f.Name, dtd.file+".dtd") {
    53  				r, err := f.Open()
    54  				failOnError(err)
    55  
    56  				b := makeBuilder(&buf, dtd)
    57  				b.parseDTD(r)
    58  				b.resolve(b.index[dtd.top[0]])
    59  				b.write()
    60  				if version == 0 {
    61  					version = b.version
    62  				} else if b.version != 0 && version != b.version {
    63  					log.Fatalf("main: inconsistent versions: found %d; want %d", b.version, version)
    64  				}
    65  				break
    66  			}
    67  		}
    68  	}
    69  	fmt.Fprintf(&buf, "\nconst Version = \"%d\"\n", version)
    70  
    71  	data, err := format.Source(buf.Bytes())
    72  	failOnError(err)
    73  	failOnError(ioutil.WriteFile(*outputFile, data, 0644))
    74  }
    75  
// header is the format string for the preamble of the generated file. Its
// single %s verb receives the value of the -url flag (see main).
const header = `// Generated by running
//       makexml --url=%s
// automatically with go generate.
// DO NOT EDIT

package cldr
`
    83  
    84  func failOnError(err error) {
    85  	if err != nil {
    86  		log.New(os.Stderr, "", log.Lshortfile).Output(2, err.Error())
    87  		os.Exit(1)
    88  	}
    89  }
    90  
// dtd holds the per-DTD configuration that drives code generation for one
// CLDR DTD file.
type dtd struct {
	file string   // base file name; main matches archive entries ending in file+".dtd"
	root string   // Go name of the root XML element (used by write for top[0])
	top  []string // create a different type for this section; top[0] is the root element

	skipElem    []string // hard-coded or deprecated elements
	skipAttr    []string // attributes to exclude
	predefined  []string // hard-coded elements exist of the form <name>Elem
	forceRepeat []string // elements to make slices despite DTD
}
   102  
// files lists the DTDs to process, in the order their types are written to
// the generated file (main iterates over it sequentially).
var files = []dtd{
	{
		file: "ldmlBCP47",
		root: "LDMLBCP47",
		top:  []string{"ldmlBCP47"},
		skipElem: []string{
			"cldrVersion", // deprecated, not used
		},
	},
	{
		file: "ldmlSupplemental",
		root: "SupplementalData",
		top:  []string{"supplementalData"},
		skipElem: []string{
			"cldrVersion", // deprecated, not used
		},
		forceRepeat: []string{
			"plurals", // data defined in plurals.xml and ordinals.xml
		},
	},
	{
		file: "ldml",
		root: "LDML",
		top: []string{
			"ldml", "collation", "calendar", "timeZoneNames", "localeDisplayNames", "numbers",
		},
		skipElem: []string{
			"cp",       // not used anywhere
			"special",  // not used anywhere
			"fallback", // deprecated, not used
			"alias",    // in Common
			"default",  // in Common
		},
		skipAttr: []string{
			"hiraganaQuarternary", // typo in DTD, correct version included as well
		},
		predefined: []string{"rules"},
	},
}
   142  
   143  var comments = map[string]string{
   144  	"ldmlBCP47": `
   145  // LDMLBCP47 holds information on allowable values for various variables in LDML.
   146  `,
   147  	"supplementalData": `
   148  // SupplementalData holds information relevant for internationalization
   149  // and proper use of CLDR, but that is not contained in the locale hierarchy.
   150  `,
   151  	"ldml": `
   152  // LDML is the top-level type for locale-specific data.
   153  `,
   154  	"collation": `
   155  // Collation contains rules that specify a certain sort-order,
   156  // as a tailoring of the root order. 
   157  // The parsed rules are obtained by passing a RuleProcessor to Collation's
   158  // Process method.
   159  `,
   160  	"calendar": `
   161  // Calendar specifies the fields used for formatting and parsing dates and times.
   162  // The month and quarter names are identified numerically, starting at 1.
   163  // The day (of the week) names are identified with short strings, since there is
   164  // no universally-accepted numeric designation.
   165  `,
   166  	"dates": `
   167  // Dates contains information regarding the format and parsing of dates and times.
   168  `,
   169  	"localeDisplayNames": `
   170  // LocaleDisplayNames specifies localized display names for for scripts, languages,
   171  // countries, currencies, and variants.
   172  `,
   173  	"numbers": `
   174  // Numbers supplies information for formatting and parsing numbers and currencies.
   175  `,
   176  }
   177  
// element describes one XML element declared in a DTD. parseDTD creates it
// from an ELEMENT directive and extends it from ATTLIST directives; resolve
// later fills in sub from the content model stored in category.
type element struct {
	name      string // XML element name
	category  string // elements contained by this element (raw content model text from the DTD)
	signature string // category + attrKey*

	attr []*attribute // attributes supported by this element.
	sub  []struct {   // parsed and evaluated sub elements of this element.
		e      *element
		repeat bool // true if the element needs to be a slice
	}

	resolved bool // prevent multiple resolutions of this element.
}
   191  
// attribute describes one attribute recorded by parseDTD from an ATTLIST
// directive.
type attribute struct {
	name string   // attribute name as written in the DTD
	key  string   // text of the ATTLIST directive following the element name
	list []string // tokens extracted from an enumerated value list, if any

	tag string // Go tag
}
   199  
   200  var (
   201  	reHead  = regexp.MustCompile(` *(\w+) +([\w\-]+)`)
   202  	reAttr  = regexp.MustCompile(` *(\w+) *(?:(\w+)|\(([\w\- \|]+)\)) *(?:#([A-Z]*) *(?:\"(\d+)[\.\d]*\")?)? *("[\w\-:]*")?`)
   203  	reElem  = regexp.MustCompile(`^ *(EMPTY|ANY|\(.*\)[\*\+\?]?) *$`)
   204  	reToken = regexp.MustCompile(`\w\-`)
   205  )
   206  
// builder is used to read in the DTD files from CLDR and generate Go code
// to be used with the encoding/xml package.
type builder struct {
	w       io.Writer           // destination of the generated Go code
	index   map[string]*element // all elements declared in the DTD, keyed by XML name
	elem    []*element          // elements in the order resolve first visited them
	info    dtd                 // configuration for the DTD being processed
	version uint64              // version parsed from a #FIXED attribute; 0 if none seen
}
   216  
   217  func makeBuilder(w io.Writer, d dtd) builder {
   218  	return builder{
   219  		w:     w,
   220  		index: make(map[string]*element),
   221  		elem:  []*element{},
   222  		info:  d,
   223  	}
   224  }
   225  
   226  // parseDTD parses a DTD file.
   227  func (b *builder) parseDTD(r io.Reader) {
   228  	for d := xml.NewDecoder(r); ; {
   229  		t, err := d.Token()
   230  		if t == nil {
   231  			break
   232  		}
   233  		failOnError(err)
   234  		dir, ok := t.(xml.Directive)
   235  		if !ok {
   236  			continue
   237  		}
   238  		m := reHead.FindSubmatch(dir)
   239  		dir = dir[len(m[0]):]
   240  		ename := string(m[2])
   241  		el, elementFound := b.index[ename]
   242  		switch string(m[1]) {
   243  		case "ELEMENT":
   244  			if elementFound {
   245  				log.Fatal("parseDTD: duplicate entry for element %q", ename)
   246  			}
   247  			m := reElem.FindSubmatch(dir)
   248  			if m == nil {
   249  				log.Fatalf("parseDTD: invalid element %q", string(dir))
   250  			}
   251  			if len(m[0]) != len(dir) {
   252  				log.Fatal("parseDTD: invalid element %q", string(dir), len(dir), len(m[0]), string(m[0]))
   253  			}
   254  			s := string(m[1])
   255  			el = &element{
   256  				name:     ename,
   257  				category: s,
   258  			}
   259  			b.index[ename] = el
   260  		case "ATTLIST":
   261  			if !elementFound {
   262  				log.Fatalf("parseDTD: unknown element %q", ename)
   263  			}
   264  			s := string(dir)
   265  			m := reAttr.FindStringSubmatch(s)
   266  			if m == nil {
   267  				log.Fatal(fmt.Errorf("parseDTD: invalid attribute %q", string(dir)))
   268  			}
   269  			if m[4] == "FIXED" {
   270  				b.version, err = strconv.ParseUint(m[5], 10, 16)
   271  				failOnError(err)
   272  			} else {
   273  				switch m[1] {
   274  				case "draft", "references", "alt", "validSubLocales", "standard" /* in Common */ :
   275  				case "type", "choice":
   276  				default:
   277  					el.attr = append(el.attr, &attribute{
   278  						name: m[1],
   279  						key:  s,
   280  						list: reToken.FindAllString(m[3], -1),
   281  					})
   282  					el.signature = fmt.Sprintf("%s=%s+%s", el.signature, m[1], m[2])
   283  				}
   284  			}
   285  		}
   286  	}
   287  }
   288  
// reCat matches the next item of a DTD content model, after skipping any
// leading spaces, commas and '|' separators. Group 1 is "(", ")" or a name
// (optionally prefixed with '#'); group 2 is the occurrence suffix "*", "+"
// or "?", if present. Every part is optional, so it can match the empty string.
var reCat = regexp.MustCompile(`[ ,\|]*(?:(\(|\)|\#?[\w_-]+)([\*\+\?]?))?`)
   290  
   291  // resolve takes a parsed element and converts it into structured data
   292  // that can be used to generate the XML code.
   293  func (b *builder) resolve(e *element) {
   294  	if e.resolved {
   295  		return
   296  	}
   297  	b.elem = append(b.elem, e)
   298  	e.resolved = true
   299  	s := e.category
   300  	found := make(map[string]bool)
   301  	sequenceStart := []int{}
   302  	for len(s) > 0 {
   303  		m := reCat.FindStringSubmatch(s)
   304  		if m == nil {
   305  			log.Fatalf("%s: invalid category string %q", e.name, s)
   306  		}
   307  		repeat := m[2] == "*" || m[2] == "+" || in(b.info.forceRepeat, m[1])
   308  		switch m[1] {
   309  		case "":
   310  		case "(":
   311  			sequenceStart = append(sequenceStart, len(e.sub))
   312  		case ")":
   313  			if len(sequenceStart) == 0 {
   314  				log.Fatalf("%s: unmatched closing parenthesis", e.name)
   315  			}
   316  			for i := sequenceStart[len(sequenceStart)-1]; i < len(e.sub); i++ {
   317  				e.sub[i].repeat = e.sub[i].repeat || repeat
   318  			}
   319  			sequenceStart = sequenceStart[:len(sequenceStart)-1]
   320  		default:
   321  			if in(b.info.skipElem, m[1]) {
   322  			} else if sub, ok := b.index[m[1]]; ok {
   323  				if !found[sub.name] {
   324  					e.sub = append(e.sub, struct {
   325  						e      *element
   326  						repeat bool
   327  					}{sub, repeat})
   328  					found[sub.name] = true
   329  					b.resolve(sub)
   330  				}
   331  			} else if m[1] == "#PCDATA" || m[1] == "ANY" {
   332  			} else if m[1] != "EMPTY" {
   333  				log.Fatalf("resolve:%s: element %q not found", e.name, m[1])
   334  			}
   335  		}
   336  		s = s[len(m[0]):]
   337  	}
   338  }
   339  
   340  // return true if s is contained in set.
   341  func in(set []string, s string) bool {
   342  	for _, v := range set {
   343  		if v == s {
   344  			return true
   345  		}
   346  	}
   347  	return false
   348  }
   349  
   350  var repl = strings.NewReplacer("-", " ", "_", " ")
   351  
   352  // title puts the first character or each character following '_' in title case and
   353  // removes all occurrences of '_'.
   354  func title(s string) string {
   355  	return strings.Replace(strings.Title(repl.Replace(s)), " ", "", -1)
   356  }
   357  
   358  // writeElem generates Go code for a single element, recursively.
   359  func (b *builder) writeElem(tab int, e *element) {
   360  	p := func(f string, x ...interface{}) {
   361  		f = strings.Replace(f, "\n", "\n"+strings.Repeat("\t", tab), -1)
   362  		fmt.Fprintf(b.w, f, x...)
   363  	}
   364  	if len(e.sub) == 0 && len(e.attr) == 0 {
   365  		p("Common")
   366  		return
   367  	}
   368  	p("struct {")
   369  	tab++
   370  	p("\nCommon")
   371  	for _, attr := range e.attr {
   372  		if !in(b.info.skipAttr, attr.name) {
   373  			p("\n%s string `xml:\"%s,attr\"`", title(attr.name), attr.name)
   374  		}
   375  	}
   376  	for _, sub := range e.sub {
   377  		if in(b.info.predefined, sub.e.name) {
   378  			p("\n%sElem", sub.e.name)
   379  			continue
   380  		}
   381  		if in(b.info.skipElem, sub.e.name) {
   382  			continue
   383  		}
   384  		p("\n%s ", title(sub.e.name))
   385  		if sub.repeat {
   386  			p("[]")
   387  		}
   388  		p("*")
   389  		if in(b.info.top, sub.e.name) {
   390  			p(title(sub.e.name))
   391  		} else {
   392  			b.writeElem(tab, sub.e)
   393  		}
   394  		p(" `xml:\"%s\"`", sub.e.name)
   395  	}
   396  	tab--
   397  	p("\n}")
   398  }
   399  
   400  // write generates the Go XML code.
   401  func (b *builder) write() {
   402  	for i, name := range b.info.top {
   403  		e := b.index[name]
   404  		if e != nil {
   405  			fmt.Fprintf(b.w, comments[name])
   406  			name := title(e.name)
   407  			if i == 0 {
   408  				name = b.info.root
   409  			}
   410  			fmt.Fprintf(b.w, "type %s ", name)
   411  			b.writeElem(0, e)
   412  			fmt.Fprint(b.w, "\n")
   413  		}
   414  	}
   415  }
   416  
   417  // openArchive gets the file for the given url and opens it as a Zip archive.
   418  func openArchive(url *string) *zip.Reader {
   419  	var r io.ReadCloser
   420  	if *localDir != "" {
   421  		dir, err := filepath.Abs(*localDir)
   422  		failOnError(err)
   423  		r, err = os.Open(filepath.Join(dir, path.Base(*url)))
   424  		failOnError(err)
   425  	} else {
   426  		resp, err := http.Get(*url)
   427  		if err != nil {
   428  			log.Fatalf("HTTP GET: %v", err)
   429  		}
   430  		if resp.StatusCode != 200 {
   431  			log.Fatalf(`bad GET status for "%s": %s`, *url, resp.Status)
   432  		}
   433  		r = resp.Body
   434  	}
   435  	buffer, err := ioutil.ReadAll(r)
   436  	r.Close()
   437  	failOnError(err)
   438  	archive, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer)))
   439  	failOnError(err)
   440  	return archive
   441  }