github.com/go-xe2/third@v1.0.3/golang.org/x/text/encoding/ianaindex/gen.go (about)

     1  // Copyright 2017 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build ignore
     6  
     7  package main
     8  
     9  import (
    10  	"encoding/xml"
    11  	"fmt"
    12  	"io"
    13  	"log"
    14  	"sort"
    15  	"strconv"
    16  	"strings"
    17  
    18  	"github.com/go-xe2/third/golang.org/x/text/encoding/internal/identifier"
    19  	"github.com/go-xe2/third/golang.org/x/text/internal/gen"
    20  )
    21  
    22  type registry struct {
    23  	XMLName  xml.Name `xml:"registry"`
    24  	Updated  string   `xml:"updated"`
    25  	Registry []struct {
    26  		ID     string `xml:"id,attr"`
    27  		Record []struct {
    28  			Name string `xml:"name"`
    29  			Xref []struct {
    30  				Type string `xml:"type,attr"`
    31  				Data string `xml:"data,attr"`
    32  			} `xml:"xref"`
    33  			Desc struct {
    34  				Data string `xml:",innerxml"`
    35  			} `xml:"description,"`
    36  			MIB   string   `xml:"value"`
    37  			Alias []string `xml:"alias"`
    38  			MIME  string   `xml:"preferred_alias"`
    39  		} `xml:"record"`
    40  	} `xml:"registry"`
    41  }
    42  
    43  func main() {
    44  	r := gen.OpenIANAFile("assignments/character-sets/character-sets.xml")
    45  	reg := &registry{}
    46  	if err := xml.NewDecoder(r).Decode(&reg); err != nil && err != io.EOF {
    47  		log.Fatalf("Error decoding charset registry: %v", err)
    48  	}
    49  	if len(reg.Registry) == 0 || reg.Registry[0].ID != "character-sets-1" {
    50  		log.Fatalf("Unexpected ID %s", reg.Registry[0].ID)
    51  	}
    52  
    53  	x := &indexInfo{}
    54  
    55  	for _, rec := range reg.Registry[0].Record {
    56  		mib := identifier.MIB(parseInt(rec.MIB))
    57  		x.addEntry(mib, rec.Name)
    58  		for _, a := range rec.Alias {
    59  			a = strings.Split(a, " ")[0] // strip comments.
    60  			x.addAlias(a, mib)
    61  			// MIB name aliases are prefixed with a "cs" (character set) in the
    62  			// registry to identify them as display names and to ensure that
    63  			// the name starts with a lowercase letter in case it is used as
    64  			// an identifier. We remove it to be left with a nice clean name.
    65  			if strings.HasPrefix(a, "cs") {
    66  				x.setName(2, a[2:])
    67  			}
    68  		}
    69  		if rec.MIME != "" {
    70  			x.addAlias(rec.MIME, mib)
    71  			x.setName(1, rec.MIME)
    72  		}
    73  	}
    74  
    75  	w := gen.NewCodeWriter()
    76  
    77  	fmt.Fprintln(w, `import "github.com/go-xe2/third/golang.org/x/text/encoding/internal/identifier"`)
    78  
    79  	writeIndex(w, x)
    80  
    81  	w.WriteGoFile("tables.go", "ianaindex")
    82  }
    83  
    84  type alias struct {
    85  	name string
    86  	mib  identifier.MIB
    87  }
    88  
    89  type indexInfo struct {
    90  	// compacted index from code to MIB
    91  	codeToMIB []identifier.MIB
    92  	alias     []alias
    93  	names     [][3]string
    94  }
    95  
    96  func (ii *indexInfo) Len() int {
    97  	return len(ii.codeToMIB)
    98  }
    99  
   100  func (ii *indexInfo) Less(a, b int) bool {
   101  	return ii.codeToMIB[a] < ii.codeToMIB[b]
   102  }
   103  
   104  func (ii *indexInfo) Swap(a, b int) {
   105  	ii.codeToMIB[a], ii.codeToMIB[b] = ii.codeToMIB[b], ii.codeToMIB[a]
   106  	// Co-sort the names.
   107  	ii.names[a], ii.names[b] = ii.names[b], ii.names[a]
   108  }
   109  
   110  func (ii *indexInfo) setName(i int, name string) {
   111  	ii.names[len(ii.names)-1][i] = name
   112  }
   113  
   114  func (ii *indexInfo) addEntry(mib identifier.MIB, name string) {
   115  	ii.names = append(ii.names, [3]string{name, name, name})
   116  	ii.addAlias(name, mib)
   117  	ii.codeToMIB = append(ii.codeToMIB, mib)
   118  }
   119  
   120  func (ii *indexInfo) addAlias(name string, mib identifier.MIB) {
   121  	// Don't add duplicates for the same mib. Adding duplicate aliases for
   122  	// different MIBs will cause the compiler to barf on an invalid map: great!.
   123  	for i := len(ii.alias) - 1; i >= 0 && ii.alias[i].mib == mib; i-- {
   124  		if ii.alias[i].name == name {
   125  			return
   126  		}
   127  	}
   128  	ii.alias = append(ii.alias, alias{name, mib})
   129  	lower := strings.ToLower(name)
   130  	if lower != name {
   131  		ii.addAlias(lower, mib)
   132  	}
   133  }
   134  
   135  const maxMIMENameLen = '0' - 1 // officially 40, but we leave some buffer.
   136  
   137  func writeIndex(w *gen.CodeWriter, x *indexInfo) {
   138  	sort.Stable(x)
   139  
   140  	// Write constants.
   141  	fmt.Fprintln(w, "const (")
   142  	for i, m := range x.codeToMIB {
   143  		if i == 0 {
   144  			fmt.Fprintf(w, "enc%d = iota\n", m)
   145  		} else {
   146  			fmt.Fprintf(w, "enc%d\n", m)
   147  		}
   148  	}
   149  	fmt.Fprintln(w, "numIANA")
   150  	fmt.Fprintln(w, ")")
   151  
   152  	w.WriteVar("ianaToMIB", x.codeToMIB)
   153  
   154  	var ianaNames, mibNames []string
   155  	for _, names := range x.names {
   156  		n := names[0]
   157  		if names[0] != names[1] {
   158  			// MIME names are mostly identical to IANA names. We share the
   159  			// tables by setting the first byte of the string to an index into
   160  			// the string itself (< maxMIMENameLen) to the IANA name. The MIME
   161  			// name immediately follows the index.
   162  			x := len(names[1]) + 1
   163  			if x > maxMIMENameLen {
   164  				log.Fatalf("MIME name length (%d) > %d", x, maxMIMENameLen)
   165  			}
   166  			n = string(x) + names[1] + names[0]
   167  		}
   168  		ianaNames = append(ianaNames, n)
   169  		mibNames = append(mibNames, names[2])
   170  	}
   171  
   172  	w.WriteVar("ianaNames", ianaNames)
   173  	w.WriteVar("mibNames", mibNames)
   174  
   175  	w.WriteComment(`
   176  	TODO: Instead of using a map, we could use binary search strings doing
   177  	on-the fly lower-casing per character. This allows to always avoid
   178  	allocation and will be considerably more compact.`)
   179  	fmt.Fprintln(w, "var ianaAliases = map[string]int{")
   180  	for _, a := range x.alias {
   181  		fmt.Fprintf(w, "%q: enc%d,\n", a.name, a.mib)
   182  	}
   183  	fmt.Fprintln(w, "}")
   184  }
   185  
   186  func parseInt(s string) int {
   187  	x, err := strconv.ParseInt(s, 10, 64)
   188  	if err != nil {
   189  		log.Fatalf("Could not parse integer: %v", err)
   190  	}
   191  	return int(x)
   192  }