github.com/go-enjin/golang-org-x-text@v0.12.1-enjin.2/encoding/ianaindex/gen.go (about)

     1  // Copyright 2017 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build ignore
     6  // +build ignore
     7  
     8  package main
     9  
    10  import (
    11  	"encoding/xml"
    12  	"fmt"
    13  	"io"
    14  	"log"
    15  	"sort"
    16  	"strconv"
    17  	"strings"
    18  
    19  	"github.com/go-enjin/golang-org-x-text/encoding/internal/identifier"
    20  	"github.com/go-enjin/golang-org-x-text/internal/gen"
    21  )
    22  
    23  type registry struct {
    24  	XMLName  xml.Name `xml:"registry"`
    25  	Updated  string   `xml:"updated"`
    26  	Registry []struct {
    27  		ID     string `xml:"id,attr"`
    28  		Record []struct {
    29  			Name string `xml:"name"`
    30  			Xref []struct {
    31  				Type string `xml:"type,attr"`
    32  				Data string `xml:"data,attr"`
    33  			} `xml:"xref"`
    34  			Desc struct {
    35  				Data string `xml:",innerxml"`
    36  			} `xml:"description,"`
    37  			MIB   string   `xml:"value"`
    38  			Alias []string `xml:"alias"`
    39  			MIME  string   `xml:"preferred_alias"`
    40  		} `xml:"record"`
    41  	} `xml:"registry"`
    42  }
    43  
    44  func main() {
    45  	r := gen.OpenIANAFile("assignments/character-sets/character-sets.xml")
    46  	reg := &registry{}
    47  	if err := xml.NewDecoder(r).Decode(&reg); err != nil && err != io.EOF {
    48  		log.Fatalf("Error decoding charset registry: %v", err)
    49  	}
    50  	if len(reg.Registry) == 0 || reg.Registry[0].ID != "character-sets-1" {
    51  		log.Fatalf("Unexpected ID %s", reg.Registry[0].ID)
    52  	}
    53  
    54  	x := &indexInfo{}
    55  
    56  	for _, rec := range reg.Registry[0].Record {
    57  		mib := identifier.MIB(parseInt(rec.MIB))
    58  		x.addEntry(mib, rec.Name)
    59  		for _, a := range rec.Alias {
    60  			a = strings.Split(a, " ")[0] // strip comments.
    61  			x.addAlias(a, mib)
    62  			// MIB name aliases are prefixed with a "cs" (character set) in the
    63  			// registry to identify them as display names and to ensure that
    64  			// the name starts with a lowercase letter in case it is used as
    65  			// an identifier. We remove it to be left with a nice clean name.
    66  			if strings.HasPrefix(a, "cs") {
    67  				x.setName(2, a[2:])
    68  			}
    69  		}
    70  		if rec.MIME != "" {
    71  			x.addAlias(rec.MIME, mib)
    72  			x.setName(1, rec.MIME)
    73  		}
    74  	}
    75  
    76  	w := gen.NewCodeWriter()
    77  
    78  	fmt.Fprintln(w, `import "github.com/go-enjin/golang-org-x-text/encoding/internal/identifier"`)
    79  
    80  	writeIndex(w, x)
    81  
    82  	w.WriteGoFile("tables.go", "ianaindex")
    83  }
    84  
    85  type alias struct {
    86  	name string
    87  	mib  identifier.MIB
    88  }
    89  
    90  type indexInfo struct {
    91  	// compacted index from code to MIB
    92  	codeToMIB []identifier.MIB
    93  	alias     []alias
    94  	names     [][3]string
    95  }
    96  
    97  func (ii *indexInfo) Len() int {
    98  	return len(ii.codeToMIB)
    99  }
   100  
   101  func (ii *indexInfo) Less(a, b int) bool {
   102  	return ii.codeToMIB[a] < ii.codeToMIB[b]
   103  }
   104  
   105  func (ii *indexInfo) Swap(a, b int) {
   106  	ii.codeToMIB[a], ii.codeToMIB[b] = ii.codeToMIB[b], ii.codeToMIB[a]
   107  	// Co-sort the names.
   108  	ii.names[a], ii.names[b] = ii.names[b], ii.names[a]
   109  }
   110  
   111  func (ii *indexInfo) setName(i int, name string) {
   112  	ii.names[len(ii.names)-1][i] = name
   113  }
   114  
   115  func (ii *indexInfo) addEntry(mib identifier.MIB, name string) {
   116  	ii.names = append(ii.names, [3]string{name, name, name})
   117  	ii.addAlias(name, mib)
   118  	ii.codeToMIB = append(ii.codeToMIB, mib)
   119  }
   120  
   121  func (ii *indexInfo) addAlias(name string, mib identifier.MIB) {
   122  	// Don't add duplicates for the same mib. Adding duplicate aliases for
   123  	// different MIBs will cause the compiler to barf on an invalid map: great!.
   124  	for i := len(ii.alias) - 1; i >= 0 && ii.alias[i].mib == mib; i-- {
   125  		if ii.alias[i].name == name {
   126  			return
   127  		}
   128  	}
   129  	ii.alias = append(ii.alias, alias{name, mib})
   130  	lower := strings.ToLower(name)
   131  	if lower != name {
   132  		ii.addAlias(lower, mib)
   133  	}
   134  }
   135  
   136  const maxMIMENameLen = '0' - 1 // officially 40, but we leave some buffer.
   137  
   138  func writeIndex(w *gen.CodeWriter, x *indexInfo) {
   139  	sort.Stable(x)
   140  
   141  	// Write constants.
   142  	fmt.Fprintln(w, "const (")
   143  	for i, m := range x.codeToMIB {
   144  		if i == 0 {
   145  			fmt.Fprintf(w, "enc%d = iota\n", m)
   146  		} else {
   147  			fmt.Fprintf(w, "enc%d\n", m)
   148  		}
   149  	}
   150  	fmt.Fprintln(w, "numIANA")
   151  	fmt.Fprintln(w, ")")
   152  
   153  	w.WriteVar("ianaToMIB", x.codeToMIB)
   154  
   155  	var ianaNames, mibNames []string
   156  	for _, names := range x.names {
   157  		n := names[0]
   158  		if names[0] != names[1] {
   159  			// MIME names are mostly identical to IANA names. We share the
   160  			// tables by setting the first byte of the string to an index into
   161  			// the string itself (< maxMIMENameLen) to the IANA name. The MIME
   162  			// name immediately follows the index.
   163  			x := len(names[1]) + 1
   164  			if x > maxMIMENameLen {
   165  				log.Fatalf("MIME name length (%d) > %d", x, maxMIMENameLen)
   166  			}
   167  			n = string(x) + names[1] + names[0]
   168  		}
   169  		ianaNames = append(ianaNames, n)
   170  		mibNames = append(mibNames, names[2])
   171  	}
   172  
   173  	w.WriteVar("ianaNames", ianaNames)
   174  	w.WriteVar("mibNames", mibNames)
   175  
   176  	w.WriteComment(`
   177  	TODO: Instead of using a map, we could use binary search strings doing
   178  	on-the fly lower-casing per character. This allows to always avoid
   179  	allocation and will be considerably more compact.`)
   180  	fmt.Fprintln(w, "var ianaAliases = map[string]int{")
   181  	for _, a := range x.alias {
   182  		fmt.Fprintf(w, "%q: enc%d,\n", a.name, a.mib)
   183  	}
   184  	fmt.Fprintln(w, "}")
   185  }
   186  
   187  func parseInt(s string) int {
   188  	x, err := strconv.ParseInt(s, 10, 64)
   189  	if err != nil {
   190  		log.Fatalf("Could not parse integer: %v", err)
   191  	}
   192  	return int(x)
   193  }