github.com/go-enjin/golang-org-x-text@v0.12.1-enjin.2/encoding/htmlindex/gen.go (about)

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build ignore
     6  // +build ignore
     7  
     8  package main
     9  
    10  import (
    11  	"bytes"
    12  	"encoding/json"
    13  	"fmt"
    14  	"log"
    15  	"strings"
    16  
    17  	"github.com/go-enjin/golang-org-x-text/internal/gen"
    18  )
    19  
    20  type group struct {
    21  	Encodings []struct {
    22  		Labels []string
    23  		Name   string
    24  	}
    25  }
    26  
    27  func main() {
    28  	gen.Init()
    29  
    30  	r := gen.Open("https://encoding.spec.whatwg.org", "whatwg", "encodings.json")
    31  	var groups []group
    32  	if err := json.NewDecoder(r).Decode(&groups); err != nil {
    33  		log.Fatalf("Error reading encodings.json: %v", err)
    34  	}
    35  
    36  	w := &bytes.Buffer{}
    37  	fmt.Fprintln(w, "type htmlEncoding byte")
    38  	fmt.Fprintln(w, "const (")
    39  	for i, g := range groups {
    40  		for _, e := range g.Encodings {
    41  			key := strings.ToLower(e.Name)
    42  			name := consts[key]
    43  			if name == "" {
    44  				log.Fatalf("No const defined for %s.", key)
    45  			}
    46  			if i == 0 {
    47  				fmt.Fprintf(w, "%s htmlEncoding = iota\n", name)
    48  			} else {
    49  				fmt.Fprintf(w, "%s\n", name)
    50  			}
    51  		}
    52  	}
    53  	fmt.Fprintln(w, "numEncodings")
    54  	fmt.Fprint(w, ")\n\n")
    55  
    56  	fmt.Fprintln(w, "var canonical = [numEncodings]string{")
    57  	for _, g := range groups {
    58  		for _, e := range g.Encodings {
    59  			fmt.Fprintf(w, "%q,\n", strings.ToLower(e.Name))
    60  		}
    61  	}
    62  	fmt.Fprint(w, "}\n\n")
    63  
    64  	fmt.Fprintln(w, "var nameMap = map[string]htmlEncoding{")
    65  	for _, g := range groups {
    66  		for _, e := range g.Encodings {
    67  			for _, l := range e.Labels {
    68  				key := strings.ToLower(e.Name)
    69  				name := consts[key]
    70  				fmt.Fprintf(w, "%q: %s,\n", l, name)
    71  			}
    72  		}
    73  	}
    74  	fmt.Fprint(w, "}\n\n")
    75  
    76  	var tags []string
    77  	fmt.Fprintln(w, "var localeMap = []htmlEncoding{")
    78  	for _, loc := range locales {
    79  		tags = append(tags, loc.tag)
    80  		fmt.Fprintf(w, "%s, // %s \n", consts[loc.name], loc.tag)
    81  	}
    82  	fmt.Fprint(w, "}\n\n")
    83  
    84  	fmt.Fprintf(w, "const locales = %q\n", strings.Join(tags, " "))
    85  
    86  	gen.WriteGoFile("tables.go", "htmlindex", w.Bytes())
    87  }
    88  
    89  // consts maps canonical encoding name to internal constant.
    90  var consts = map[string]string{
    91  	"utf-8":          "utf8",
    92  	"ibm866":         "ibm866",
    93  	"iso-8859-2":     "iso8859_2",
    94  	"iso-8859-3":     "iso8859_3",
    95  	"iso-8859-4":     "iso8859_4",
    96  	"iso-8859-5":     "iso8859_5",
    97  	"iso-8859-6":     "iso8859_6",
    98  	"iso-8859-7":     "iso8859_7",
    99  	"iso-8859-8":     "iso8859_8",
   100  	"iso-8859-8-i":   "iso8859_8I",
   101  	"iso-8859-10":    "iso8859_10",
   102  	"iso-8859-13":    "iso8859_13",
   103  	"iso-8859-14":    "iso8859_14",
   104  	"iso-8859-15":    "iso8859_15",
   105  	"iso-8859-16":    "iso8859_16",
   106  	"koi8-r":         "koi8r",
   107  	"koi8-u":         "koi8u",
   108  	"macintosh":      "macintosh",
   109  	"windows-874":    "windows874",
   110  	"windows-1250":   "windows1250",
   111  	"windows-1251":   "windows1251",
   112  	"windows-1252":   "windows1252",
   113  	"windows-1253":   "windows1253",
   114  	"windows-1254":   "windows1254",
   115  	"windows-1255":   "windows1255",
   116  	"windows-1256":   "windows1256",
   117  	"windows-1257":   "windows1257",
   118  	"windows-1258":   "windows1258",
   119  	"x-mac-cyrillic": "macintoshCyrillic",
   120  	"gbk":            "gbk",
   121  	"gb18030":        "gb18030",
   122  	// "hz-gb-2312":     "hzgb2312", // Was removed from WhatWG
   123  	"big5":           "big5",
   124  	"euc-jp":         "eucjp",
   125  	"iso-2022-jp":    "iso2022jp",
   126  	"shift_jis":      "shiftJIS",
   127  	"euc-kr":         "euckr",
   128  	"replacement":    "replacement",
   129  	"utf-16be":       "utf16be",
   130  	"utf-16le":       "utf16le",
   131  	"x-user-defined": "xUserDefined",
   132  }
   133  
   134  // locales is taken from
   135  // https://html.spec.whatwg.org/multipage/syntax.html#encoding-sniffing-algorithm.
   136  var locales = []struct{ tag, name string }{
   137  	// The default value. Explicitly state latin to benefit from the exact
   138  	// script option, while still making 1252 the default encoding for languages
   139  	// written in Latin script.
   140  	{"und_Latn", "windows-1252"},
   141  	{"ar", "windows-1256"},
   142  	{"ba", "windows-1251"},
   143  	{"be", "windows-1251"},
   144  	{"bg", "windows-1251"},
   145  	{"cs", "windows-1250"},
   146  	{"el", "iso-8859-7"},
   147  	{"et", "windows-1257"},
   148  	{"fa", "windows-1256"},
   149  	{"he", "windows-1255"},
   150  	{"hr", "windows-1250"},
   151  	{"hu", "iso-8859-2"},
   152  	{"ja", "shift_jis"},
   153  	{"kk", "windows-1251"},
   154  	{"ko", "euc-kr"},
   155  	{"ku", "windows-1254"},
   156  	{"ky", "windows-1251"},
   157  	{"lt", "windows-1257"},
   158  	{"lv", "windows-1257"},
   159  	{"mk", "windows-1251"},
   160  	{"pl", "iso-8859-2"},
   161  	{"ru", "windows-1251"},
   162  	{"sah", "windows-1251"},
   163  	{"sk", "windows-1250"},
   164  	{"sl", "iso-8859-2"},
   165  	{"sr", "windows-1251"},
   166  	{"tg", "windows-1251"},
   167  	{"th", "windows-874"},
   168  	{"tr", "windows-1254"},
   169  	{"tt", "windows-1251"},
   170  	{"uk", "windows-1251"},
   171  	{"vi", "windows-1258"},
   172  	{"zh-hans", "gb18030"},
   173  	{"zh-hant", "big5"},
   174  }