github.com/go-enjin/golang-org-x-text@v0.12.1-enjin.2/encoding/htmlindex/gen.go (about) 1 // Copyright 2015 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 //go:build ignore 6 // +build ignore 7 8 package main 9 10 import ( 11 "bytes" 12 "encoding/json" 13 "fmt" 14 "log" 15 "strings" 16 17 "github.com/go-enjin/golang-org-x-text/internal/gen" 18 ) 19 20 type group struct { 21 Encodings []struct { 22 Labels []string 23 Name string 24 } 25 } 26 27 func main() { 28 gen.Init() 29 30 r := gen.Open("https://encoding.spec.whatwg.org", "whatwg", "encodings.json") 31 var groups []group 32 if err := json.NewDecoder(r).Decode(&groups); err != nil { 33 log.Fatalf("Error reading encodings.json: %v", err) 34 } 35 36 w := &bytes.Buffer{} 37 fmt.Fprintln(w, "type htmlEncoding byte") 38 fmt.Fprintln(w, "const (") 39 for i, g := range groups { 40 for _, e := range g.Encodings { 41 key := strings.ToLower(e.Name) 42 name := consts[key] 43 if name == "" { 44 log.Fatalf("No const defined for %s.", key) 45 } 46 if i == 0 { 47 fmt.Fprintf(w, "%s htmlEncoding = iota\n", name) 48 } else { 49 fmt.Fprintf(w, "%s\n", name) 50 } 51 } 52 } 53 fmt.Fprintln(w, "numEncodings") 54 fmt.Fprint(w, ")\n\n") 55 56 fmt.Fprintln(w, "var canonical = [numEncodings]string{") 57 for _, g := range groups { 58 for _, e := range g.Encodings { 59 fmt.Fprintf(w, "%q,\n", strings.ToLower(e.Name)) 60 } 61 } 62 fmt.Fprint(w, "}\n\n") 63 64 fmt.Fprintln(w, "var nameMap = map[string]htmlEncoding{") 65 for _, g := range groups { 66 for _, e := range g.Encodings { 67 for _, l := range e.Labels { 68 key := strings.ToLower(e.Name) 69 name := consts[key] 70 fmt.Fprintf(w, "%q: %s,\n", l, name) 71 } 72 } 73 } 74 fmt.Fprint(w, "}\n\n") 75 76 var tags []string 77 fmt.Fprintln(w, "var localeMap = []htmlEncoding{") 78 for _, loc := range locales { 79 tags = append(tags, loc.tag) 80 fmt.Fprintf(w, "%s, // %s \n", consts[loc.name], loc.tag) 81 } 82 fmt.Fprint(w, "}\n\n") 83 84 fmt.Fprintf(w, "const locales = %q\n", strings.Join(tags, " ")) 85 86 gen.WriteGoFile("tables.go", "htmlindex", w.Bytes()) 87 } 88 89 // consts maps canonical encoding name to internal constant. 90 var consts = map[string]string{ 91 "utf-8": "utf8", 92 "ibm866": "ibm866", 93 "iso-8859-2": "iso8859_2", 94 "iso-8859-3": "iso8859_3", 95 "iso-8859-4": "iso8859_4", 96 "iso-8859-5": "iso8859_5", 97 "iso-8859-6": "iso8859_6", 98 "iso-8859-7": "iso8859_7", 99 "iso-8859-8": "iso8859_8", 100 "iso-8859-8-i": "iso8859_8I", 101 "iso-8859-10": "iso8859_10", 102 "iso-8859-13": "iso8859_13", 103 "iso-8859-14": "iso8859_14", 104 "iso-8859-15": "iso8859_15", 105 "iso-8859-16": "iso8859_16", 106 "koi8-r": "koi8r", 107 "koi8-u": "koi8u", 108 "macintosh": "macintosh", 109 "windows-874": "windows874", 110 "windows-1250": "windows1250", 111 "windows-1251": "windows1251", 112 "windows-1252": "windows1252", 113 "windows-1253": "windows1253", 114 "windows-1254": "windows1254", 115 "windows-1255": "windows1255", 116 "windows-1256": "windows1256", 117 "windows-1257": "windows1257", 118 "windows-1258": "windows1258", 119 "x-mac-cyrillic": "macintoshCyrillic", 120 "gbk": "gbk", 121 "gb18030": "gb18030", 122 // "hz-gb-2312": "hzgb2312", // Was removed from WhatWG 123 "big5": "big5", 124 "euc-jp": "eucjp", 125 "iso-2022-jp": "iso2022jp", 126 "shift_jis": "shiftJIS", 127 "euc-kr": "euckr", 128 "replacement": "replacement", 129 "utf-16be": "utf16be", 130 "utf-16le": "utf16le", 131 "x-user-defined": "xUserDefined", 132 } 133 134 // locales is taken from 135 // https://html.spec.whatwg.org/multipage/syntax.html#encoding-sniffing-algorithm. 136 var locales = []struct{ tag, name string }{ 137 // The default value. Explicitly state latin to benefit from the exact 138 // script option, while still making 1252 the default encoding for languages 139 // written in Latin script. 140 {"und_Latn", "windows-1252"}, 141 {"ar", "windows-1256"}, 142 {"ba", "windows-1251"}, 143 {"be", "windows-1251"}, 144 {"bg", "windows-1251"}, 145 {"cs", "windows-1250"}, 146 {"el", "iso-8859-7"}, 147 {"et", "windows-1257"}, 148 {"fa", "windows-1256"}, 149 {"he", "windows-1255"}, 150 {"hr", "windows-1250"}, 151 {"hu", "iso-8859-2"}, 152 {"ja", "shift_jis"}, 153 {"kk", "windows-1251"}, 154 {"ko", "euc-kr"}, 155 {"ku", "windows-1254"}, 156 {"ky", "windows-1251"}, 157 {"lt", "windows-1257"}, 158 {"lv", "windows-1257"}, 159 {"mk", "windows-1251"}, 160 {"pl", "iso-8859-2"}, 161 {"ru", "windows-1251"}, 162 {"sah", "windows-1251"}, 163 {"sk", "windows-1250"}, 164 {"sl", "iso-8859-2"}, 165 {"sr", "windows-1251"}, 166 {"tg", "windows-1251"}, 167 {"th", "windows-874"}, 168 {"tr", "windows-1254"}, 169 {"tt", "windows-1251"}, 170 {"uk", "windows-1251"}, 171 {"vi", "windows-1258"}, 172 {"zh-hans", "gb18030"}, 173 {"zh-hant", "big5"}, 174 }