github.com/go-xe2/third@v1.0.3/golang.org/x/text/encoding/ianaindex/ianaindex.go (about) 1 // Copyright 2015 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 //go:generate go run gen.go 6 7 // Package ianaindex maps names to Encodings as specified by the IANA registry. 8 // This includes both the MIME and IANA names. 9 // 10 // See http://www.iana.org/assignments/character-sets/character-sets.xhtml for 11 // more details. 12 package ianaindex 13 14 import ( 15 "errors" 16 "sort" 17 "strings" 18 19 "github.com/go-xe2/third/golang.org/x/text/encoding" 20 "github.com/go-xe2/third/golang.org/x/text/encoding/charmap" 21 "github.com/go-xe2/third/golang.org/x/text/encoding/internal/identifier" 22 "github.com/go-xe2/third/golang.org/x/text/encoding/japanese" 23 "github.com/go-xe2/third/golang.org/x/text/encoding/korean" 24 "github.com/go-xe2/third/golang.org/x/text/encoding/simplifiedchinese" 25 "github.com/go-xe2/third/golang.org/x/text/encoding/traditionalchinese" 26 "github.com/go-xe2/third/golang.org/x/text/encoding/unicode" 27 ) 28 29 // TODO: remove the "Status... incomplete" in the package doc comment. 30 // TODO: allow users to specify their own aliases? 31 // TODO: allow users to specify their own indexes? 32 // TODO: allow canonicalizing names 33 34 // NOTE: only use these top-level variables if we can get the linker to drop 35 // the indexes when they are not used. Make them a function or perhaps only 36 // support MIME otherwise. 37 38 var ( 39 // MIME is an index to map MIME names. 40 MIME *Index = mime 41 42 // IANA is an index that supports all names and aliases using IANA names as 43 // the canonical identifier. 44 IANA *Index = iana 45 46 // MIB is an index that associates the MIB display name with an Encoding. 47 MIB *Index = mib 48 49 mime = &Index{mimeName, ianaToMIB, ianaAliases, encodings[:]} 50 iana = &Index{ianaName, ianaToMIB, ianaAliases, encodings[:]} 51 mib = &Index{mibName, ianaToMIB, ianaAliases, encodings[:]} 52 ) 53 54 // Index maps names registered by IANA to Encodings. 55 // Currently different Indexes only differ in the names they return for 56 // encodings. In the future they may also differ in supported aliases. 57 type Index struct { 58 names func(i int) string 59 toMIB []identifier.MIB // Sorted slice of supported MIBs 60 alias map[string]int 61 enc []encoding.Encoding 62 } 63 64 var ( 65 errInvalidName = errors.New("ianaindex: invalid encoding name") 66 errUnknown = errors.New("ianaindex: unknown Encoding") 67 errUnsupported = errors.New("ianaindex: unsupported Encoding") 68 ) 69 70 // Encoding returns an Encoding for IANA-registered names. Matching is 71 // case-insensitive. 72 func (x *Index) Encoding(name string) (encoding.Encoding, error) { 73 name = strings.TrimSpace(name) 74 // First try without lowercasing (possibly creating an allocation). 75 i, ok := x.alias[name] 76 if !ok { 77 i, ok = x.alias[strings.ToLower(name)] 78 if !ok { 79 return nil, errInvalidName 80 } 81 } 82 return x.enc[i], nil 83 } 84 85 // Name reports the canonical name of the given Encoding. It will return an 86 // error if the e is not associated with a known encoding scheme. 87 func (x *Index) Name(e encoding.Encoding) (string, error) { 88 id, ok := e.(identifier.Interface) 89 if !ok { 90 return "", errUnknown 91 } 92 mib, _ := id.ID() 93 if mib == 0 { 94 return "", errUnknown 95 } 96 v := findMIB(x.toMIB, mib) 97 if v == -1 { 98 return "", errUnsupported 99 } 100 return x.names(v), nil 101 } 102 103 // TODO: the coverage of this index is rather spotty. Allowing users to set 104 // encodings would allow: 105 // - users to increase coverage 106 // - allow a partially loaded set of encodings in case the user doesn't need to 107 // them all. 108 // - write an OS-specific wrapper for supported encodings and set them. 109 // The exact definition of Set depends a bit on if and how we want to let users 110 // write their own Encoding implementations. Also, it is not possible yet to 111 // only partially load the encodings without doing some refactoring. Until this 112 // is solved, we might as well not support Set. 113 // // Set sets the e to be used for the encoding scheme identified by name. Only 114 // // canonical names may be used. An empty name assigns e to its internally 115 // // associated encoding scheme. 116 // func (x *Index) Set(name string, e encoding.Encoding) error { 117 // panic("TODO: implement") 118 // } 119 120 func findMIB(x []identifier.MIB, mib identifier.MIB) int { 121 i := sort.Search(len(x), func(i int) bool { return x[i] >= mib }) 122 if i < len(x) && x[i] == mib { 123 return i 124 } 125 return -1 126 } 127 128 const maxMIMENameLen = '0' - 1 // officially 40, but we leave some buffer. 129 130 func mimeName(x int) string { 131 n := ianaNames[x] 132 // See gen.go for a description of the encoding. 133 if n[0] <= maxMIMENameLen { 134 return n[1:n[0]] 135 } 136 return n 137 } 138 139 func ianaName(x int) string { 140 n := ianaNames[x] 141 // See gen.go for a description of the encoding. 142 if n[0] <= maxMIMENameLen { 143 return n[n[0]:] 144 } 145 return n 146 } 147 148 func mibName(x int) string { 149 return mibNames[x] 150 } 151 152 var encodings = [numIANA]encoding.Encoding{ 153 enc106: unicode.UTF8, 154 enc1015: unicode.UTF16(unicode.BigEndian, unicode.UseBOM), 155 enc1013: unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM), 156 enc1014: unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM), 157 enc2028: charmap.CodePage037, 158 enc2011: charmap.CodePage437, 159 enc2009: charmap.CodePage850, 160 enc2010: charmap.CodePage852, 161 enc2046: charmap.CodePage855, 162 enc2089: charmap.CodePage858, 163 enc2048: charmap.CodePage860, 164 enc2013: charmap.CodePage862, 165 enc2050: charmap.CodePage863, 166 enc2052: charmap.CodePage865, 167 enc2086: charmap.CodePage866, 168 enc2102: charmap.CodePage1047, 169 enc2091: charmap.CodePage1140, 170 enc4: charmap.ISO8859_1, 171 enc5: charmap.ISO8859_2, 172 enc6: charmap.ISO8859_3, 173 enc7: charmap.ISO8859_4, 174 enc8: charmap.ISO8859_5, 175 enc9: charmap.ISO8859_6, 176 enc81: charmap.ISO8859_6E, 177 enc82: charmap.ISO8859_6I, 178 enc10: charmap.ISO8859_7, 179 enc11: charmap.ISO8859_8, 180 enc84: charmap.ISO8859_8E, 181 enc85: charmap.ISO8859_8I, 182 enc12: charmap.ISO8859_9, 183 enc13: charmap.ISO8859_10, 184 enc109: charmap.ISO8859_13, 185 enc110: charmap.ISO8859_14, 186 enc111: charmap.ISO8859_15, 187 enc112: charmap.ISO8859_16, 188 enc2084: charmap.KOI8R, 189 enc2088: charmap.KOI8U, 190 enc2027: charmap.Macintosh, 191 enc2109: charmap.Windows874, 192 enc2250: charmap.Windows1250, 193 enc2251: charmap.Windows1251, 194 enc2252: charmap.Windows1252, 195 enc2253: charmap.Windows1253, 196 enc2254: charmap.Windows1254, 197 enc2255: charmap.Windows1255, 198 enc2256: charmap.Windows1256, 199 enc2257: charmap.Windows1257, 200 enc2258: charmap.Windows1258, 201 enc18: japanese.EUCJP, 202 enc39: japanese.ISO2022JP, 203 enc17: japanese.ShiftJIS, 204 enc38: korean.EUCKR, 205 enc114: simplifiedchinese.GB18030, 206 enc113: simplifiedchinese.GBK, 207 enc2085: simplifiedchinese.HZGB2312, 208 enc2026: traditionalchinese.Big5, 209 }