gitee.com/quant1x/gox@v1.7.6/text/encoding/charset.go (about) 1 // This package is a character-set conversion library for Go. 2 package encoding 3 4 import ( 5 "bytes" 6 "unicode" 7 ) 8 9 // Status is the type for the status return value from a Decoder or Encoder. 10 type Status int 11 12 const ( 13 // SUCCESS means that the character was converted with no problems. 14 SUCCESS = Status(iota) 15 16 // INVALID_CHAR means that the source contained invalid bytes, or that the character 17 // could not be represented in the destination encoding. 18 // The Encoder or Decoder should have output a substitute character. 19 INVALID_CHAR 20 21 // NO_ROOM means there were not enough input bytes to form a complete character, 22 // or there was not enough room in the output buffer to write a complete character. 23 // No bytes were written, and no internal state was changed in the Encoder or Decoder. 24 NO_ROOM 25 26 // STATE_ONLY means that bytes were read or written indicating a state transition, 27 // but no actual character was processed. (Examples: byte order marks, ISO-2022 escape sequences) 28 STATE_ONLY 29 ) 30 31 // A Decoder is a function that decodes a character set, one character at a time. 32 // It works much like utf8.DecodeRune, but has an aditional status return value. 33 type Decoder func(p []byte) (c rune, size int, status Status) 34 35 // An Encoder is a function that encodes a character set, one character at a time. 36 // It works much like utf8.EncodeRune, but has an additional status return value. 37 type Encoder func(p []byte, c rune) (size int, status Status) 38 39 // A Charset represents a character set that can be converted, and contains functions 40 // to create Converters to encode and decode strings in that character set. 41 type Charset struct { 42 // Name is the character set's canonical name. 43 Name string 44 45 // Aliases returns a list of alternate names. 46 Aliases []string 47 48 // NewDecoder returns a Decoder to convert from the charset to Unicode. 49 NewDecoder func() Decoder 50 51 // NewEncoder returns an Encoder to convert from Unicode to the charset. 52 NewEncoder func() Encoder 53 } 54 55 // The charsets are stored in charsets under their canonical names. 56 var charsets = make(map[string]*Charset) 57 58 // aliases maps their aliases to their canonical names. 59 var aliases = make(map[string]string) 60 61 // simplifyName converts a name to lower case and removes non-alphanumeric characters. 62 // This is how the names are used as keys to the maps. 63 func simplifyName(name string) string { 64 var buf bytes.Buffer 65 for _, c := range name { 66 switch { 67 case unicode.IsDigit(c): 68 buf.WriteRune(c) 69 case unicode.IsLetter(c): 70 buf.WriteRune(unicode.ToLower(c)) 71 default: 72 73 } 74 } 75 76 return buf.String() 77 } 78 79 // RegisterCharset adds a charset to the charsetMap. 80 func RegisterCharset(cs *Charset) { 81 name := cs.Name 82 charsets[name] = cs 83 aliases[simplifyName(name)] = name 84 for _, alias := range cs.Aliases { 85 aliases[simplifyName(alias)] = name 86 } 87 } 88 89 // GetCharset fetches a charset by name. 90 // If the name is not found, it returns nil. 91 func GetCharset(name string) *Charset { 92 return charsets[aliases[simplifyName(name)]] 93 } 94 95 // NewDecoder returns a Decoder to decode the named charset. 96 // If the name is not found, it returns nil. 97 func NewDecoder(name string) Decoder { 98 cs := GetCharset(name) 99 if cs == nil { 100 return nil 101 } 102 return cs.NewDecoder() 103 } 104 105 // NewEncoder returns an Encoder to encode the named charset. 106 func NewEncoder(name string) Encoder { 107 cs := GetCharset(name) 108 if cs == nil { 109 return nil 110 } 111 return cs.NewEncoder() 112 }