// Source: github.com/insionng/yougam@v0.0.0-20170714101924-2bc18d833463/libraries/mahonia/charset.go

// Package mahonia is a character-set conversion library for Go.
//
// Deprecated: use code.google.com/p/go.text/encoding, perhaps along with
// code.google.com/p/go.net/html/charset.
package mahonia

import (
	"bytes"
	"unicode"
)

// Status is the type for the status return value from a Decoder or Encoder.
type Status int

const (
	// SUCCESS means that the character was converted with no problems.
	SUCCESS = Status(iota)

	// INVALID_CHAR means that the source contained invalid bytes, or that the character
	// could not be represented in the destination encoding.
	// The Encoder or Decoder should have output a substitute character.
	INVALID_CHAR

	// NO_ROOM means there were not enough input bytes to form a complete character,
	// or there was not enough room in the output buffer to write a complete character.
	// No bytes were written, and no internal state was changed in the Encoder or Decoder.
	NO_ROOM

	// STATE_ONLY means that bytes were read or written indicating a state transition,
	// but no actual character was processed. (Examples: byte order marks, ISO-2022 escape sequences)
	STATE_ONLY
)

// A Decoder is a function that decodes a character set, one character at a time.
// It works much like utf8.DecodeRune, but has an additional status return value:
// it takes the input bytes p and returns the decoded rune c, the number of
// bytes consumed (size), and a Status describing the outcome.
type Decoder func(p []byte) (c rune, size int, status Status)

// An Encoder is a function that encodes a character set, one character at a time.
// It works much like utf8.EncodeRune, but has an additional status return value:
// it writes the encoding of c into p and returns the number of bytes written
// (size) and a Status describing the outcome.
type Encoder func(p []byte, c rune) (size int, status Status)

// A Charset represents a character set that can be converted, and contains functions
// to create Decoders and Encoders for that character set.
44 type Charset struct { 45 // Name is the character set's canonical name. 46 Name string 47 48 // Aliases returns a list of alternate names. 49 Aliases []string 50 51 // NewDecoder returns a Decoder to convert from the charset to Unicode. 52 NewDecoder func() Decoder 53 54 // NewEncoder returns an Encoder to convert from Unicode to the charset. 55 NewEncoder func() Encoder 56 } 57 58 // The charsets are stored in charsets under their canonical names. 59 var charsets = make(map[string]*Charset) 60 61 // aliases maps their aliases to their canonical names. 62 var aliases = make(map[string]string) 63 64 // simplifyName converts a name to lower case and removes non-alphanumeric characters. 65 // This is how the names are used as keys to the maps. 66 func simplifyName(name string) string { 67 var buf bytes.Buffer 68 for _, c := range name { 69 switch { 70 case unicode.IsDigit(c): 71 buf.WriteRune(c) 72 case unicode.IsLetter(c): 73 buf.WriteRune(unicode.ToLower(c)) 74 default: 75 76 } 77 } 78 79 return buf.String() 80 } 81 82 // RegisterCharset adds a charset to the charsetMap. 83 func RegisterCharset(cs *Charset) { 84 name := cs.Name 85 charsets[name] = cs 86 aliases[simplifyName(name)] = name 87 for _, alias := range cs.Aliases { 88 aliases[simplifyName(alias)] = name 89 } 90 } 91 92 // GetCharset fetches a charset by name. 93 // If the name is not found, it returns nil. 94 func GetCharset(name string) *Charset { 95 return charsets[aliases[simplifyName(name)]] 96 } 97 98 // NewDecoder returns a Decoder to decode the named charset. 99 // If the name is not found, it returns nil. 100 func NewDecoder(name string) Decoder { 101 cs := GetCharset(name) 102 if cs == nil { 103 return nil 104 } 105 return cs.NewDecoder() 106 } 107 108 // NewEncoder returns an Encoder to encode the named charset. 109 func NewEncoder(name string) Encoder { 110 cs := GetCharset(name) 111 if cs == nil { 112 return nil 113 } 114 return cs.NewEncoder() 115 }