github.com/insionng/yougam@v0.0.0-20170714101924-2bc18d833463/libraries/mahonia/charset.go (about)

// Package mahonia is a character-set conversion library for Go.
//
// Deprecated: use code.google.com/p/go.text/encoding, perhaps along with
// code.google.com/p/go.net/html/charset. (Those packages now live at
// golang.org/x/text/encoding and golang.org/x/net/html/charset.)
     5  package mahonia
     6  
     7  import (
     8  	"bytes"
     9  	"unicode"
    10  )
    11  
    12  // Status is the type for the status return value from a Decoder or Encoder.
    13  type Status int
    14  
    15  const (
    16  	// SUCCESS means that the character was converted with no problems.
    17  	SUCCESS = Status(iota)
    18  
    19  	// INVALID_CHAR means that the source contained invalid bytes, or that the character
    20  	// could not be represented in the destination encoding.
    21  	// The Encoder or Decoder should have output a substitute character.
    22  	INVALID_CHAR
    23  
    24  	// NO_ROOM means there were not enough input bytes to form a complete character,
    25  	// or there was not enough room in the output buffer to write a complete character.
    26  	// No bytes were written, and no internal state was changed in the Encoder or Decoder.
    27  	NO_ROOM
    28  
    29  	// STATE_ONLY means that bytes were read or written indicating a state transition,
    30  	// but no actual character was processed. (Examples: byte order marks, ISO-2022 escape sequences)
    31  	STATE_ONLY
    32  )
    33  
// A Decoder is a function that decodes a character set, one character at a time.
// It works much like utf8.DecodeRune, but has an additional status return value.
//
// p holds the encoded input. The results are the decoded rune c, a byte
// count size (by analogy with utf8.DecodeRune, presumably the number of
// bytes of p that were consumed), and a Status describing the outcome.
type Decoder func(p []byte) (c rune, size int, status Status)
    37  
// An Encoder is a function that encodes a character set, one character at a time.
// It works much like utf8.EncodeRune, but has an additional status return value.
//
// p is the output buffer and c is the rune to encode. The results are a
// byte count size (by analogy with utf8.EncodeRune, presumably the number
// of bytes written to p) and a Status describing the outcome.
type Encoder func(p []byte, c rune) (size int, status Status)
    41  
// A Charset represents a character set that can be converted, and contains functions
// to create Converters to encode and decode strings in that character set.
type Charset struct {
	// Name is the character set's canonical name.
	Name string

	// Aliases is a list of alternate names for the character set.
	Aliases []string

	// NewDecoder returns a Decoder to convert from the charset to Unicode.
	NewDecoder func() Decoder

	// NewEncoder returns an Encoder to convert from Unicode to the charset.
	NewEncoder func() Encoder
}
    57  
    58  // The charsets are stored in charsets under their canonical names.
    59  var charsets = make(map[string]*Charset)
    60  
    61  // aliases maps their aliases to their canonical names.
    62  var aliases = make(map[string]string)
    63  
    64  // simplifyName converts a name to lower case and removes non-alphanumeric characters.
    65  // This is how the names are used as keys to the maps.
    66  func simplifyName(name string) string {
    67  	var buf bytes.Buffer
    68  	for _, c := range name {
    69  		switch {
    70  		case unicode.IsDigit(c):
    71  			buf.WriteRune(c)
    72  		case unicode.IsLetter(c):
    73  			buf.WriteRune(unicode.ToLower(c))
    74  		default:
    75  
    76  		}
    77  	}
    78  
    79  	return buf.String()
    80  }
    81  
    82  // RegisterCharset adds a charset to the charsetMap.
    83  func RegisterCharset(cs *Charset) {
    84  	name := cs.Name
    85  	charsets[name] = cs
    86  	aliases[simplifyName(name)] = name
    87  	for _, alias := range cs.Aliases {
    88  		aliases[simplifyName(alias)] = name
    89  	}
    90  }
    91  
    92  // GetCharset fetches a charset by name.
    93  // If the name is not found, it returns nil.
    94  func GetCharset(name string) *Charset {
    95  	return charsets[aliases[simplifyName(name)]]
    96  }
    97  
    98  // NewDecoder returns a Decoder to decode the named charset.
    99  // If the name is not found, it returns nil.
   100  func NewDecoder(name string) Decoder {
   101  	cs := GetCharset(name)
   102  	if cs == nil {
   103  		return nil
   104  	}
   105  	return cs.NewDecoder()
   106  }
   107  
   108  // NewEncoder returns an Encoder to encode the named charset.
   109  func NewEncoder(name string) Encoder {
   110  	cs := GetCharset(name)
   111  	if cs == nil {
   112  		return nil
   113  	}
   114  	return cs.NewEncoder()
   115  }