gitee.com/quant1x/gox@v1.7.6/text/encoding/charset.go (about)

// Package encoding is a character-set conversion library for Go.
     2  package encoding
     3  
     4  import (
     5  	"bytes"
     6  	"unicode"
     7  )
     8  
     9  // Status is the type for the status return value from a Decoder or Encoder.
    10  type Status int
    11  
    12  const (
    13  	// SUCCESS means that the character was converted with no problems.
    14  	SUCCESS = Status(iota)
    15  
    16  	// INVALID_CHAR means that the source contained invalid bytes, or that the character
    17  	// could not be represented in the destination encoding.
    18  	// The Encoder or Decoder should have output a substitute character.
    19  	INVALID_CHAR
    20  
    21  	// NO_ROOM means there were not enough input bytes to form a complete character, 
    22  	// or there was not enough room in the output buffer to write a complete character.
    23  	// No bytes were written, and no internal state was changed in the Encoder or Decoder.
    24  	NO_ROOM
    25  
    26  	// STATE_ONLY means that bytes were read or written indicating a state transition,
    27  	// but no actual character was processed. (Examples: byte order marks, ISO-2022 escape sequences)
    28  	STATE_ONLY
    29  )
    30  
    31  // A Decoder is a function that decodes a character set, one character at a time.
    32  // It works much like utf8.DecodeRune, but has an aditional status return value.
    33  type Decoder func(p []byte) (c rune, size int, status Status)
    34  
    35  // An Encoder is a function that encodes a character set, one character at a time.
    36  // It works much like utf8.EncodeRune, but has an additional status return value.
    37  type Encoder func(p []byte, c rune) (size int, status Status)
    38  
    39  // A Charset represents a character set that can be converted, and contains functions
    40  // to create Converters to encode and decode strings in that character set.
    41  type Charset struct {
    42  	// Name is the character set's canonical name.
    43  	Name string
    44  
    45  	// Aliases returns a list of alternate names.
    46  	Aliases []string
    47  
    48  	// NewDecoder returns a Decoder to convert from the charset to Unicode.
    49  	NewDecoder func() Decoder
    50  
    51  	// NewEncoder returns an Encoder to convert from Unicode to the charset.
    52  	NewEncoder func() Encoder
    53  }
    54  
    55  // The charsets are stored in charsets under their canonical names. 
    56  var charsets = make(map[string]*Charset)
    57  
    58  // aliases maps their aliases to their canonical names.
    59  var aliases = make(map[string]string)
    60  
    61  // simplifyName converts a name to lower case and removes non-alphanumeric characters.
    62  // This is how the names are used as keys to the maps.
    63  func simplifyName(name string) string {
    64  	var buf bytes.Buffer
    65  	for _, c := range name {
    66  		switch {
    67  		case unicode.IsDigit(c):
    68  			buf.WriteRune(c)
    69  		case unicode.IsLetter(c):
    70  			buf.WriteRune(unicode.ToLower(c))
    71  		default:
    72  
    73  		}
    74  	}
    75  
    76  	return buf.String()
    77  }
    78  
    79  // RegisterCharset adds a charset to the charsetMap.
    80  func RegisterCharset(cs *Charset) {
    81  	name := cs.Name
    82  	charsets[name] = cs
    83  	aliases[simplifyName(name)] = name
    84  	for _, alias := range cs.Aliases {
    85  		aliases[simplifyName(alias)] = name
    86  	}
    87  }
    88  
    89  // GetCharset fetches a charset by name.
    90  // If the name is not found, it returns nil.
    91  func GetCharset(name string) *Charset {
    92  	return charsets[aliases[simplifyName(name)]]
    93  }
    94  
    95  // NewDecoder returns a Decoder to decode the named charset.
    96  // If the name is not found, it returns nil.
    97  func NewDecoder(name string) Decoder {
    98  	cs := GetCharset(name)
    99  	if cs == nil {
   100  		return nil
   101  	}
   102  	return cs.NewDecoder()
   103  }
   104  
   105  // NewEncoder returns an Encoder to encode the named charset.
   106  func NewEncoder(name string) Encoder {
   107  	cs := GetCharset(name)
   108  	if cs == nil {
   109  		return nil
   110  	}
   111  	return cs.NewEncoder()
   112  }