gitee.com/quant1x/gox@v1.7.6/text/encoding/mbcs.go (about)

     1  package encoding
     2  
     3  // Generic converters for multibyte character sets.
     4  
// An mbcsTrie contains the data to convert from the character set to Unicode.
// Each level of the trie is indexed by one byte of the encoded character: if a
// character is encoded as "\x01\x02\x03", its Unicode value is found at
// t.children[1].children[2].children[3].char.
// children is either nil or has 256 elements (one per possible byte value).
type mbcsTrie struct {
	// For leaf nodes, the Unicode character that is represented.
	// The decoder treats a value of 0 as "no character stored at this node".
	char rune

	// For non-leaf nodes, the trie to decode the remainder of the character.
	children []mbcsTrie
}
    15  
// An MBCSTable holds the data to convert to and from Unicode.
// The zero value is an empty table; entries are added with AddCharacter.
type MBCSTable struct {
	// toUnicode is the root of the byte-indexed trie used for decoding.
	toUnicode mbcsTrie
	// fromUnicode maps a code point to its encoded byte sequence, used for
	// encoding. It is allocated on the first call to AddCharacter.
	fromUnicode map[rune]string
}
    21  
    22  // AddCharacter adds a character to the table. rune is its Unicode code point, 
    23  // and bytes contains the bytes used to encode it in the character set.
    24  func (table *MBCSTable) AddCharacter(c rune, bytes string) {
    25  	if table.fromUnicode == nil {
    26  		table.fromUnicode = make(map[rune]string)
    27  	}
    28  
    29  	table.fromUnicode[c] = bytes
    30  
    31  	trie := &table.toUnicode
    32  	for i := 0; i < len(bytes); i++ {
    33  		if trie.children == nil {
    34  			trie.children = make([]mbcsTrie, 256)
    35  		}
    36  
    37  		b := bytes[i]
    38  		trie = &trie.children[b]
    39  	}
    40  
    41  	trie.char = c
    42  }
    43  
    44  func (table *MBCSTable) Decoder() Decoder {
    45  	return func(p []byte) (c rune, size int, status Status) {
    46  		if len(p) == 0 {
    47  			status = NO_ROOM
    48  			return
    49  		}
    50  
    51  		if p[0] == 0 {
    52  			return 0, 1, SUCCESS
    53  		}
    54  
    55  		trie := &table.toUnicode
    56  		for trie.char == 0 {
    57  			if trie.children == nil {
    58  				return 0xfffd, 1, INVALID_CHAR
    59  			}
    60  			if len(p) < size+1 {
    61  				return 0, 0, NO_ROOM
    62  			}
    63  
    64  			trie = &trie.children[p[size]]
    65  			size++
    66  		}
    67  
    68  		c = trie.char
    69  		status = SUCCESS
    70  		return
    71  	}
    72  }
    73  
    74  func (table *MBCSTable) Encoder() Encoder {
    75  	return func(p []byte, c rune) (size int, status Status) {
    76  		bytes := table.fromUnicode[c]
    77  		if bytes == "" {
    78  			if len(p) > 0 {
    79  				p[0] = '?'
    80  				return 1, INVALID_CHAR
    81  			} else {
    82  				return 0, NO_ROOM
    83  			}
    84  		}
    85  
    86  		if len(p) < len(bytes) {
    87  			return 0, NO_ROOM
    88  		}
    89  
    90  		return copy(p, bytes), SUCCESS
    91  	}
    92  }