gitee.com/quant1x/gox@v1.7.6/text/encoding/mbcs.go (about) 1 package encoding 2 3 // Generic converters for multibyte character sets. 4 5 // An mbcsTrie contains the data to convert from the character set to Unicode. 6 // If a character would be encoded as "\x01\x02\x03", its unicode value would be found at t.children[1].children[2].children[3].rune 7 // children either is nil or has 256 elements. 8 type mbcsTrie struct { 9 // For leaf nodes, the Unicode character that is represented. 10 char rune 11 12 // For non-leaf nodes, the trie to decode the remainder of the character. 13 children []mbcsTrie 14 } 15 16 // A MBCSTable holds the data to convert to and from Unicode. 17 type MBCSTable struct { 18 toUnicode mbcsTrie 19 fromUnicode map[rune]string 20 } 21 22 // AddCharacter adds a character to the table. rune is its Unicode code point, 23 // and bytes contains the bytes used to encode it in the character set. 24 func (table *MBCSTable) AddCharacter(c rune, bytes string) { 25 if table.fromUnicode == nil { 26 table.fromUnicode = make(map[rune]string) 27 } 28 29 table.fromUnicode[c] = bytes 30 31 trie := &table.toUnicode 32 for i := 0; i < len(bytes); i++ { 33 if trie.children == nil { 34 trie.children = make([]mbcsTrie, 256) 35 } 36 37 b := bytes[i] 38 trie = &trie.children[b] 39 } 40 41 trie.char = c 42 } 43 44 func (table *MBCSTable) Decoder() Decoder { 45 return func(p []byte) (c rune, size int, status Status) { 46 if len(p) == 0 { 47 status = NO_ROOM 48 return 49 } 50 51 if p[0] == 0 { 52 return 0, 1, SUCCESS 53 } 54 55 trie := &table.toUnicode 56 for trie.char == 0 { 57 if trie.children == nil { 58 return 0xfffd, 1, INVALID_CHAR 59 } 60 if len(p) < size+1 { 61 return 0, 0, NO_ROOM 62 } 63 64 trie = &trie.children[p[size]] 65 size++ 66 } 67 68 c = trie.char 69 status = SUCCESS 70 return 71 } 72 } 73 74 func (table *MBCSTable) Encoder() Encoder { 75 return func(p []byte, c rune) (size int, status Status) { 76 bytes := table.fromUnicode[c] 77 if bytes == "" { 78 if len(p) > 0 { 79 p[0] = '?' 80 return 1, INVALID_CHAR 81 } else { 82 return 0, NO_ROOM 83 } 84 } 85 86 if len(p) < len(bytes) { 87 return 0, NO_ROOM 88 } 89 90 return copy(p, bytes), SUCCESS 91 } 92 }