gitee.com/quant1x/gox@v1.7.6/text/encoding/gb18030.go (about) 1 package encoding 2 3 import ( 4 "sync" 5 ) 6 7 // Converters for GB18030 encoding. 8 9 func init() { 10 RegisterCharset(&Charset{ 11 Name: "GB18030", 12 NewDecoder: func() Decoder { 13 gb18030Once.Do(buildGB18030Tables) 14 return decodeGB18030Rune 15 }, 16 NewEncoder: func() Encoder { 17 gb18030Once.Do(buildGB18030Tables) 18 return encodeGB18030Rune 19 }, 20 }) 21 } 22 23 func decodeGB18030Rune(p []byte) (r rune, size int, status Status) { 24 if len(p) == 0 { 25 status = NO_ROOM 26 return 27 } 28 29 b := p[0] 30 if b < 128 { 31 return rune(b), 1, SUCCESS 32 } 33 34 if len(p) < 2 { 35 status = NO_ROOM 36 return 37 } 38 39 if p[0] < 0x81 || p[0] > 0xfe { 40 return 0xfffd, 1, INVALID_CHAR 41 } 42 43 if p[1] >= 0x40 { 44 // 2-byte character 45 c := uint16(p[0])<<8 + uint16(p[1]) 46 r = rune(gbkToUnicode[c]) 47 if r == 0 { 48 r = gbkToUnicodeExtra[c] 49 } 50 51 if r != 0 { 52 return r, 2, SUCCESS 53 } 54 } else if p[1] >= 0x30 { 55 // 4-byte character 56 if len(p) < 4 { 57 return 0, 0, NO_ROOM 58 } 59 if p[2] < 0x81 || p[2] > 0xfe || p[3] < 0x30 || p[3] > 0x39 { 60 return 0xfffd, 1, INVALID_CHAR 61 } 62 63 code := uint32(p[0])<<24 + uint32(p[1])<<16 + uint32(p[2])<<8 + uint32(p[3]) 64 lin := gb18030Linear(code) 65 66 if lin <= maxGB18030Linear { 67 r = rune(gb18030LinearToUnicode[lin]) 68 if r != 0 { 69 return r, 4, SUCCESS 70 } 71 } 72 73 for _, rng := range gb18030Ranges { 74 if lin >= rng.firstGB && lin <= rng.lastGB { 75 return rng.firstRune + rune(lin) - rune(rng.firstGB), 4, SUCCESS 76 } 77 } 78 } 79 80 return 0xfffd, 1, INVALID_CHAR 81 } 82 83 func encodeGB18030Rune(p []byte, r rune) (size int, status Status) { 84 if len(p) == 0 { 85 status = NO_ROOM 86 return 87 } 88 89 if r < 128 { 90 p[0] = byte(r) 91 return 1, SUCCESS 92 } 93 94 if len(p) < 2 { 95 status = NO_ROOM 96 return 97 } 98 99 var c uint16 100 if r < 0x10000 { 101 c = unicodeToGBK[r] 102 } else { 103 c = unicodeToGBKExtra[r] 104 } 105 106 if c != 0 { 107 p[0] = byte(c >> 8) 108 p[1] = byte(c) 109 return 2, SUCCESS 110 } 111 112 if len(p) < 4 { 113 return 0, NO_ROOM 114 } 115 116 if r < 0x10000 { 117 f := unicodeToGB18030[r] 118 if f != 0 { 119 p[0] = byte(f >> 24) 120 p[1] = byte(f >> 16) 121 p[2] = byte(f >> 8) 122 p[3] = byte(f) 123 return 4, SUCCESS 124 } 125 } 126 127 for _, rng := range gb18030Ranges { 128 if r >= rng.firstRune && r <= rng.lastRune { 129 lin := rng.firstGB + uint32(r) - uint32(rng.firstRune) 130 p[0] = byte(lin/(10*126*10)) + 0x81 131 p[1] = byte(lin/(126*10)%10) + 0x30 132 p[2] = byte(lin/10%126) + 0x81 133 p[3] = byte(lin%10) + 0x30 134 return 4, SUCCESS 135 } 136 } 137 138 p[0] = 0x1a 139 return 1, INVALID_CHAR 140 } 141 142 var gb18030Once sync.Once 143 144 // Mapping from gb18039Linear values to Unicode. 145 var gb18030LinearToUnicode []uint16 146 147 var unicodeToGB18030 []uint32 148 149 func buildGB18030Tables() { 150 gb18030LinearToUnicode = make([]uint16, maxGB18030Linear+1) 151 unicodeToGB18030 = make([]uint32, 65536) 152 for _, data := range gb18030Data { 153 gb18030LinearToUnicode[gb18030Linear(data.gb18030)] = data.unicode 154 unicodeToGB18030[data.unicode] = data.gb18030 155 } 156 }