github.com/go-xe2/third@v1.0.3/golang.org/x/text/encoding/simplifiedchinese/hzgb2312.go (about) 1 // Copyright 2013 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package simplifiedchinese 6 7 import ( 8 "unicode/utf8" 9 10 "github.com/go-xe2/third/golang.org/x/text/encoding" 11 "github.com/go-xe2/third/golang.org/x/text/encoding/internal" 12 "github.com/go-xe2/third/golang.org/x/text/encoding/internal/identifier" 13 "github.com/go-xe2/third/golang.org/x/text/transform" 14 ) 15 16 // HZGB2312 is the HZ-GB2312 encoding. 17 var HZGB2312 encoding.Encoding = &hzGB2312 18 19 var hzGB2312 = internal.Encoding{ 20 internal.FuncEncoding{hzGB2312NewDecoder, hzGB2312NewEncoder}, 21 "HZ-GB2312", 22 identifier.HZGB2312, 23 } 24 25 func hzGB2312NewDecoder() transform.Transformer { 26 return new(hzGB2312Decoder) 27 } 28 29 func hzGB2312NewEncoder() transform.Transformer { 30 return new(hzGB2312Encoder) 31 } 32 33 const ( 34 asciiState = iota 35 gbState 36 ) 37 38 type hzGB2312Decoder int 39 40 func (d *hzGB2312Decoder) Reset() { 41 *d = asciiState 42 } 43 44 func (d *hzGB2312Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { 45 r, size := rune(0), 0 46 loop: 47 for ; nSrc < len(src); nSrc += size { 48 c0 := src[nSrc] 49 if c0 >= utf8.RuneSelf { 50 r, size = utf8.RuneError, 1 51 goto write 52 } 53 54 if c0 == '~' { 55 if nSrc+1 >= len(src) { 56 if !atEOF { 57 err = transform.ErrShortSrc 58 break loop 59 } 60 r = utf8.RuneError 61 goto write 62 } 63 size = 2 64 switch src[nSrc+1] { 65 case '{': 66 *d = gbState 67 continue 68 case '}': 69 *d = asciiState 70 continue 71 case '~': 72 if nDst >= len(dst) { 73 err = transform.ErrShortDst 74 break loop 75 } 76 dst[nDst] = '~' 77 nDst++ 78 continue 79 case '\n': 80 continue 81 default: 82 r = utf8.RuneError 83 goto write 84 } 85 } 86 87 if *d == asciiState { 88 r, size = rune(c0), 1 89 } else { 90 if nSrc+1 >= len(src) { 91 if !atEOF { 92 err = transform.ErrShortSrc 93 break loop 94 } 95 r, size = utf8.RuneError, 1 96 goto write 97 } 98 size = 2 99 c1 := src[nSrc+1] 100 if c0 < 0x21 || 0x7e <= c0 || c1 < 0x21 || 0x7f <= c1 { 101 // error 102 } else if i := int(c0-0x01)*190 + int(c1+0x3f); i < len(decode) { 103 r = rune(decode[i]) 104 if r != 0 { 105 goto write 106 } 107 } 108 if c1 > utf8.RuneSelf { 109 // Be consistent and always treat non-ASCII as a single error. 110 size = 1 111 } 112 r = utf8.RuneError 113 } 114 115 write: 116 if nDst+utf8.RuneLen(r) > len(dst) { 117 err = transform.ErrShortDst 118 break loop 119 } 120 nDst += utf8.EncodeRune(dst[nDst:], r) 121 } 122 return nDst, nSrc, err 123 } 124 125 type hzGB2312Encoder int 126 127 func (d *hzGB2312Encoder) Reset() { 128 *d = asciiState 129 } 130 131 func (e *hzGB2312Encoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { 132 r, size := rune(0), 0 133 for ; nSrc < len(src); nSrc += size { 134 r = rune(src[nSrc]) 135 136 // Decode a 1-byte rune. 137 if r < utf8.RuneSelf { 138 size = 1 139 if r == '~' { 140 if nDst+2 > len(dst) { 141 err = transform.ErrShortDst 142 break 143 } 144 dst[nDst+0] = '~' 145 dst[nDst+1] = '~' 146 nDst += 2 147 continue 148 } else if *e != asciiState { 149 if nDst+3 > len(dst) { 150 err = transform.ErrShortDst 151 break 152 } 153 *e = asciiState 154 dst[nDst+0] = '~' 155 dst[nDst+1] = '}' 156 nDst += 2 157 } else if nDst >= len(dst) { 158 err = transform.ErrShortDst 159 break 160 } 161 dst[nDst] = uint8(r) 162 nDst += 1 163 continue 164 165 } 166 167 // Decode a multi-byte rune. 168 r, size = utf8.DecodeRune(src[nSrc:]) 169 if size == 1 { 170 // All valid runes of size 1 (those below utf8.RuneSelf) were 171 // handled above. We have invalid UTF-8 or we haven't seen the 172 // full character yet. 173 if !atEOF && !utf8.FullRune(src[nSrc:]) { 174 err = transform.ErrShortSrc 175 break 176 } 177 } 178 179 // func init checks that the switch covers all tables. 180 switch { 181 case encode0Low <= r && r < encode0High: 182 if r = rune(encode0[r-encode0Low]); r != 0 { 183 goto writeGB 184 } 185 case encode1Low <= r && r < encode1High: 186 if r = rune(encode1[r-encode1Low]); r != 0 { 187 goto writeGB 188 } 189 case encode2Low <= r && r < encode2High: 190 if r = rune(encode2[r-encode2Low]); r != 0 { 191 goto writeGB 192 } 193 case encode3Low <= r && r < encode3High: 194 if r = rune(encode3[r-encode3Low]); r != 0 { 195 goto writeGB 196 } 197 case encode4Low <= r && r < encode4High: 198 if r = rune(encode4[r-encode4Low]); r != 0 { 199 goto writeGB 200 } 201 } 202 203 terminateInASCIIState: 204 // Switch back to ASCII state in case of error so that an ASCII 205 // replacement character can be written in the correct state. 206 if *e != asciiState { 207 if nDst+2 > len(dst) { 208 err = transform.ErrShortDst 209 break 210 } 211 dst[nDst+0] = '~' 212 dst[nDst+1] = '}' 213 nDst += 2 214 } 215 err = internal.ErrASCIIReplacement 216 break 217 218 writeGB: 219 c0 := uint8(r>>8) - 0x80 220 c1 := uint8(r) - 0x80 221 if c0 < 0x21 || 0x7e <= c0 || c1 < 0x21 || 0x7f <= c1 { 222 goto terminateInASCIIState 223 } 224 if *e == asciiState { 225 if nDst+4 > len(dst) { 226 err = transform.ErrShortDst 227 break 228 } 229 *e = gbState 230 dst[nDst+0] = '~' 231 dst[nDst+1] = '{' 232 nDst += 2 233 } else if nDst+2 > len(dst) { 234 err = transform.ErrShortDst 235 break 236 } 237 dst[nDst+0] = c0 238 dst[nDst+1] = c1 239 nDst += 2 240 continue 241 } 242 // TODO: should one always terminate in ASCII state to make it safe to 243 // concatenate two HZ-GB2312-encoded strings? 244 return nDst, nSrc, err 245 }