github.com/graybobo/golang.org-package-offline-cache@v0.0.0-20200626051047-6608995c132f/x/text/encoding/simplifiedchinese/hzgb2312.go (about) 1 // Copyright 2013 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package simplifiedchinese 6 7 import ( 8 "errors" 9 "unicode/utf8" 10 11 "golang.org/x/text/encoding" 12 "golang.org/x/text/encoding/internal" 13 "golang.org/x/text/encoding/internal/identifier" 14 "golang.org/x/text/transform" 15 ) 16 17 // HZGB2312 is the HZ-GB2312 encoding. 18 var HZGB2312 encoding.Encoding = &hzGB2312 19 20 var hzGB2312 = internal.Encoding{ 21 internal.FuncEncoding{hzGB2312NewDecoder, hzGB2312NewEncoder}, 22 "HZ-GB2312", 23 identifier.HZGB2312, 24 } 25 26 func hzGB2312NewDecoder() transform.Transformer { 27 return new(hzGB2312Decoder) 28 } 29 30 func hzGB2312NewEncoder() transform.Transformer { 31 return new(hzGB2312Encoder) 32 } 33 34 var errInvalidHZGB2312 = errors.New("simplifiedchinese: invalid HZ-GB2312 encoding") 35 36 const ( 37 asciiState = iota 38 gbState 39 ) 40 41 type hzGB2312Decoder int 42 43 func (d *hzGB2312Decoder) Reset() { 44 *d = asciiState 45 } 46 47 func (d *hzGB2312Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { 48 r, size := rune(0), 0 49 loop: 50 for ; nSrc < len(src); nSrc += size { 51 c0 := src[nSrc] 52 if c0 >= utf8.RuneSelf { 53 err = errInvalidHZGB2312 54 break loop 55 } 56 57 if c0 == '~' { 58 if nSrc+1 >= len(src) { 59 err = transform.ErrShortSrc 60 break loop 61 } 62 size = 2 63 switch src[nSrc+1] { 64 case '{': 65 *d = gbState 66 continue 67 case '}': 68 *d = asciiState 69 continue 70 case '~': 71 if nDst >= len(dst) { 72 err = transform.ErrShortDst 73 break loop 74 } 75 dst[nDst] = '~' 76 nDst++ 77 continue 78 case '\n': 79 continue 80 default: 81 err = errInvalidHZGB2312 82 break loop 83 } 84 } 85 86 if *d == asciiState { 87 r, size = rune(c0), 1 88 } else { 89 if nSrc+1 >= len(src) { 90 err = transform.ErrShortSrc 91 break loop 92 } 93 c1 := src[nSrc+1] 94 if c0 < 0x21 || 0x7e <= c0 || c1 < 0x21 || 0x7f <= c1 { 95 err = errInvalidHZGB2312 96 break loop 97 } 98 99 r, size = '\ufffd', 2 100 if i := int(c0-0x01)*190 + int(c1+0x3f); i < len(decode) { 101 r = rune(decode[i]) 102 if r == 0 { 103 r = '\ufffd' 104 } 105 } 106 } 107 108 if nDst+utf8.RuneLen(r) > len(dst) { 109 err = transform.ErrShortDst 110 break loop 111 } 112 nDst += utf8.EncodeRune(dst[nDst:], r) 113 } 114 if atEOF && err == transform.ErrShortSrc { 115 err = errInvalidHZGB2312 116 } 117 return nDst, nSrc, err 118 } 119 120 type hzGB2312Encoder int 121 122 func (d *hzGB2312Encoder) Reset() { 123 *d = asciiState 124 } 125 126 func (e *hzGB2312Encoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { 127 r, size := rune(0), 0 128 for ; nSrc < len(src); nSrc += size { 129 r = rune(src[nSrc]) 130 131 // Decode a 1-byte rune. 132 if r < utf8.RuneSelf { 133 size = 1 134 if r == '~' { 135 if nDst+2 > len(dst) { 136 err = transform.ErrShortDst 137 break 138 } 139 dst[nDst+0] = '~' 140 dst[nDst+1] = '~' 141 nDst += 2 142 continue 143 } else if *e != asciiState { 144 if nDst+3 > len(dst) { 145 err = transform.ErrShortDst 146 break 147 } 148 *e = asciiState 149 dst[nDst+0] = '~' 150 dst[nDst+1] = '}' 151 nDst += 2 152 } else if nDst >= len(dst) { 153 err = transform.ErrShortDst 154 break 155 } 156 dst[nDst] = uint8(r) 157 nDst += 1 158 continue 159 160 } 161 162 // Decode a multi-byte rune. 163 r, size = utf8.DecodeRune(src[nSrc:]) 164 if size == 1 { 165 // All valid runes of size 1 (those below utf8.RuneSelf) were 166 // handled above. We have invalid UTF-8 or we haven't seen the 167 // full character yet. 168 if !atEOF && !utf8.FullRune(src[nSrc:]) { 169 err = transform.ErrShortSrc 170 break 171 } 172 } 173 174 // func init checks that the switch covers all tables. 175 switch { 176 case encode0Low <= r && r < encode0High: 177 if r = rune(encode0[r-encode0Low]); r != 0 { 178 goto writeGB 179 } 180 case encode1Low <= r && r < encode1High: 181 if r = rune(encode1[r-encode1Low]); r != 0 { 182 goto writeGB 183 } 184 case encode2Low <= r && r < encode2High: 185 if r = rune(encode2[r-encode2Low]); r != 0 { 186 goto writeGB 187 } 188 case encode3Low <= r && r < encode3High: 189 if r = rune(encode3[r-encode3Low]); r != 0 { 190 goto writeGB 191 } 192 case encode4Low <= r && r < encode4High: 193 if r = rune(encode4[r-encode4Low]); r != 0 { 194 goto writeGB 195 } 196 } 197 198 terminateInASCIIState: 199 // Switch back to ASCII state in case of error so that an ASCII 200 // replacement character can be written in the correct state. 201 if *e != asciiState { 202 if nDst+2 > len(dst) { 203 err = transform.ErrShortDst 204 break 205 } 206 dst[nDst+0] = '~' 207 dst[nDst+1] = '}' 208 nDst += 2 209 } 210 err = internal.ErrASCIIReplacement 211 break 212 213 writeGB: 214 c0 := uint8(r>>8) - 0x80 215 c1 := uint8(r) - 0x80 216 if c0 < 0x21 || 0x7e <= c0 || c1 < 0x21 || 0x7f <= c1 { 217 goto terminateInASCIIState 218 } 219 if *e == asciiState { 220 if nDst+4 > len(dst) { 221 err = transform.ErrShortDst 222 break 223 } 224 *e = gbState 225 dst[nDst+0] = '~' 226 dst[nDst+1] = '{' 227 nDst += 2 228 } else if nDst+2 > len(dst) { 229 err = transform.ErrShortDst 230 break 231 } 232 dst[nDst+0] = c0 233 dst[nDst+1] = c1 234 nDst += 2 235 continue 236 } 237 // TODO: should one always terminate in ASCII state to make it safe to 238 // concatenate two HZ-GB2312-encoded strings? 239 return nDst, nSrc, err 240 }