github.com/graybobo/golang.org-package-offline-cache@v0.0.0-20200626051047-6608995c132f/x/text/encoding/japanese/iso2022jp.go (about) 1 // Copyright 2013 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package japanese 6 7 import ( 8 "errors" 9 "unicode/utf8" 10 11 "golang.org/x/text/encoding" 12 "golang.org/x/text/encoding/internal" 13 "golang.org/x/text/encoding/internal/identifier" 14 "golang.org/x/text/transform" 15 ) 16 17 // ISO2022JP is the ISO-2022-JP encoding. 18 var ISO2022JP encoding.Encoding = &iso2022JP 19 20 var iso2022JP = internal.Encoding{ 21 internal.FuncEncoding{iso2022JPNewDecoder, iso2022JPNewEncoder}, 22 "ISO-2022-JP", 23 identifier.ISO2022JP, 24 } 25 26 func iso2022JPNewDecoder() transform.Transformer { 27 return new(iso2022JPDecoder) 28 } 29 30 func iso2022JPNewEncoder() transform.Transformer { 31 return new(iso2022JPEncoder) 32 } 33 34 var errInvalidISO2022JP = errors.New("japanese: invalid ISO-2022-JP encoding") 35 36 const ( 37 asciiState = iota 38 katakanaState 39 jis0208State 40 jis0212State 41 ) 42 43 const asciiEsc = 0x1b 44 45 type iso2022JPDecoder int 46 47 func (d *iso2022JPDecoder) Reset() { 48 *d = asciiState 49 } 50 51 func (d *iso2022JPDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { 52 r, size := rune(0), 0 53 loop: 54 for ; nSrc < len(src); nSrc += size { 55 c0 := src[nSrc] 56 if c0 >= utf8.RuneSelf { 57 err = errInvalidISO2022JP 58 break loop 59 } 60 61 if c0 == asciiEsc { 62 if nSrc+2 >= len(src) { 63 err = transform.ErrShortSrc 64 break loop 65 } 66 size = 3 67 c1 := src[nSrc+1] 68 c2 := src[nSrc+2] 69 switch { 70 case c1 == '$' && (c2 == '@' || c2 == 'B'): 71 *d = jis0208State 72 continue 73 case c1 == '$' && c2 == '(': 74 if nSrc+3 >= len(src) { 75 err = transform.ErrShortSrc 76 break loop 77 } 78 size = 4 79 if src[nSrc]+3 == 'D' { 80 *d = jis0212State 81 continue 82 } 83 case c1 == '(' && (c2 == 'B' || c2 == 'J'): 84 *d = asciiState 85 continue 86 case c1 == '(' && c2 == 'I': 87 *d = katakanaState 88 continue 89 } 90 err = errInvalidISO2022JP 91 break loop 92 } 93 94 switch *d { 95 case asciiState: 96 r, size = rune(c0), 1 97 98 case katakanaState: 99 if c0 < 0x21 || 0x60 <= c0 { 100 err = errInvalidISO2022JP 101 break loop 102 } 103 r, size = rune(c0)+(0xff61-0x21), 1 104 105 default: 106 if c0 == 0x0a { 107 *d = asciiState 108 r, size = rune(c0), 1 109 break 110 } 111 if nSrc+1 >= len(src) { 112 err = transform.ErrShortSrc 113 break loop 114 } 115 size = 2 116 c1 := src[nSrc+1] 117 i := int(c0-0x21)*94 + int(c1-0x21) 118 if *d == jis0208State && i < len(jis0208Decode) { 119 r = rune(jis0208Decode[i]) 120 } else if *d == jis0212State && i < len(jis0212Decode) { 121 r = rune(jis0212Decode[i]) 122 } else { 123 r = '\ufffd' 124 break 125 } 126 if r == 0 { 127 r = '\ufffd' 128 } 129 } 130 131 if nDst+utf8.RuneLen(r) > len(dst) { 132 err = transform.ErrShortDst 133 break loop 134 } 135 nDst += utf8.EncodeRune(dst[nDst:], r) 136 } 137 if atEOF && err == transform.ErrShortSrc { 138 err = errInvalidISO2022JP 139 } 140 return nDst, nSrc, err 141 } 142 143 type iso2022JPEncoder int 144 145 func (e *iso2022JPEncoder) Reset() { 146 *e = asciiState 147 } 148 149 func (e *iso2022JPEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { 150 r, size := rune(0), 0 151 for ; nSrc < len(src); nSrc += size { 152 r = rune(src[nSrc]) 153 154 // Decode a 1-byte rune. 155 if r < utf8.RuneSelf { 156 size = 1 157 158 } else { 159 // Decode a multi-byte rune. 160 r, size = utf8.DecodeRune(src[nSrc:]) 161 if size == 1 { 162 // All valid runes of size 1 (those below utf8.RuneSelf) were 163 // handled above. We have invalid UTF-8 or we haven't seen the 164 // full character yet. 165 if !atEOF && !utf8.FullRune(src[nSrc:]) { 166 err = transform.ErrShortSrc 167 break 168 } 169 } 170 171 // func init checks that the switch covers all tables. 172 // 173 // http://encoding.spec.whatwg.org/#iso-2022-jp says that "the index jis0212 174 // is not used by the iso-2022-jp encoder due to lack of widespread support". 175 // 176 // TODO: do we have to special-case U+00A5 and U+203E, as per 177 // http://encoding.spec.whatwg.org/#iso-2022-jp 178 // Doing so would mean that "\u00a5" would not be preserved 179 // after an encode-decode round trip. 180 switch { 181 case encode0Low <= r && r < encode0High: 182 if r = rune(encode0[r-encode0Low]); r>>tableShift == jis0208 { 183 goto writeJIS 184 } 185 case encode1Low <= r && r < encode1High: 186 if r = rune(encode1[r-encode1Low]); r>>tableShift == jis0208 { 187 goto writeJIS 188 } 189 case encode2Low <= r && r < encode2High: 190 if r = rune(encode2[r-encode2Low]); r>>tableShift == jis0208 { 191 goto writeJIS 192 } 193 case encode3Low <= r && r < encode3High: 194 if r = rune(encode3[r-encode3Low]); r>>tableShift == jis0208 { 195 goto writeJIS 196 } 197 case encode4Low <= r && r < encode4High: 198 if r = rune(encode4[r-encode4Low]); r>>tableShift == jis0208 { 199 goto writeJIS 200 } 201 case encode5Low <= r && r < encode5High: 202 if 0xff61 <= r && r < 0xffa0 { 203 goto writeKatakana 204 } 205 if r = rune(encode5[r-encode5Low]); r>>tableShift == jis0208 { 206 goto writeJIS 207 } 208 } 209 210 // Switch back to ASCII state in case of error so that an ASCII 211 // replacement character can be written in the correct state. 212 if *e != asciiState { 213 if nDst+3 > len(dst) { 214 err = transform.ErrShortDst 215 break 216 } 217 *e = asciiState 218 dst[nDst+0] = asciiEsc 219 dst[nDst+1] = '(' 220 dst[nDst+2] = 'B' 221 nDst += 3 222 } 223 err = internal.ErrASCIIReplacement 224 break 225 } 226 227 if *e != asciiState { 228 if nDst+4 > len(dst) { 229 err = transform.ErrShortDst 230 break 231 } 232 *e = asciiState 233 dst[nDst+0] = asciiEsc 234 dst[nDst+1] = '(' 235 dst[nDst+2] = 'B' 236 nDst += 3 237 } else if nDst >= len(dst) { 238 err = transform.ErrShortDst 239 break 240 } 241 dst[nDst] = uint8(r) 242 nDst++ 243 continue 244 245 writeJIS: 246 if *e != jis0208State { 247 if nDst+5 > len(dst) { 248 err = transform.ErrShortDst 249 break 250 } 251 *e = jis0208State 252 dst[nDst+0] = asciiEsc 253 dst[nDst+1] = '$' 254 dst[nDst+2] = 'B' 255 nDst += 3 256 } else if nDst+2 > len(dst) { 257 err = transform.ErrShortDst 258 break 259 } 260 dst[nDst+0] = 0x21 + uint8(r>>codeShift)&codeMask 261 dst[nDst+1] = 0x21 + uint8(r)&codeMask 262 nDst += 2 263 continue 264 265 writeKatakana: 266 if *e != katakanaState { 267 if nDst+4 > len(dst) { 268 err = transform.ErrShortDst 269 break 270 } 271 *e = katakanaState 272 dst[nDst+0] = asciiEsc 273 dst[nDst+1] = '(' 274 dst[nDst+2] = 'I' 275 nDst += 3 276 } else if nDst >= len(dst) { 277 err = transform.ErrShortDst 278 break 279 } 280 dst[nDst] = uint8(r - (0xff61 - 0x21)) 281 nDst++ 282 continue 283 } 284 if atEOF && err == nil && *e != asciiState { 285 if nDst+3 > len(dst) { 286 err = transform.ErrShortDst 287 } else { 288 *e = asciiState 289 dst[nDst+0] = asciiEsc 290 dst[nDst+1] = '(' 291 dst[nDst+2] = 'B' 292 nDst += 3 293 } 294 } 295 return nDst, nSrc, err 296 }