github.com/go-xe2/third@v1.0.3/golang.org/x/text/encoding/japanese/iso2022jp.go (about) 1 // Copyright 2013 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package japanese 6 7 import ( 8 "unicode/utf8" 9 10 "github.com/go-xe2/third/golang.org/x/text/encoding" 11 "github.com/go-xe2/third/golang.org/x/text/encoding/internal" 12 "github.com/go-xe2/third/golang.org/x/text/encoding/internal/identifier" 13 "github.com/go-xe2/third/golang.org/x/text/transform" 14 ) 15 16 // ISO2022JP is the ISO-2022-JP encoding. 17 var ISO2022JP encoding.Encoding = &iso2022JP 18 19 var iso2022JP = internal.Encoding{ 20 internal.FuncEncoding{iso2022JPNewDecoder, iso2022JPNewEncoder}, 21 "ISO-2022-JP", 22 identifier.ISO2022JP, 23 } 24 25 func iso2022JPNewDecoder() transform.Transformer { 26 return new(iso2022JPDecoder) 27 } 28 29 func iso2022JPNewEncoder() transform.Transformer { 30 return new(iso2022JPEncoder) 31 } 32 33 const ( 34 asciiState = iota 35 katakanaState 36 jis0208State 37 jis0212State 38 ) 39 40 const asciiEsc = 0x1b 41 42 type iso2022JPDecoder int 43 44 func (d *iso2022JPDecoder) Reset() { 45 *d = asciiState 46 } 47 48 func (d *iso2022JPDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { 49 r, size := rune(0), 0 50 for ; nSrc < len(src); nSrc += size { 51 c0 := src[nSrc] 52 if c0 >= utf8.RuneSelf { 53 r, size = '\ufffd', 1 54 goto write 55 } 56 57 if c0 == asciiEsc { 58 if nSrc+2 >= len(src) { 59 if !atEOF { 60 return nDst, nSrc, transform.ErrShortSrc 61 } 62 // TODO: is it correct to only skip 1?? 63 r, size = '\ufffd', 1 64 goto write 65 } 66 size = 3 67 c1 := src[nSrc+1] 68 c2 := src[nSrc+2] 69 switch { 70 case c1 == '$' && (c2 == '@' || c2 == 'B'): // 0x24 {0x40, 0x42} 71 *d = jis0208State 72 continue 73 case c1 == '$' && c2 == '(': // 0x24 0x28 74 if nSrc+3 >= len(src) { 75 if !atEOF { 76 return nDst, nSrc, transform.ErrShortSrc 77 } 78 r, size = '\ufffd', 1 79 goto write 80 } 81 size = 4 82 if src[nSrc+3] == 'D' { 83 *d = jis0212State 84 continue 85 } 86 case c1 == '(' && (c2 == 'B' || c2 == 'J'): // 0x28 {0x42, 0x4A} 87 *d = asciiState 88 continue 89 case c1 == '(' && c2 == 'I': // 0x28 0x49 90 *d = katakanaState 91 continue 92 } 93 r, size = '\ufffd', 1 94 goto write 95 } 96 97 switch *d { 98 case asciiState: 99 r, size = rune(c0), 1 100 101 case katakanaState: 102 if c0 < 0x21 || 0x60 <= c0 { 103 r, size = '\ufffd', 1 104 goto write 105 } 106 r, size = rune(c0)+(0xff61-0x21), 1 107 108 default: 109 if c0 == 0x0a { 110 *d = asciiState 111 r, size = rune(c0), 1 112 goto write 113 } 114 if nSrc+1 >= len(src) { 115 if !atEOF { 116 return nDst, nSrc, transform.ErrShortSrc 117 } 118 r, size = '\ufffd', 1 119 goto write 120 } 121 size = 2 122 c1 := src[nSrc+1] 123 i := int(c0-0x21)*94 + int(c1-0x21) 124 if *d == jis0208State && i < len(jis0208Decode) { 125 r = rune(jis0208Decode[i]) 126 } else if *d == jis0212State && i < len(jis0212Decode) { 127 r = rune(jis0212Decode[i]) 128 } else { 129 r = '\ufffd' 130 goto write 131 } 132 if r == 0 { 133 r = '\ufffd' 134 } 135 } 136 137 write: 138 if nDst+utf8.RuneLen(r) > len(dst) { 139 return nDst, nSrc, transform.ErrShortDst 140 } 141 nDst += utf8.EncodeRune(dst[nDst:], r) 142 } 143 return nDst, nSrc, err 144 } 145 146 type iso2022JPEncoder int 147 148 func (e *iso2022JPEncoder) Reset() { 149 *e = asciiState 150 } 151 152 func (e *iso2022JPEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { 153 r, size := rune(0), 0 154 for ; nSrc < len(src); nSrc += size { 155 r = rune(src[nSrc]) 156 157 // Decode a 1-byte rune. 158 if r < utf8.RuneSelf { 159 size = 1 160 161 } else { 162 // Decode a multi-byte rune. 163 r, size = utf8.DecodeRune(src[nSrc:]) 164 if size == 1 { 165 // All valid runes of size 1 (those below utf8.RuneSelf) were 166 // handled above. We have invalid UTF-8 or we haven't seen the 167 // full character yet. 168 if !atEOF && !utf8.FullRune(src[nSrc:]) { 169 err = transform.ErrShortSrc 170 break 171 } 172 } 173 174 // func init checks that the switch covers all tables. 175 // 176 // http://encoding.spec.whatwg.org/#iso-2022-jp says that "the index jis0212 177 // is not used by the iso-2022-jp encoder due to lack of widespread support". 178 // 179 // TODO: do we have to special-case U+00A5 and U+203E, as per 180 // http://encoding.spec.whatwg.org/#iso-2022-jp 181 // Doing so would mean that "\u00a5" would not be preserved 182 // after an encode-decode round trip. 183 switch { 184 case encode0Low <= r && r < encode0High: 185 if r = rune(encode0[r-encode0Low]); r>>tableShift == jis0208 { 186 goto writeJIS 187 } 188 case encode1Low <= r && r < encode1High: 189 if r = rune(encode1[r-encode1Low]); r>>tableShift == jis0208 { 190 goto writeJIS 191 } 192 case encode2Low <= r && r < encode2High: 193 if r = rune(encode2[r-encode2Low]); r>>tableShift == jis0208 { 194 goto writeJIS 195 } 196 case encode3Low <= r && r < encode3High: 197 if r = rune(encode3[r-encode3Low]); r>>tableShift == jis0208 { 198 goto writeJIS 199 } 200 case encode4Low <= r && r < encode4High: 201 if r = rune(encode4[r-encode4Low]); r>>tableShift == jis0208 { 202 goto writeJIS 203 } 204 case encode5Low <= r && r < encode5High: 205 if 0xff61 <= r && r < 0xffa0 { 206 goto writeKatakana 207 } 208 if r = rune(encode5[r-encode5Low]); r>>tableShift == jis0208 { 209 goto writeJIS 210 } 211 } 212 213 // Switch back to ASCII state in case of error so that an ASCII 214 // replacement character can be written in the correct state. 215 if *e != asciiState { 216 if nDst+3 > len(dst) { 217 err = transform.ErrShortDst 218 break 219 } 220 *e = asciiState 221 dst[nDst+0] = asciiEsc 222 dst[nDst+1] = '(' 223 dst[nDst+2] = 'B' 224 nDst += 3 225 } 226 err = internal.ErrASCIIReplacement 227 break 228 } 229 230 if *e != asciiState { 231 if nDst+4 > len(dst) { 232 err = transform.ErrShortDst 233 break 234 } 235 *e = asciiState 236 dst[nDst+0] = asciiEsc 237 dst[nDst+1] = '(' 238 dst[nDst+2] = 'B' 239 nDst += 3 240 } else if nDst >= len(dst) { 241 err = transform.ErrShortDst 242 break 243 } 244 dst[nDst] = uint8(r) 245 nDst++ 246 continue 247 248 writeJIS: 249 if *e != jis0208State { 250 if nDst+5 > len(dst) { 251 err = transform.ErrShortDst 252 break 253 } 254 *e = jis0208State 255 dst[nDst+0] = asciiEsc 256 dst[nDst+1] = '$' 257 dst[nDst+2] = 'B' 258 nDst += 3 259 } else if nDst+2 > len(dst) { 260 err = transform.ErrShortDst 261 break 262 } 263 dst[nDst+0] = 0x21 + uint8(r>>codeShift)&codeMask 264 dst[nDst+1] = 0x21 + uint8(r)&codeMask 265 nDst += 2 266 continue 267 268 writeKatakana: 269 if *e != katakanaState { 270 if nDst+4 > len(dst) { 271 err = transform.ErrShortDst 272 break 273 } 274 *e = katakanaState 275 dst[nDst+0] = asciiEsc 276 dst[nDst+1] = '(' 277 dst[nDst+2] = 'I' 278 nDst += 3 279 } else if nDst >= len(dst) { 280 err = transform.ErrShortDst 281 break 282 } 283 dst[nDst] = uint8(r - (0xff61 - 0x21)) 284 nDst++ 285 continue 286 } 287 if atEOF && err == nil && *e != asciiState { 288 if nDst+3 > len(dst) { 289 err = transform.ErrShortDst 290 } else { 291 *e = asciiState 292 dst[nDst+0] = asciiEsc 293 dst[nDst+1] = '(' 294 dst[nDst+2] = 'B' 295 nDst += 3 296 } 297 } 298 return nDst, nSrc, err 299 }