github.com/go-xe2/third@v1.0.3/golang.org/x/text/encoding/japanese/eucjp.go (about) 1 // Copyright 2013 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package japanese 6 7 import ( 8 "unicode/utf8" 9 10 "github.com/go-xe2/third/golang.org/x/text/encoding" 11 "github.com/go-xe2/third/golang.org/x/text/encoding/internal" 12 "github.com/go-xe2/third/golang.org/x/text/encoding/internal/identifier" 13 "github.com/go-xe2/third/golang.org/x/text/transform" 14 ) 15 16 // EUCJP is the EUC-JP encoding. 17 var EUCJP encoding.Encoding = &eucJP 18 19 var eucJP = internal.Encoding{ 20 &internal.SimpleEncoding{eucJPDecoder{}, eucJPEncoder{}}, 21 "EUC-JP", 22 identifier.EUCPkdFmtJapanese, 23 } 24 25 type eucJPDecoder struct{ transform.NopResetter } 26 27 // See https://encoding.spec.whatwg.org/#euc-jp-decoder. 28 func (eucJPDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { 29 r, size := rune(0), 0 30 loop: 31 for ; nSrc < len(src); nSrc += size { 32 switch c0 := src[nSrc]; { 33 case c0 < utf8.RuneSelf: 34 r, size = rune(c0), 1 35 36 case c0 == 0x8e: 37 if nSrc+1 >= len(src) { 38 if !atEOF { 39 err = transform.ErrShortSrc 40 break loop 41 } 42 r, size = utf8.RuneError, 1 43 break 44 } 45 c1 := src[nSrc+1] 46 switch { 47 case c1 < 0xa1: 48 r, size = utf8.RuneError, 1 49 case c1 > 0xdf: 50 r, size = utf8.RuneError, 2 51 if c1 == 0xff { 52 size = 1 53 } 54 default: 55 r, size = rune(c1)+(0xff61-0xa1), 2 56 } 57 case c0 == 0x8f: 58 if nSrc+2 >= len(src) { 59 if !atEOF { 60 err = transform.ErrShortSrc 61 break loop 62 } 63 r, size = utf8.RuneError, 1 64 if p := nSrc + 1; p < len(src) && 0xa1 <= src[p] && src[p] < 0xfe { 65 size = 2 66 } 67 break 68 } 69 c1 := src[nSrc+1] 70 if c1 < 0xa1 || 0xfe < c1 { 71 r, size = utf8.RuneError, 1 72 break 73 } 74 c2 := src[nSrc+2] 75 if c2 < 0xa1 || 0xfe < c2 { 76 r, size = utf8.RuneError, 2 77 break 78 } 79 r, size = utf8.RuneError, 3 80 if i := int(c1-0xa1)*94 + int(c2-0xa1); i < len(jis0212Decode) { 81 r = rune(jis0212Decode[i]) 82 if r == 0 { 83 r = utf8.RuneError 84 } 85 } 86 87 case 0xa1 <= c0 && c0 <= 0xfe: 88 if nSrc+1 >= len(src) { 89 if !atEOF { 90 err = transform.ErrShortSrc 91 break loop 92 } 93 r, size = utf8.RuneError, 1 94 break 95 } 96 c1 := src[nSrc+1] 97 if c1 < 0xa1 || 0xfe < c1 { 98 r, size = utf8.RuneError, 1 99 break 100 } 101 r, size = utf8.RuneError, 2 102 if i := int(c0-0xa1)*94 + int(c1-0xa1); i < len(jis0208Decode) { 103 r = rune(jis0208Decode[i]) 104 if r == 0 { 105 r = utf8.RuneError 106 } 107 } 108 109 default: 110 r, size = utf8.RuneError, 1 111 } 112 113 if nDst+utf8.RuneLen(r) > len(dst) { 114 err = transform.ErrShortDst 115 break loop 116 } 117 nDst += utf8.EncodeRune(dst[nDst:], r) 118 } 119 return nDst, nSrc, err 120 } 121 122 type eucJPEncoder struct{ transform.NopResetter } 123 124 func (eucJPEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { 125 r, size := rune(0), 0 126 for ; nSrc < len(src); nSrc += size { 127 r = rune(src[nSrc]) 128 129 // Decode a 1-byte rune. 130 if r < utf8.RuneSelf { 131 size = 1 132 133 } else { 134 // Decode a multi-byte rune. 135 r, size = utf8.DecodeRune(src[nSrc:]) 136 if size == 1 { 137 // All valid runes of size 1 (those below utf8.RuneSelf) were 138 // handled above. We have invalid UTF-8 or we haven't seen the 139 // full character yet. 140 if !atEOF && !utf8.FullRune(src[nSrc:]) { 141 err = transform.ErrShortSrc 142 break 143 } 144 } 145 146 // func init checks that the switch covers all tables. 147 switch { 148 case encode0Low <= r && r < encode0High: 149 if r = rune(encode0[r-encode0Low]); r != 0 { 150 goto write2or3 151 } 152 case encode1Low <= r && r < encode1High: 153 if r = rune(encode1[r-encode1Low]); r != 0 { 154 goto write2or3 155 } 156 case encode2Low <= r && r < encode2High: 157 if r = rune(encode2[r-encode2Low]); r != 0 { 158 goto write2or3 159 } 160 case encode3Low <= r && r < encode3High: 161 if r = rune(encode3[r-encode3Low]); r != 0 { 162 goto write2or3 163 } 164 case encode4Low <= r && r < encode4High: 165 if r = rune(encode4[r-encode4Low]); r != 0 { 166 goto write2or3 167 } 168 case encode5Low <= r && r < encode5High: 169 if 0xff61 <= r && r < 0xffa0 { 170 goto write2 171 } 172 if r = rune(encode5[r-encode5Low]); r != 0 { 173 goto write2or3 174 } 175 } 176 err = internal.ErrASCIIReplacement 177 break 178 } 179 180 if nDst >= len(dst) { 181 err = transform.ErrShortDst 182 break 183 } 184 dst[nDst] = uint8(r) 185 nDst++ 186 continue 187 188 write2or3: 189 if r>>tableShift == jis0208 { 190 if nDst+2 > len(dst) { 191 err = transform.ErrShortDst 192 break 193 } 194 } else { 195 if nDst+3 > len(dst) { 196 err = transform.ErrShortDst 197 break 198 } 199 dst[nDst] = 0x8f 200 nDst++ 201 } 202 dst[nDst+0] = 0xa1 + uint8(r>>codeShift)&codeMask 203 dst[nDst+1] = 0xa1 + uint8(r)&codeMask 204 nDst += 2 205 continue 206 207 write2: 208 if nDst+2 > len(dst) { 209 err = transform.ErrShortDst 210 break 211 } 212 dst[nDst+0] = 0x8e 213 dst[nDst+1] = uint8(r - (0xff61 - 0xa1)) 214 nDst += 2 215 continue 216 } 217 return nDst, nSrc, err 218 } 219 220 func init() { 221 // Check that the hard-coded encode switch covers all tables. 222 if numEncodeTables != 6 { 223 panic("bad numEncodeTables") 224 } 225 }