// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package japanese

import (
	"errors"
	"unicode/utf8"

	"golang.org/x/text/encoding"
	"golang.org/x/text/encoding/internal"
	"golang.org/x/text/encoding/internal/identifier"
	"golang.org/x/text/transform"
)

// EUCJP is the EUC-JP encoding.
var EUCJP encoding.Encoding = &eucJP

// eucJP bundles the EUC-JP decoder and encoder defined in this file with the
// encoding's name and its identifier constant.
var eucJP = internal.Encoding{
	&internal.SimpleEncoding{eucJPDecoder{}, eucJPEncoder{}},
	"EUC-JP",
	identifier.EUCPkdFmtJapanese,
}

// errInvalidEUCJP is returned by the decoder when the input is not a valid
// EUC-JP byte sequence, including a sequence that is cut short at EOF.
var errInvalidEUCJP = errors.New("japanese: invalid EUC-JP encoding")

// eucJPDecoder transforms EUC-JP bytes into UTF-8. It has no fields of its
// own; it only embeds transform.NopResetter.
type eucJPDecoder struct{ transform.NopResetter }

// Transform decodes EUC-JP bytes from src and writes them to dst as UTF-8.
//
// It returns the number of bytes written to dst and consumed from src. The
// lead byte of each sequence selects how it is decoded:
//
//   - a byte below utf8.RuneSelf is a one-byte rune of the same value;
//   - 0x8e is followed by one byte in [0xa1, 0xdf], which maps linearly onto
//     U+FF61..U+FF9F;
//   - 0x8f is followed by two bytes in [0xa1, 0xfe] that index jis0212Decode;
//   - a lead byte in [0xa1, 0xfe] is followed by one byte in [0xa1, 0xfe] and
//     the pair indexes jis0208Decode;
//   - any other lead byte is an error.
//
// Well-formed sequences that have no table entry decode to U+FFFD. The error
// is errInvalidEUCJP for malformed input, transform.ErrShortSrc when src ends
// in the middle of a sequence and atEOF is false (errInvalidEUCJP if atEOF is
// true), and transform.ErrShortDst when dst cannot hold the next rune. On any
// error nSrc points at the start of the sequence that was not written.
func (eucJPDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	r, size := rune(0), 0
loop:
	// size is set by each case, so the post statement advances nSrc past the
	// sequence just decoded. "break loop" skips the post statement, leaving
	// nSrc at the start of the offending sequence.
	for ; nSrc < len(src); nSrc += size {
		switch c0 := src[nSrc]; {
		case c0 < utf8.RuneSelf:
			// Single byte, same value as the rune.
			r, size = rune(c0), 1

		case c0 == 0x8e:
			// Two-byte sequence: 0x8e followed by a byte in [0xa1, 0xdf].
			if nSrc+1 >= len(src) {
				err = transform.ErrShortSrc
				break loop
			}
			c1 := src[nSrc+1]
			if c1 < 0xa1 || 0xdf < c1 {
				err = errInvalidEUCJP
				break loop
			}
			// 0xa1 maps to U+FF61, 0xdf maps to U+FF9F.
			r, size = rune(c1)+(0xff61-0xa1), 2

		case c0 == 0x8f:
			// Three-byte sequence: 0x8f followed by two bytes in [0xa1, 0xfe].
			if nSrc+2 >= len(src) {
				err = transform.ErrShortSrc
				break loop
			}
			c1 := src[nSrc+1]
			if c1 < 0xa1 || 0xfe < c1 {
				err = errInvalidEUCJP
				break loop
			}
			c2 := src[nSrc+2]
			if c2 < 0xa1 || 0xfe < c2 {
				err = errInvalidEUCJP
				break loop
			}
			// Default to U+FFFD; it is kept when the index is past the end of
			// the table or the table entry is zero.
			r, size = '\ufffd', 3
			// The table is indexed as 94 entries per value of c1.
			if i := int(c1-0xa1)*94 + int(c2-0xa1); i < len(jis0212Decode) {
				r = rune(jis0212Decode[i])
				if r == 0 {
					r = '\ufffd'
				}
			}

		case 0xa1 <= c0 && c0 <= 0xfe:
			// Two-byte sequence: both bytes in [0xa1, 0xfe].
			if nSrc+1 >= len(src) {
				err = transform.ErrShortSrc
				break loop
			}
			c1 := src[nSrc+1]
			if c1 < 0xa1 || 0xfe < c1 {
				err = errInvalidEUCJP
				break loop
			}
			// Default to U+FFFD; it is kept when the index is past the end of
			// the table or the table entry is zero.
			r, size = '\ufffd', 2
			// The table is indexed as 94 entries per value of c0.
			if i := int(c0-0xa1)*94 + int(c1-0xa1); i < len(jis0208Decode) {
				r = rune(jis0208Decode[i])
				if r == 0 {
					r = '\ufffd'
				}
			}

		default:
			// Lead bytes 0x80..0x8d, 0x90..0xa0 and 0xff match no case above.
			err = errInvalidEUCJP
			break loop
		}

		// Only write the rune if its whole UTF-8 encoding fits in dst.
		if nDst+utf8.RuneLen(r) > len(dst) {
			err = transform.ErrShortDst
			break loop
		}
		nDst += utf8.EncodeRune(dst[nDst:], r)
	}
	// No more input will arrive, so a truncated sequence is invalid rather
	// than merely incomplete.
	if atEOF && err == transform.ErrShortSrc {
		err = errInvalidEUCJP
	}
	return nDst, nSrc, err
}

// eucJPEncoder transforms UTF-8 into EUC-JP bytes. It has no fields of its
// own; it only embeds transform.NopResetter.
type eucJPEncoder struct{ transform.NopResetter }

// Transform encodes UTF-8 from src and writes it to dst as EUC-JP.
//
// It returns the number of bytes written to dst and consumed from src. Bytes
// below utf8.RuneSelf are copied unchanged. Other runes are looked up in the
// encode0..encode5 tables (declared elsewhere in the package); a non-zero
// entry is written as two bytes, or as 0x8f followed by two bytes. Runes in
// [0xff61, 0xffa0) that fall in the encode5 range are written as 0x8e followed
// by one byte.
//
// The error is transform.ErrShortSrc when src ends in a partial rune and atEOF
// is false, transform.ErrShortDst when dst cannot hold the next output
// sequence, and internal.ErrASCIIReplacement when a rune has no entry in the
// tables. On any error nSrc points at the start of the rune that was not
// written.
func (eucJPEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	r, size := rune(0), 0
	// Every "break" below leaves the loop without running the post statement,
	// so nSrc stays at the rune that could not be processed. Every "continue"
	// runs it and advances nSrc by size.
	for ; nSrc < len(src); nSrc += size {
		r = rune(src[nSrc])

		// Decode a 1-byte rune.
		if r < utf8.RuneSelf {
			size = 1

		} else {
			// Decode a multi-byte rune.
			r, size = utf8.DecodeRune(src[nSrc:])
			if size == 1 {
				// All valid runes of size 1 (those below utf8.RuneSelf) were
				// handled above. We have invalid UTF-8 or we haven't seen the
				// full character yet.
				if !atEOF && !utf8.FullRune(src[nSrc:]) {
					err = transform.ErrShortSrc
					break
				}
			}

			// func init checks that the switch covers all tables.
			//
			// On a successful lookup r is overwritten with the table entry,
			// which the write2or3 code below unpacks. A zero entry means the
			// rune has no mapping and falls through to the error after the
			// switch.
			switch {
			case encode0Low <= r && r < encode0High:
				if r = rune(encode0[r-encode0Low]); r != 0 {
					goto write2or3
				}
			case encode1Low <= r && r < encode1High:
				if r = rune(encode1[r-encode1Low]); r != 0 {
					goto write2or3
				}
			case encode2Low <= r && r < encode2High:
				if r = rune(encode2[r-encode2Low]); r != 0 {
					goto write2or3
				}
			case encode3Low <= r && r < encode3High:
				if r = rune(encode3[r-encode3Low]); r != 0 {
					goto write2or3
				}
			case encode4Low <= r && r < encode4High:
				if r = rune(encode4[r-encode4Low]); r != 0 {
					goto write2or3
				}
			case encode5Low <= r && r < encode5High:
				// Runes in [0xff61, 0xffa0) are not looked up in encode5; they
				// are computed directly in write2, which still needs the
				// original rune value in r.
				if 0xff61 <= r && r < 0xffa0 {
					goto write2
				}
				if r = rune(encode5[r-encode5Low]); r != 0 {
					goto write2or3
				}
			}
			// No table produced an encoding for this rune.
			// NOTE(review): presumably the caller reacts to this error by
			// substituting a replacement; confirm in package internal.
			err = internal.ErrASCIIReplacement
			break
		}

		// Only 1-byte runes reach this point: write the byte unchanged.
		if nDst >= len(dst) {
			err = transform.ErrShortDst
			break
		}
		dst[nDst] = uint8(r)
		nDst++
		continue

	write2or3:
		// r holds a table entry. Its bits above tableShift select the output
		// form: two bytes when they equal jis0208, otherwise a 0x8f prefix
		// followed by two bytes (the form the decoder in this file reads via
		// jis0212Decode).
		if r>>tableShift == jis0208 {
			if nDst+2 > len(dst) {
				err = transform.ErrShortDst
				break
			}
		} else {
			// Check room for all three bytes before writing the prefix.
			if nDst+3 > len(dst) {
				err = transform.ErrShortDst
				break
			}
			dst[nDst] = 0x8f
			nDst++
		}
		// & binds tighter than +, so each field is masked with codeMask
		// before 0xa1 is added.
		dst[nDst+0] = 0xa1 + uint8(r>>codeShift)&codeMask
		dst[nDst+1] = 0xa1 + uint8(r)&codeMask
		nDst += 2
		continue

	write2:
		// r is still the original rune, in [0xff61, 0xffa0). This is the
		// inverse of the decoder's 0x8e case: U+FF61 becomes 0xa1.
		if nDst+2 > len(dst) {
			err = transform.ErrShortDst
			break
		}
		dst[nDst+0] = 0x8e
		dst[nDst+1] = uint8(r - (0xff61 - 0xa1))
		nDst += 2
		continue
	}
	return nDst, nSrc, err
}

// init panics at package initialization if the number of encode tables no
// longer matches the six cases hard-coded in eucJPEncoder.Transform.
func init() {
	// Check that the hard-coded encode switch covers all tables.
	if numEncodeTables != 6 {
		panic("bad numEncodeTables")
	}
}