github.com/graybobo/golang.org-package-offline-cache@v0.0.0-20200626051047-6608995c132f/x/text/encoding/japanese/eucjp.go (about)

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package japanese
     6  
     7  import (
     8  	"errors"
     9  	"unicode/utf8"
    10  
    11  	"golang.org/x/text/encoding"
    12  	"golang.org/x/text/encoding/internal"
    13  	"golang.org/x/text/encoding/internal/identifier"
    14  	"golang.org/x/text/transform"
    15  )
    16  
// EUCJP is the EUC-JP encoding.
//
// It is exposed as an encoding.Encoding and points at the package-level
// eucJP value, which pairs eucJPDecoder with eucJPEncoder.
var EUCJP encoding.Encoding = &eucJP
    19  
// eucJP is the value behind the exported EUCJP variable. It bundles the
// stateless decoder and encoder defined in this file with the string
// "EUC-JP" and the identifier.EUCPkdFmtJapanese constant.
// NOTE(review): the literals are positional, so the meaning of each slot
// (presumably encoding, name, MIB) depends on the field order of
// internal.Encoding and internal.SimpleEncoding — confirm against those types.
var eucJP = internal.Encoding{
	&internal.SimpleEncoding{eucJPDecoder{}, eucJPEncoder{}},
	"EUC-JP",
	identifier.EUCPkdFmtJapanese,
}
    25  
// errInvalidEUCJP is returned by eucJPDecoder.Transform when the input is not
// well-formed EUC-JP: an unexpected lead byte, an out-of-range trail byte, or
// a multi-byte sequence that is cut short when atEOF is true.
var errInvalidEUCJP = errors.New("japanese: invalid EUC-JP encoding")
    27  
// eucJPDecoder is the transformer that converts EUC-JP bytes to UTF-8.
// It has no fields of its own; it only embeds transform.NopResetter.
type eucJPDecoder struct{ transform.NopResetter }
    29  
    30  func (eucJPDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
    31  	r, size := rune(0), 0
    32  loop:
    33  	for ; nSrc < len(src); nSrc += size {
    34  		switch c0 := src[nSrc]; {
    35  		case c0 < utf8.RuneSelf:
    36  			r, size = rune(c0), 1
    37  
    38  		case c0 == 0x8e:
    39  			if nSrc+1 >= len(src) {
    40  				err = transform.ErrShortSrc
    41  				break loop
    42  			}
    43  			c1 := src[nSrc+1]
    44  			if c1 < 0xa1 || 0xdf < c1 {
    45  				err = errInvalidEUCJP
    46  				break loop
    47  			}
    48  			r, size = rune(c1)+(0xff61-0xa1), 2
    49  
    50  		case c0 == 0x8f:
    51  			if nSrc+2 >= len(src) {
    52  				err = transform.ErrShortSrc
    53  				break loop
    54  			}
    55  			c1 := src[nSrc+1]
    56  			if c1 < 0xa1 || 0xfe < c1 {
    57  				err = errInvalidEUCJP
    58  				break loop
    59  			}
    60  			c2 := src[nSrc+2]
    61  			if c2 < 0xa1 || 0xfe < c2 {
    62  				err = errInvalidEUCJP
    63  				break loop
    64  			}
    65  			r, size = '\ufffd', 3
    66  			if i := int(c1-0xa1)*94 + int(c2-0xa1); i < len(jis0212Decode) {
    67  				r = rune(jis0212Decode[i])
    68  				if r == 0 {
    69  					r = '\ufffd'
    70  				}
    71  			}
    72  
    73  		case 0xa1 <= c0 && c0 <= 0xfe:
    74  			if nSrc+1 >= len(src) {
    75  				err = transform.ErrShortSrc
    76  				break loop
    77  			}
    78  			c1 := src[nSrc+1]
    79  			if c1 < 0xa1 || 0xfe < c1 {
    80  				err = errInvalidEUCJP
    81  				break loop
    82  			}
    83  			r, size = '\ufffd', 2
    84  			if i := int(c0-0xa1)*94 + int(c1-0xa1); i < len(jis0208Decode) {
    85  				r = rune(jis0208Decode[i])
    86  				if r == 0 {
    87  					r = '\ufffd'
    88  				}
    89  			}
    90  
    91  		default:
    92  			err = errInvalidEUCJP
    93  			break loop
    94  		}
    95  
    96  		if nDst+utf8.RuneLen(r) > len(dst) {
    97  			err = transform.ErrShortDst
    98  			break loop
    99  		}
   100  		nDst += utf8.EncodeRune(dst[nDst:], r)
   101  	}
   102  	if atEOF && err == transform.ErrShortSrc {
   103  		err = errInvalidEUCJP
   104  	}
   105  	return nDst, nSrc, err
   106  }
   107  
// eucJPEncoder is the transformer that converts UTF-8 to EUC-JP bytes.
// It has no fields of its own; it only embeds transform.NopResetter.
type eucJPEncoder struct{ transform.NopResetter }
   109  
   110  func (eucJPEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
   111  	r, size := rune(0), 0
   112  	for ; nSrc < len(src); nSrc += size {
   113  		r = rune(src[nSrc])
   114  
   115  		// Decode a 1-byte rune.
   116  		if r < utf8.RuneSelf {
   117  			size = 1
   118  
   119  		} else {
   120  			// Decode a multi-byte rune.
   121  			r, size = utf8.DecodeRune(src[nSrc:])
   122  			if size == 1 {
   123  				// All valid runes of size 1 (those below utf8.RuneSelf) were
   124  				// handled above. We have invalid UTF-8 or we haven't seen the
   125  				// full character yet.
   126  				if !atEOF && !utf8.FullRune(src[nSrc:]) {
   127  					err = transform.ErrShortSrc
   128  					break
   129  				}
   130  			}
   131  
   132  			// func init checks that the switch covers all tables.
   133  			switch {
   134  			case encode0Low <= r && r < encode0High:
   135  				if r = rune(encode0[r-encode0Low]); r != 0 {
   136  					goto write2or3
   137  				}
   138  			case encode1Low <= r && r < encode1High:
   139  				if r = rune(encode1[r-encode1Low]); r != 0 {
   140  					goto write2or3
   141  				}
   142  			case encode2Low <= r && r < encode2High:
   143  				if r = rune(encode2[r-encode2Low]); r != 0 {
   144  					goto write2or3
   145  				}
   146  			case encode3Low <= r && r < encode3High:
   147  				if r = rune(encode3[r-encode3Low]); r != 0 {
   148  					goto write2or3
   149  				}
   150  			case encode4Low <= r && r < encode4High:
   151  				if r = rune(encode4[r-encode4Low]); r != 0 {
   152  					goto write2or3
   153  				}
   154  			case encode5Low <= r && r < encode5High:
   155  				if 0xff61 <= r && r < 0xffa0 {
   156  					goto write2
   157  				}
   158  				if r = rune(encode5[r-encode5Low]); r != 0 {
   159  					goto write2or3
   160  				}
   161  			}
   162  			err = internal.ErrASCIIReplacement
   163  			break
   164  		}
   165  
   166  		if nDst >= len(dst) {
   167  			err = transform.ErrShortDst
   168  			break
   169  		}
   170  		dst[nDst] = uint8(r)
   171  		nDst++
   172  		continue
   173  
   174  	write2or3:
   175  		if r>>tableShift == jis0208 {
   176  			if nDst+2 > len(dst) {
   177  				err = transform.ErrShortDst
   178  				break
   179  			}
   180  		} else {
   181  			if nDst+3 > len(dst) {
   182  				err = transform.ErrShortDst
   183  				break
   184  			}
   185  			dst[nDst] = 0x8f
   186  			nDst++
   187  		}
   188  		dst[nDst+0] = 0xa1 + uint8(r>>codeShift)&codeMask
   189  		dst[nDst+1] = 0xa1 + uint8(r)&codeMask
   190  		nDst += 2
   191  		continue
   192  
   193  	write2:
   194  		if nDst+2 > len(dst) {
   195  			err = transform.ErrShortDst
   196  			break
   197  		}
   198  		dst[nDst+0] = 0x8e
   199  		dst[nDst+1] = uint8(r - (0xff61 - 0xa1))
   200  		nDst += 2
   201  		continue
   202  	}
   203  	return nDst, nSrc, err
   204  }
   205  
   206  func init() {
   207  	// Check that the hard-coded encode switch covers all tables.
   208  	if numEncodeTables != 6 {
   209  		panic("bad numEncodeTables")
   210  	}
   211  }