github.com/graybobo/golang.org-package-offline-cache@v0.0.0-20200626051047-6608995c132f/x/text/encoding/japanese/shiftjis.go (about)

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package japanese
     6  
     7  import (
     8  	"errors"
     9  	"unicode/utf8"
    10  
    11  	"golang.org/x/text/encoding"
    12  	"golang.org/x/text/encoding/internal"
    13  	"golang.org/x/text/encoding/internal/identifier"
    14  	"golang.org/x/text/transform"
    15  )
    16  
    17  // ShiftJIS is the Shift JIS encoding, also known as Code Page 932 and
    18  // Windows-31J.
    19  var ShiftJIS encoding.Encoding = &shiftJIS
    20  
    21  var shiftJIS = internal.Encoding{
    22  	&internal.SimpleEncoding{shiftJISDecoder{}, shiftJISEncoder{}},
    23  	"Shift JIS",
    24  	identifier.ShiftJIS,
    25  }
    26  
    27  var errInvalidShiftJIS = errors.New("japanese: invalid Shift JIS encoding")
    28  
    29  type shiftJISDecoder struct{ transform.NopResetter }
    30  
    31  func (shiftJISDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
    32  	r, size := rune(0), 0
    33  loop:
    34  	for ; nSrc < len(src); nSrc += size {
    35  		switch c0 := src[nSrc]; {
    36  		case c0 < utf8.RuneSelf:
    37  			r, size = rune(c0), 1
    38  
    39  		case 0xa1 <= c0 && c0 < 0xe0:
    40  			r, size = rune(c0)+(0xff61-0xa1), 1
    41  
    42  		case (0x81 <= c0 && c0 < 0xa0) || (0xe0 <= c0 && c0 < 0xf0):
    43  			if c0 <= 0x9f {
    44  				c0 -= 0x70
    45  			} else {
    46  				c0 -= 0xb0
    47  			}
    48  			c0 = 2*c0 - 0x21
    49  
    50  			if nSrc+1 >= len(src) {
    51  				err = transform.ErrShortSrc
    52  				break loop
    53  			}
    54  			c1 := src[nSrc+1]
    55  			switch {
    56  			case c1 < 0x40:
    57  				err = errInvalidShiftJIS
    58  				break loop
    59  			case c1 < 0x7f:
    60  				c0--
    61  				c1 -= 0x40
    62  			case c1 == 0x7f:
    63  				err = errInvalidShiftJIS
    64  				break loop
    65  			case c1 < 0x9f:
    66  				c0--
    67  				c1 -= 0x41
    68  			case c1 < 0xfd:
    69  				c1 -= 0x9f
    70  			default:
    71  				err = errInvalidShiftJIS
    72  				break loop
    73  			}
    74  			r, size = '\ufffd', 2
    75  			if i := int(c0)*94 + int(c1); i < len(jis0208Decode) {
    76  				r = rune(jis0208Decode[i])
    77  				if r == 0 {
    78  					r = '\ufffd'
    79  				}
    80  			}
    81  
    82  		default:
    83  			err = errInvalidShiftJIS
    84  			break loop
    85  		}
    86  
    87  		if nDst+utf8.RuneLen(r) > len(dst) {
    88  			err = transform.ErrShortDst
    89  			break loop
    90  		}
    91  		nDst += utf8.EncodeRune(dst[nDst:], r)
    92  	}
    93  	if atEOF && err == transform.ErrShortSrc {
    94  		err = errInvalidShiftJIS
    95  	}
    96  	return nDst, nSrc, err
    97  }
    98  
    99  type shiftJISEncoder struct{ transform.NopResetter }
   100  
   101  func (shiftJISEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
   102  	r, size := rune(0), 0
   103  loop:
   104  	for ; nSrc < len(src); nSrc += size {
   105  		r = rune(src[nSrc])
   106  
   107  		// Decode a 1-byte rune.
   108  		if r < utf8.RuneSelf {
   109  			size = 1
   110  
   111  		} else {
   112  			// Decode a multi-byte rune.
   113  			r, size = utf8.DecodeRune(src[nSrc:])
   114  			if size == 1 {
   115  				// All valid runes of size 1 (those below utf8.RuneSelf) were
   116  				// handled above. We have invalid UTF-8 or we haven't seen the
   117  				// full character yet.
   118  				if !atEOF && !utf8.FullRune(src[nSrc:]) {
   119  					err = transform.ErrShortSrc
   120  					break loop
   121  				}
   122  			}
   123  
   124  			// func init checks that the switch covers all tables.
   125  			switch {
   126  			case encode0Low <= r && r < encode0High:
   127  				if r = rune(encode0[r-encode0Low]); r>>tableShift == jis0208 {
   128  					goto write2
   129  				}
   130  			case encode1Low <= r && r < encode1High:
   131  				if r = rune(encode1[r-encode1Low]); r>>tableShift == jis0208 {
   132  					goto write2
   133  				}
   134  			case encode2Low <= r && r < encode2High:
   135  				if r = rune(encode2[r-encode2Low]); r>>tableShift == jis0208 {
   136  					goto write2
   137  				}
   138  			case encode3Low <= r && r < encode3High:
   139  				if r = rune(encode3[r-encode3Low]); r>>tableShift == jis0208 {
   140  					goto write2
   141  				}
   142  			case encode4Low <= r && r < encode4High:
   143  				if r = rune(encode4[r-encode4Low]); r>>tableShift == jis0208 {
   144  					goto write2
   145  				}
   146  			case encode5Low <= r && r < encode5High:
   147  				if 0xff61 <= r && r < 0xffa0 {
   148  					r -= 0xff61 - 0xa1
   149  					goto write1
   150  				}
   151  				if r = rune(encode5[r-encode5Low]); r>>tableShift == jis0208 {
   152  					goto write2
   153  				}
   154  			}
   155  			err = internal.ErrASCIIReplacement
   156  			break
   157  		}
   158  
   159  	write1:
   160  		if nDst >= len(dst) {
   161  			err = transform.ErrShortDst
   162  			break
   163  		}
   164  		dst[nDst] = uint8(r)
   165  		nDst++
   166  		continue
   167  
   168  	write2:
   169  		j1 := uint8(r>>codeShift) & codeMask
   170  		j2 := uint8(r) & codeMask
   171  		if nDst+2 > len(dst) {
   172  			err = transform.ErrShortDst
   173  			break loop
   174  		}
   175  		if j1 <= 61 {
   176  			dst[nDst+0] = 129 + j1/2
   177  		} else {
   178  			dst[nDst+0] = 193 + j1/2
   179  		}
   180  		if j1&1 == 0 {
   181  			dst[nDst+1] = j2 + j2/63 + 64
   182  		} else {
   183  			dst[nDst+1] = j2 + 159
   184  		}
   185  		nDst += 2
   186  		continue
   187  	}
   188  	return nDst, nSrc, err
   189  }