github.com/graybobo/golang.org-package-offline-cache@v0.0.0-20200626051047-6608995c132f/x/text/encoding/traditionalchinese/big5.go (about)

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package traditionalchinese
     6  
     7  import (
     8  	"errors"
     9  	"unicode/utf8"
    10  
    11  	"golang.org/x/text/encoding"
    12  	"golang.org/x/text/encoding/internal"
    13  	"golang.org/x/text/encoding/internal/identifier"
    14  	"golang.org/x/text/transform"
    15  )
    16  
    17  // All is a list of all defined encodings in this package.
    18  var All = []encoding.Encoding{Big5}
    19  
    20  // Big5 is the Big5 encoding, also known as Code Page 950.
    21  var Big5 encoding.Encoding = &big5
    22  
    23  var big5 = internal.Encoding{
    24  	&internal.SimpleEncoding{big5Decoder{}, big5Encoder{}},
    25  	"Big5",
    26  	identifier.Big5,
    27  }
    28  
    29  var errInvalidBig5 = errors.New("traditionalchinese: invalid Big5 encoding")
    30  
    31  type big5Decoder struct{ transform.NopResetter }
    32  
    33  func (big5Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
    34  	r, size, s := rune(0), 0, ""
    35  loop:
    36  	for ; nSrc < len(src); nSrc += size {
    37  		switch c0 := src[nSrc]; {
    38  		case c0 < utf8.RuneSelf:
    39  			r, size = rune(c0), 1
    40  
    41  		case 0x81 <= c0 && c0 < 0xff:
    42  			if nSrc+1 >= len(src) {
    43  				err = transform.ErrShortSrc
    44  				break loop
    45  			}
    46  			c1 := src[nSrc+1]
    47  			switch {
    48  			case 0x40 <= c1 && c1 < 0x7f:
    49  				c1 -= 0x40
    50  			case 0xa1 <= c1 && c1 < 0xff:
    51  				c1 -= 0x62
    52  			default:
    53  				err = errInvalidBig5
    54  				break loop
    55  			}
    56  			r, size = '\ufffd', 2
    57  			if i := int(c0-0x81)*157 + int(c1); i < len(decode) {
    58  				if 1133 <= i && i < 1167 {
    59  					// The two-rune special cases for LATIN CAPITAL / SMALL E WITH CIRCUMFLEX
    60  					// AND MACRON / CARON are from http://encoding.spec.whatwg.org/#big5
    61  					switch i {
    62  					case 1133:
    63  						s = "\u00CA\u0304"
    64  						goto writeStr
    65  					case 1135:
    66  						s = "\u00CA\u030C"
    67  						goto writeStr
    68  					case 1164:
    69  						s = "\u00EA\u0304"
    70  						goto writeStr
    71  					case 1166:
    72  						s = "\u00EA\u030C"
    73  						goto writeStr
    74  					}
    75  				}
    76  				r = rune(decode[i])
    77  				if r == 0 {
    78  					r = '\ufffd'
    79  				}
    80  			}
    81  
    82  		default:
    83  			err = errInvalidBig5
    84  			break loop
    85  		}
    86  
    87  		if nDst+utf8.RuneLen(r) > len(dst) {
    88  			err = transform.ErrShortDst
    89  			break loop
    90  		}
    91  		nDst += utf8.EncodeRune(dst[nDst:], r)
    92  		continue loop
    93  
    94  	writeStr:
    95  		if nDst+len(s) > len(dst) {
    96  			err = transform.ErrShortDst
    97  			break loop
    98  		}
    99  		nDst += copy(dst[nDst:], s)
   100  		continue loop
   101  	}
   102  	if atEOF && err == transform.ErrShortSrc {
   103  		err = errInvalidBig5
   104  	}
   105  	return nDst, nSrc, err
   106  }
   107  
   108  type big5Encoder struct{ transform.NopResetter }
   109  
   110  func (big5Encoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
   111  	r, size := rune(0), 0
   112  	for ; nSrc < len(src); nSrc += size {
   113  		r = rune(src[nSrc])
   114  
   115  		// Decode a 1-byte rune.
   116  		if r < utf8.RuneSelf {
   117  			size = 1
   118  			if nDst >= len(dst) {
   119  				err = transform.ErrShortDst
   120  				break
   121  			}
   122  			dst[nDst] = uint8(r)
   123  			nDst++
   124  			continue
   125  
   126  		} else {
   127  			// Decode a multi-byte rune.
   128  			r, size = utf8.DecodeRune(src[nSrc:])
   129  			if size == 1 {
   130  				// All valid runes of size 1 (those below utf8.RuneSelf) were
   131  				// handled above. We have invalid UTF-8 or we haven't seen the
   132  				// full character yet.
   133  				if !atEOF && !utf8.FullRune(src[nSrc:]) {
   134  					err = transform.ErrShortSrc
   135  					break
   136  				}
   137  			}
   138  		}
   139  
   140  		if r >= utf8.RuneSelf {
   141  			// func init checks that the switch covers all tables.
   142  			switch {
   143  			case encode0Low <= r && r < encode0High:
   144  				if r = rune(encode0[r-encode0Low]); r != 0 {
   145  					goto write2
   146  				}
   147  			case encode1Low <= r && r < encode1High:
   148  				if r = rune(encode1[r-encode1Low]); r != 0 {
   149  					goto write2
   150  				}
   151  			case encode2Low <= r && r < encode2High:
   152  				if r = rune(encode2[r-encode2Low]); r != 0 {
   153  					goto write2
   154  				}
   155  			case encode3Low <= r && r < encode3High:
   156  				if r = rune(encode3[r-encode3Low]); r != 0 {
   157  					goto write2
   158  				}
   159  			case encode4Low <= r && r < encode4High:
   160  				if r = rune(encode4[r-encode4Low]); r != 0 {
   161  					goto write2
   162  				}
   163  			case encode5Low <= r && r < encode5High:
   164  				if r = rune(encode5[r-encode5Low]); r != 0 {
   165  					goto write2
   166  				}
   167  			case encode6Low <= r && r < encode6High:
   168  				if r = rune(encode6[r-encode6Low]); r != 0 {
   169  					goto write2
   170  				}
   171  			case encode7Low <= r && r < encode7High:
   172  				if r = rune(encode7[r-encode7Low]); r != 0 {
   173  					goto write2
   174  				}
   175  			}
   176  			err = internal.ErrASCIIReplacement
   177  			break
   178  		}
   179  
   180  	write2:
   181  		if nDst+2 > len(dst) {
   182  			err = transform.ErrShortDst
   183  			break
   184  		}
   185  		dst[nDst+0] = uint8(r >> 8)
   186  		dst[nDst+1] = uint8(r)
   187  		nDst += 2
   188  		continue
   189  	}
   190  	return nDst, nSrc, err
   191  }
   192  
   193  func init() {
   194  	// Check that the hard-coded encode switch covers all tables.
   195  	if numEncodeTables != 8 {
   196  		panic("bad numEncodeTables")
   197  	}
   198  }