gitee.com/quant1x/gox@v1.7.6/text/encoding/gb18030.go (about)

     1  package encoding
     2  
     3  import (
     4  	"sync"
     5  )
     6  
     7  // Converters for GB18030 encoding.
     8  
     9  func init() {
    10  	RegisterCharset(&Charset{
    11  		Name: "GB18030",
    12  		NewDecoder: func() Decoder {
    13  			gb18030Once.Do(buildGB18030Tables)
    14  			return decodeGB18030Rune
    15  		},
    16  		NewEncoder: func() Encoder {
    17  			gb18030Once.Do(buildGB18030Tables)
    18  			return encodeGB18030Rune
    19  		},
    20  	})
    21  }
    22  
    23  func decodeGB18030Rune(p []byte) (r rune, size int, status Status) {
    24  	if len(p) == 0 {
    25  		status = NO_ROOM
    26  		return
    27  	}
    28  
    29  	b := p[0]
    30  	if b < 128 {
    31  		return rune(b), 1, SUCCESS
    32  	}
    33  
    34  	if len(p) < 2 {
    35  		status = NO_ROOM
    36  		return
    37  	}
    38  
    39  	if p[0] < 0x81 || p[0] > 0xfe {
    40  		return 0xfffd, 1, INVALID_CHAR
    41  	}
    42  
    43  	if p[1] >= 0x40 {
    44  		// 2-byte character
    45  		c := uint16(p[0])<<8 + uint16(p[1])
    46  		r = rune(gbkToUnicode[c])
    47  		if r == 0 {
    48  			r = gbkToUnicodeExtra[c]
    49  		}
    50  
    51  		if r != 0 {
    52  			return r, 2, SUCCESS
    53  		}
    54  	} else if p[1] >= 0x30 {
    55  		// 4-byte character
    56  		if len(p) < 4 {
    57  			return 0, 0, NO_ROOM
    58  		}
    59  		if p[2] < 0x81 || p[2] > 0xfe || p[3] < 0x30 || p[3] > 0x39 {
    60  			return 0xfffd, 1, INVALID_CHAR
    61  		}
    62  
    63  		code := uint32(p[0])<<24 + uint32(p[1])<<16 + uint32(p[2])<<8 + uint32(p[3])
    64  		lin := gb18030Linear(code)
    65  
    66  		if lin <= maxGB18030Linear {
    67  			r = rune(gb18030LinearToUnicode[lin])
    68  			if r != 0 {
    69  				return r, 4, SUCCESS
    70  			}
    71  		}
    72  
    73  		for _, rng := range gb18030Ranges {
    74  			if lin >= rng.firstGB && lin <= rng.lastGB {
    75  				return rng.firstRune + rune(lin) - rune(rng.firstGB), 4, SUCCESS
    76  			}
    77  		}
    78  	}
    79  
    80  	return 0xfffd, 1, INVALID_CHAR
    81  }
    82  
    83  func encodeGB18030Rune(p []byte, r rune) (size int, status Status) {
    84  	if len(p) == 0 {
    85  		status = NO_ROOM
    86  		return
    87  	}
    88  
    89  	if r < 128 {
    90  		p[0] = byte(r)
    91  		return 1, SUCCESS
    92  	}
    93  
    94  	if len(p) < 2 {
    95  		status = NO_ROOM
    96  		return
    97  	}
    98  
    99  	var c uint16
   100  	if r < 0x10000 {
   101  		c = unicodeToGBK[r]
   102  	} else {
   103  		c = unicodeToGBKExtra[r]
   104  	}
   105  
   106  	if c != 0 {
   107  		p[0] = byte(c >> 8)
   108  		p[1] = byte(c)
   109  		return 2, SUCCESS
   110  	}
   111  
   112  	if len(p) < 4 {
   113  		return 0, NO_ROOM
   114  	}
   115  
   116  	if r < 0x10000 {
   117  		f := unicodeToGB18030[r]
   118  		if f != 0 {
   119  			p[0] = byte(f >> 24)
   120  			p[1] = byte(f >> 16)
   121  			p[2] = byte(f >> 8)
   122  			p[3] = byte(f)
   123  			return 4, SUCCESS
   124  		}
   125  	}
   126  
   127  	for _, rng := range gb18030Ranges {
   128  		if r >= rng.firstRune && r <= rng.lastRune {
   129  			lin := rng.firstGB + uint32(r) - uint32(rng.firstRune)
   130  			p[0] = byte(lin/(10*126*10)) + 0x81
   131  			p[1] = byte(lin/(126*10)%10) + 0x30
   132  			p[2] = byte(lin/10%126) + 0x81
   133  			p[3] = byte(lin%10) + 0x30
   134  			return 4, SUCCESS
   135  		}
   136  	}
   137  
   138  	p[0] = 0x1a
   139  	return 1, INVALID_CHAR
   140  }
   141  
   142  var gb18030Once sync.Once
   143  
   144  // Mapping from gb18039Linear values to Unicode.
   145  var gb18030LinearToUnicode []uint16
   146  
   147  var unicodeToGB18030 []uint32
   148  
   149  func buildGB18030Tables() {
   150  	gb18030LinearToUnicode = make([]uint16, maxGB18030Linear+1)
   151  	unicodeToGB18030 = make([]uint32, 65536)
   152  	for _, data := range gb18030Data {
   153  		gb18030LinearToUnicode[gb18030Linear(data.gb18030)] = data.unicode
   154  		unicodeToGB18030[data.unicode] = data.gb18030
   155  	}
   156  }