gitee.com/quant1x/gox@v1.7.6/text/encoding/utf16.go (about)

     1  package encoding
     2  
     3  import (
     4  	"unicode/utf16"
     5  )
     6  
     7  func init() {
     8  	for i := 0; i < len(utf16Charsets); i++ {
     9  		RegisterCharset(&utf16Charsets[i])
    10  	}
    11  }
    12  
    13  var utf16Charsets = []Charset{
    14  	{
    15  		Name: "UTF-16",
    16  		NewDecoder: func() Decoder {
    17  			var decodeRune Decoder
    18  			return func(p []byte) (c rune, size int, status Status) {
    19  				if decodeRune == nil {
    20  					// haven't read the BOM yet
    21  					if len(p) < 2 {
    22  						status = NO_ROOM
    23  						return
    24  					}
    25  
    26  					switch {
    27  					case p[0] == 0xfe && p[1] == 0xff:
    28  						decodeRune = decodeUTF16beRune
    29  						return 0, 2, STATE_ONLY
    30  					case p[0] == 0xff && p[1] == 0xfe:
    31  						decodeRune = decodeUTF16leRune
    32  						return 0, 2, STATE_ONLY
    33  					default:
    34  						decodeRune = decodeUTF16beRune
    35  					}
    36  				}
    37  
    38  				return decodeRune(p)
    39  			}
    40  		},
    41  		NewEncoder: func() Encoder {
    42  			wroteBOM := false
    43  			return func(p []byte, c rune) (size int, status Status) {
    44  				if !wroteBOM {
    45  					if len(p) < 2 {
    46  						status = NO_ROOM
    47  						return
    48  					}
    49  
    50  					p[0] = 0xfe
    51  					p[1] = 0xff
    52  					wroteBOM = true
    53  					return 2, STATE_ONLY
    54  				}
    55  
    56  				return encodeUTF16beRune(p, c)
    57  			}
    58  		},
    59  	},
    60  	{
    61  		Name:       "UTF-16BE",
    62  		NewDecoder: func() Decoder { return decodeUTF16beRune },
    63  		NewEncoder: func() Encoder { return encodeUTF16beRune },
    64  	},
    65  	{
    66  		Name:       "UTF-16LE",
    67  		NewDecoder: func() Decoder { return decodeUTF16leRune },
    68  		NewEncoder: func() Encoder { return encodeUTF16leRune },
    69  	},
    70  }
    71  
    72  func decodeUTF16beRune(p []byte) (r rune, size int, status Status) {
    73  	if len(p) < 2 {
    74  		status = NO_ROOM
    75  		return
    76  	}
    77  
    78  	c := rune(p[0])<<8 + rune(p[1])
    79  
    80  	if utf16.IsSurrogate(c) {
    81  		if len(p) < 4 {
    82  			status = NO_ROOM
    83  			return
    84  		}
    85  
    86  		c2 := rune(p[2])<<8 + rune(p[3])
    87  		c = utf16.DecodeRune(c, c2)
    88  
    89  		if c == 0xfffd {
    90  			return c, 2, INVALID_CHAR
    91  		} else {
    92  			return c, 4, SUCCESS
    93  		}
    94  	}
    95  
    96  	return c, 2, SUCCESS
    97  }
    98  
    99  func encodeUTF16beRune(p []byte, c rune) (size int, status Status) {
   100  	if c < 0x10000 {
   101  		if len(p) < 2 {
   102  			status = NO_ROOM
   103  			return
   104  		}
   105  		p[0] = byte(c >> 8)
   106  		p[1] = byte(c)
   107  		return 2, SUCCESS
   108  	}
   109  
   110  	if len(p) < 4 {
   111  		status = NO_ROOM
   112  		return
   113  	}
   114  	s1, s2 := utf16.EncodeRune(c)
   115  	p[0] = byte(s1 >> 8)
   116  	p[1] = byte(s1)
   117  	p[2] = byte(s2 >> 8)
   118  	p[3] = byte(s2)
   119  	return 4, SUCCESS
   120  }
   121  
   122  func decodeUTF16leRune(p []byte) (r rune, size int, status Status) {
   123  	if len(p) < 2 {
   124  		status = NO_ROOM
   125  		return
   126  	}
   127  
   128  	c := rune(p[1])<<8 + rune(p[0])
   129  
   130  	if utf16.IsSurrogate(c) {
   131  		if len(p) < 4 {
   132  			status = NO_ROOM
   133  			return
   134  		}
   135  
   136  		c2 := rune(p[3])<<8 + rune(p[2])
   137  		c = utf16.DecodeRune(c, c2)
   138  
   139  		if c == 0xfffd {
   140  			return c, 2, INVALID_CHAR
   141  		} else {
   142  			return c, 4, SUCCESS
   143  		}
   144  	}
   145  
   146  	return c, 2, SUCCESS
   147  }
   148  
   149  func encodeUTF16leRune(p []byte, c rune) (size int, status Status) {
   150  	if c < 0x10000 {
   151  		if len(p) < 2 {
   152  			status = NO_ROOM
   153  			return
   154  		}
   155  		p[1] = byte(c >> 8)
   156  		p[0] = byte(c)
   157  		return 2, SUCCESS
   158  	}
   159  
   160  	if len(p) < 4 {
   161  		status = NO_ROOM
   162  		return
   163  	}
   164  	s1, s2 := utf16.EncodeRune(c)
   165  	p[1] = byte(s1 >> 8)
   166  	p[0] = byte(s1)
   167  	p[3] = byte(s2 >> 8)
   168  	p[2] = byte(s2)
   169  	return 4, SUCCESS
   170  }