vitess.io/vitess@v0.16.2/go/mysql/collations/internal/charset/unicode/utf16.go (about)

     1  /*
     2  Copyright 2021 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package unicode
    18  
    19  import (
    20  	"unicode/utf16"
    21  	"unicode/utf8"
    22  
    23  	"vitess.io/vitess/go/mysql/collations/internal/charset/types"
    24  )
    25  
    26  // 0xd800-0xdc00 encodes the high 10 bits of a pair.
    27  // 0xdc00-0xe000 encodes the low 10 bits of a pair.
    28  // the value is those 20 bits plus 0x10000.
    29  const (
    30  	surr1    = 0xd800
    31  	surr2    = 0xdc00
    32  	surr3    = 0xe000
    33  	surrSelf = 0x10000
    34  )
    35  
    36  type Charset_utf16be struct{}
    37  
    38  func (Charset_utf16be) Name() string {
    39  	return "utf16"
    40  }
    41  
    42  func (Charset_utf16be) IsSuperset(other types.Charset) bool {
    43  	switch other.(type) {
    44  	case Charset_utf16be, Charset_ucs2:
    45  		return true
    46  	default:
    47  		return false
    48  	}
    49  }
    50  
    51  func (Charset_utf16be) EncodeRune(dst []byte, r rune) int {
    52  	_ = dst[3]
    53  
    54  	if r <= 0xffff {
    55  		dst[0] = uint8(r >> 8)
    56  		dst[1] = uint8(r)
    57  		return 2
    58  	} else {
    59  		r1, r2 := utf16.EncodeRune(r)
    60  		dst[0] = uint8(r1 >> 8)
    61  		dst[1] = uint8(r1)
    62  		dst[2] = uint8(r2 >> 8)
    63  		dst[3] = uint8(r2)
    64  		return 4
    65  	}
    66  }
    67  
    68  func (Charset_utf16be) DecodeRune(b []byte) (rune, int) {
    69  	if len(b) < 2 {
    70  		return utf8.RuneError, 0
    71  	}
    72  
    73  	r1 := uint16(b[1]) | uint16(b[0])<<8
    74  	if r1 < surr1 || surr3 <= r1 {
    75  		return rune(r1), 2
    76  	}
    77  
    78  	if len(b) < 4 {
    79  		return utf8.RuneError, 0
    80  	}
    81  
    82  	r2 := uint16(b[3]) | uint16(b[2])<<8
    83  	if surr1 <= r1 && r1 < surr2 && surr2 <= r2 && r2 < surr3 {
    84  		return (rune(r1)-surr1)<<10 | (rune(r2) - surr2) + surrSelf, 4
    85  	}
    86  
    87  	return utf8.RuneError, 1
    88  }
    89  
    90  func (Charset_utf16be) SupportsSupplementaryChars() bool {
    91  	return true
    92  }
    93  
    94  type Charset_utf16le struct{}
    95  
    96  func (Charset_utf16le) Name() string {
    97  	return "utf16le"
    98  }
    99  
   100  func (Charset_utf16le) IsSuperset(other types.Charset) bool {
   101  	switch other.(type) {
   102  	case Charset_utf16le:
   103  		return true
   104  	default:
   105  		return false
   106  	}
   107  }
   108  
   109  func (Charset_utf16le) EncodeRune(dst []byte, r rune) int {
   110  	_ = dst[3]
   111  
   112  	if r <= 0xffff {
   113  		dst[0] = uint8(r)
   114  		dst[1] = uint8(r >> 8)
   115  		return 2
   116  	} else {
   117  		r1, r2 := utf16.EncodeRune(r)
   118  		dst[0] = uint8(r1)
   119  		dst[1] = uint8(r1 >> 8)
   120  		dst[2] = uint8(r2)
   121  		dst[3] = uint8(r2 >> 8)
   122  		return 4
   123  	}
   124  }
   125  
   126  func (Charset_utf16le) DecodeRune(b []byte) (rune, int) {
   127  	if len(b) < 2 {
   128  		return utf8.RuneError, 0
   129  	}
   130  
   131  	r1 := uint16(b[0]) | uint16(b[1])<<8
   132  	if r1 < surr1 || surr3 <= r1 {
   133  		return rune(r1), 2
   134  	}
   135  
   136  	if len(b) < 4 {
   137  		return utf8.RuneError, 0
   138  	}
   139  
   140  	r2 := uint16(b[2]) | uint16(b[3])<<8
   141  	if surr1 <= r1 && r1 < surr2 && surr2 <= r2 && r2 < surr3 {
   142  		return (rune(r1)-surr1)<<10 | (rune(r2) - surr2) + surrSelf, 4
   143  	}
   144  
   145  	return utf8.RuneError, 1
   146  }
   147  
   148  func (Charset_utf16le) SupportsSupplementaryChars() bool {
   149  	return true
   150  }
   151  
   152  type Charset_ucs2 struct{}
   153  
   154  func (Charset_ucs2) Name() string {
   155  	return "ucs2"
   156  }
   157  
   158  func (Charset_ucs2) IsSuperset(other types.Charset) bool {
   159  	switch other.(type) {
   160  	case Charset_ucs2:
   161  		return true
   162  	default:
   163  		return false
   164  	}
   165  }
   166  
   167  func (Charset_ucs2) EncodeRune(dst []byte, r rune) int {
   168  	_ = dst[1]
   169  
   170  	if r <= 0xffff {
   171  		dst[0] = uint8(r >> 8)
   172  		dst[1] = uint8(r)
   173  		return 2
   174  	}
   175  	return -1
   176  }
   177  
   178  func (Charset_ucs2) DecodeRune(p []byte) (rune, int) {
   179  	if len(p) < 2 {
   180  		return utf8.RuneError, 0
   181  	}
   182  	return rune(p[0])<<8 | rune(p[1]), 2
   183  }
   184  
   185  func (Charset_ucs2) SupportsSupplementaryChars() bool {
   186  	return false
   187  }
   188  
   189  func (Charset_ucs2) Length(src []byte) int {
   190  	cnt := len(src)
   191  	if cnt%2 != 0 {
   192  		return cnt/2 + 1
   193  	}
   194  	return cnt / 2
   195  }