vitess.io/vitess@v0.16.2/go/mysql/collations/internal/charset/unicode/utf16.go (about) 1 /* 2 Copyright 2021 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package unicode 18 19 import ( 20 "unicode/utf16" 21 "unicode/utf8" 22 23 "vitess.io/vitess/go/mysql/collations/internal/charset/types" 24 ) 25 26 // 0xd800-0xdc00 encodes the high 10 bits of a pair. 27 // 0xdc00-0xe000 encodes the low 10 bits of a pair. 28 // the value is those 20 bits plus 0x10000. 29 const ( 30 surr1 = 0xd800 31 surr2 = 0xdc00 32 surr3 = 0xe000 33 surrSelf = 0x10000 34 ) 35 36 type Charset_utf16be struct{} 37 38 func (Charset_utf16be) Name() string { 39 return "utf16" 40 } 41 42 func (Charset_utf16be) IsSuperset(other types.Charset) bool { 43 switch other.(type) { 44 case Charset_utf16be, Charset_ucs2: 45 return true 46 default: 47 return false 48 } 49 } 50 51 func (Charset_utf16be) EncodeRune(dst []byte, r rune) int { 52 _ = dst[3] 53 54 if r <= 0xffff { 55 dst[0] = uint8(r >> 8) 56 dst[1] = uint8(r) 57 return 2 58 } else { 59 r1, r2 := utf16.EncodeRune(r) 60 dst[0] = uint8(r1 >> 8) 61 dst[1] = uint8(r1) 62 dst[2] = uint8(r2 >> 8) 63 dst[3] = uint8(r2) 64 return 4 65 } 66 } 67 68 func (Charset_utf16be) DecodeRune(b []byte) (rune, int) { 69 if len(b) < 2 { 70 return utf8.RuneError, 0 71 } 72 73 r1 := uint16(b[1]) | uint16(b[0])<<8 74 if r1 < surr1 || surr3 <= r1 { 75 return rune(r1), 2 76 } 77 78 if len(b) < 4 { 79 return utf8.RuneError, 0 80 } 81 82 r2 := uint16(b[3]) | uint16(b[2])<<8 83 if surr1 <= r1 && r1 < surr2 && surr2 <= r2 && r2 < surr3 { 84 return (rune(r1)-surr1)<<10 | (rune(r2) - surr2) + surrSelf, 4 85 } 86 87 return utf8.RuneError, 1 88 } 89 90 func (Charset_utf16be) SupportsSupplementaryChars() bool { 91 return true 92 } 93 94 type Charset_utf16le struct{} 95 96 func (Charset_utf16le) Name() string { 97 return "utf16le" 98 } 99 100 func (Charset_utf16le) IsSuperset(other types.Charset) bool { 101 switch other.(type) { 102 case Charset_utf16le: 103 return true 104 default: 105 return false 106 } 107 } 108 109 func (Charset_utf16le) EncodeRune(dst []byte, r rune) int { 110 _ = dst[3] 111 112 if r <= 0xffff { 113 dst[0] = uint8(r) 114 dst[1] = uint8(r >> 8) 115 return 2 116 } else { 117 r1, r2 := utf16.EncodeRune(r) 118 dst[0] = uint8(r1) 119 dst[1] = uint8(r1 >> 8) 120 dst[2] = uint8(r2) 121 dst[3] = uint8(r2 >> 8) 122 return 4 123 } 124 } 125 126 func (Charset_utf16le) DecodeRune(b []byte) (rune, int) { 127 if len(b) < 2 { 128 return utf8.RuneError, 0 129 } 130 131 r1 := uint16(b[0]) | uint16(b[1])<<8 132 if r1 < surr1 || surr3 <= r1 { 133 return rune(r1), 2 134 } 135 136 if len(b) < 4 { 137 return utf8.RuneError, 0 138 } 139 140 r2 := uint16(b[2]) | uint16(b[3])<<8 141 if surr1 <= r1 && r1 < surr2 && surr2 <= r2 && r2 < surr3 { 142 return (rune(r1)-surr1)<<10 | (rune(r2) - surr2) + surrSelf, 4 143 } 144 145 return utf8.RuneError, 1 146 } 147 148 func (Charset_utf16le) SupportsSupplementaryChars() bool { 149 return true 150 } 151 152 type Charset_ucs2 struct{} 153 154 func (Charset_ucs2) Name() string { 155 return "ucs2" 156 } 157 158 func (Charset_ucs2) IsSuperset(other types.Charset) bool { 159 switch other.(type) { 160 case Charset_ucs2: 161 return true 162 default: 163 return false 164 } 165 } 166 167 func (Charset_ucs2) EncodeRune(dst []byte, r rune) int { 168 _ = dst[1] 169 170 if r <= 0xffff { 171 dst[0] = uint8(r >> 8) 172 dst[1] = uint8(r) 173 return 2 174 } 175 return -1 176 } 177 178 func (Charset_ucs2) DecodeRune(p []byte) (rune, int) { 179 if len(p) < 2 { 180 return utf8.RuneError, 0 181 } 182 return rune(p[0])<<8 | rune(p[1]), 2 183 } 184 185 func (Charset_ucs2) SupportsSupplementaryChars() bool { 186 return false 187 } 188 189 func (Charset_ucs2) Length(src []byte) int { 190 cnt := len(src) 191 if cnt%2 != 0 { 192 return cnt/2 + 1 193 } 194 return cnt / 2 195 }