vitess.io/vitess@v0.16.2/go/mysql/collations/internal/charset/convert.go (about) 1 /* 2 Copyright 2021 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package charset 18 19 import ( 20 "fmt" 21 "unicode/utf8" 22 ) 23 24 func failedConversionError(from, to Charset, input []byte) error { 25 return fmt.Errorf("Cannot convert string %q from %s to %s", input, from.Name(), to.Name()) 26 } 27 28 func convertFastFromUTF8(dst []byte, dstCharset Charset, src []byte) ([]byte, error) { 29 var failed, nDst int 30 31 if dst == nil { 32 dst = make([]byte, len(src)*3) 33 } else { 34 dst = dst[:cap(dst)] 35 } 36 37 for _, cp := range string(src) { 38 if len(dst)-nDst < 4 { 39 newDst := make([]byte, len(dst)*2) 40 copy(newDst, dst[:nDst]) 41 dst = newDst 42 } 43 w := dstCharset.EncodeRune(dst[nDst:], cp) 44 if w < 0 { 45 failed++ 46 if w = dstCharset.EncodeRune(dst[nDst:], '?'); w < 0 { 47 break 48 } 49 } 50 nDst += w 51 } 52 53 if failed > 0 { 54 return dst[:nDst], failedConversionError(&Charset_utf8mb4{}, dstCharset, src) 55 } 56 return dst[:nDst], nil 57 } 58 59 func convertSlow(dst []byte, dstCharset Charset, src []byte, srcCharset Charset) ([]byte, error) { 60 var failed, nDst int 61 var original = src 62 63 if dst == nil { 64 dst = make([]byte, len(src)*3) 65 } else { 66 dst = dst[:cap(dst)] 67 } 68 69 for len(src) > 0 { 70 cp, width := srcCharset.DecodeRune(src) 71 if cp == utf8.RuneError && width < 3 { 72 failed++ 73 cp = '?' 74 } 75 src = src[width:] 76 77 if len(dst)-nDst < 4 { 78 newDst := make([]byte, len(dst)*2) 79 copy(newDst, dst[:nDst]) 80 dst = newDst 81 } 82 83 w := dstCharset.EncodeRune(dst[nDst:], cp) 84 if w < 0 { 85 failed++ 86 w = dstCharset.EncodeRune(dst[nDst:], '?') 87 if w < 0 { 88 break 89 } 90 } 91 nDst += w 92 } 93 94 if failed > 0 { 95 return dst[:nDst], failedConversionError(srcCharset, dstCharset, original) 96 } 97 return dst[:nDst], nil 98 } 99 100 type Convertible interface { 101 Charset 102 Convert(dst, src []byte, from Charset) ([]byte, error) 103 } 104 105 // Convert transforms `src`, encoded with Charset `srcCharset`, and 106 // changes its encoding so that it becomes encoded with `dstCharset`. 107 // The result is appended to `dst` if `dst` is not nil; otherwise 108 // a new byte slice will be allocated to store the result. 109 func Convert(dst []byte, dstCharset Charset, src []byte, srcCharset Charset) ([]byte, error) { 110 if dstCharset.IsSuperset(srcCharset) { 111 return src, nil 112 } 113 if trans, ok := dstCharset.(Convertible); ok { 114 return trans.Convert(dst, src, srcCharset) 115 } 116 switch srcCharset.(type) { 117 case Charset_binary: 118 return ConvertFromBinary(dst, dstCharset, src) 119 case Charset_utf8mb3, Charset_utf8mb4: 120 return convertFastFromUTF8(dst, dstCharset, src) 121 default: 122 return convertSlow(dst, dstCharset, src, srcCharset) 123 } 124 } 125 126 func ConvertFromUTF8(dst []byte, dstCharset Charset, src []byte) ([]byte, error) { 127 return Convert(dst, dstCharset, src, Charset_utf8mb4{}) 128 } 129 130 func ConvertFromBinary(dst []byte, dstCharset Charset, src []byte) ([]byte, error) { 131 switch dstCharset.(type) { 132 case Charset_utf16, Charset_utf16le, Charset_ucs2: 133 if len(src)%2 == 1 { 134 dst = append(dst, 0) 135 } 136 case Charset_utf32: 137 // TODO: it doesn't look like mysql pads binary for 4-byte encodings 138 } 139 if dst == nil { 140 dst = src 141 } else { 142 dst = append(dst, src...) 143 } 144 if !Validate(dstCharset, dst) { 145 return nil, failedConversionError(&Charset_binary{}, dstCharset, src) 146 } 147 return dst, nil 148 }