// Source: github.com/pingcap/tidb/parser@v0.0.0-20231013125129-93a834a6bf8d/charset/encoding_base.go

// Copyright 2021 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package charset

import (
	"bytes"
	"fmt"
	"reflect"
	"strings"
	"unsafe"

	"github.com/pingcap/tidb/parser/mysql"
	"github.com/pingcap/tidb/parser/terror"
	"golang.org/x/text/encoding"
	"golang.org/x/text/transform"
)

// ErrInvalidCharacterString is the error reported when a string is invalid in
// the specific charset. It is a parser-class error created from
// mysql.ErrInvalidCharacterString.
var ErrInvalidCharacterString = terror.ClassParser.NewStd(mysql.ErrInvalidCharacterString)

// encodingBase defines some generic functions.
33 type encodingBase struct { 34 enc encoding.Encoding 35 self Encoding 36 } 37 38 func (encodingBase) MbLen(_ string) int { 39 return 0 40 } 41 42 func (encodingBase) ToUpper(src string) string { 43 return strings.ToUpper(src) 44 } 45 46 func (encodingBase) ToLower(src string) string { 47 return strings.ToLower(src) 48 } 49 50 func (b encodingBase) IsValid(src []byte) bool { 51 isValid := true 52 b.self.Foreach(src, opFromUTF8, func(from, to []byte, ok bool) bool { 53 isValid = ok 54 return ok 55 }) 56 return isValid 57 } 58 59 func (b encodingBase) Transform(dest *bytes.Buffer, src []byte, op Op) (result []byte, err error) { 60 if dest == nil { 61 dest = &bytes.Buffer{} 62 dest.Grow(len(src)) 63 } 64 dest.Reset() 65 b.self.Foreach(src, op, func(from, to []byte, ok bool) bool { 66 if !ok { 67 if err == nil && (op&opSkipError == 0) { 68 err = generateEncodingErr(b.self.Name(), from) 69 } 70 if op&opTruncateTrim != 0 { 71 return false 72 } 73 if op&opTruncateReplace != 0 { 74 dest.WriteByte('?') 75 return true 76 } 77 } 78 if op&opCollectFrom != 0 { 79 dest.Write(from) 80 } else if op&opCollectTo != 0 { 81 dest.Write(to) 82 } 83 return true 84 }) 85 return dest.Bytes(), err 86 } 87 88 func (b encodingBase) Foreach(src []byte, op Op, fn func(from, to []byte, ok bool) bool) { 89 var tfm transform.Transformer 90 var peek func([]byte) []byte 91 if op&opFromUTF8 != 0 { 92 tfm = b.enc.NewEncoder() 93 peek = EncodingUTF8Impl.Peek 94 } else { 95 tfm = b.enc.NewDecoder() 96 peek = b.self.Peek 97 } 98 var buf [4]byte 99 for i, w := 0, 0; i < len(src); i += w { 100 w = len(peek(src[i:])) 101 nDst, _, err := tfm.Transform(buf[:], src[i:i+w], false) 102 meetErr := err != nil || (op&opToUTF8 != 0 && beginWithReplacementChar(buf[:nDst])) 103 if !fn(src[i:i+w], buf[:nDst], !meetErr) { 104 return 105 } 106 } 107 } 108 109 // replacementBytes are bytes for the replacement rune 0xfffd. 
110 var replacementBytes = []byte{0xEF, 0xBF, 0xBD} 111 112 // beginWithReplacementChar check if dst has the prefix '0xEFBFBD'. 113 func beginWithReplacementChar(dst []byte) bool { 114 return bytes.HasPrefix(dst, replacementBytes) 115 } 116 117 // generateEncodingErr generates an invalid string in charset error. 118 func generateEncodingErr(name string, invalidBytes []byte) error { 119 arg := fmt.Sprintf("%X", invalidBytes) 120 return ErrInvalidCharacterString.FastGenByArgs(name, arg) 121 } 122 123 // HackSlice converts string to slice without copy. 124 // Use at your own risk. 125 func HackSlice(s string) (b []byte) { 126 pBytes := (*reflect.SliceHeader)(unsafe.Pointer(&b)) 127 pString := (*reflect.StringHeader)(unsafe.Pointer(&s)) 128 pBytes.Data = pString.Data 129 pBytes.Len = pString.Len 130 pBytes.Cap = pString.Len 131 return 132 } 133 134 // HackString converts slice to string without copy. 135 // Use it at your own risk. 136 func HackString(b []byte) (s string) { 137 if len(b) == 0 { 138 return "" 139 } 140 pbytes := (*reflect.SliceHeader)(unsafe.Pointer(&b)) 141 pstring := (*reflect.StringHeader)(unsafe.Pointer(&s)) 142 pstring.Data = pbytes.Data 143 pstring.Len = pbytes.Len 144 return 145 }