github.com/pingcap/tidb/parser@v0.0.0-20231013125129-93a834a6bf8d/charset/encoding_base.go (about)

     1  // Copyright 2021 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package charset
    15  
    16  import (
    17  	"bytes"
    18  	"fmt"
    19  	"reflect"
    20  	"strings"
    21  	"unsafe"
    22  
    23  	"github.com/pingcap/tidb/parser/mysql"
    24  	"github.com/pingcap/tidb/parser/terror"
    25  	"golang.org/x/text/encoding"
    26  	"golang.org/x/text/transform"
    27  )
    28  
// ErrInvalidCharacterString is the error returned when a string is invalid in
// the specified charset. It is built from the mysql.ErrInvalidCharacterString
// code in the parser error class.
var ErrInvalidCharacterString = terror.ClassParser.NewStd(mysql.ErrInvalidCharacterString)
    31  
// encodingBase supplies generic implementations of MbLen, ToUpper, ToLower,
// IsValid, Transform and Foreach on top of a golang.org/x/text encoding.
// NOTE(review): presumably meant to be embedded by concrete encodings, which
// may override individual methods; confirm against the implementations.
type encodingBase struct {
	// enc provides the encoder/decoder that Foreach uses to convert characters.
	enc  encoding.Encoding
	// self is the Encoding whose Foreach, Name and Peek methods are invoked by
	// the generic methods of this type.
	// NOTE(review): presumably set to the concrete encoding that embeds this
	// struct so that its overrides are honored; confirm at the init sites.
	self Encoding
}
    37  
    38  func (encodingBase) MbLen(_ string) int {
    39  	return 0
    40  }
    41  
    42  func (encodingBase) ToUpper(src string) string {
    43  	return strings.ToUpper(src)
    44  }
    45  
    46  func (encodingBase) ToLower(src string) string {
    47  	return strings.ToLower(src)
    48  }
    49  
    50  func (b encodingBase) IsValid(src []byte) bool {
    51  	isValid := true
    52  	b.self.Foreach(src, opFromUTF8, func(from, to []byte, ok bool) bool {
    53  		isValid = ok
    54  		return ok
    55  	})
    56  	return isValid
    57  }
    58  
    59  func (b encodingBase) Transform(dest *bytes.Buffer, src []byte, op Op) (result []byte, err error) {
    60  	if dest == nil {
    61  		dest = &bytes.Buffer{}
    62  		dest.Grow(len(src))
    63  	}
    64  	dest.Reset()
    65  	b.self.Foreach(src, op, func(from, to []byte, ok bool) bool {
    66  		if !ok {
    67  			if err == nil && (op&opSkipError == 0) {
    68  				err = generateEncodingErr(b.self.Name(), from)
    69  			}
    70  			if op&opTruncateTrim != 0 {
    71  				return false
    72  			}
    73  			if op&opTruncateReplace != 0 {
    74  				dest.WriteByte('?')
    75  				return true
    76  			}
    77  		}
    78  		if op&opCollectFrom != 0 {
    79  			dest.Write(from)
    80  		} else if op&opCollectTo != 0 {
    81  			dest.Write(to)
    82  		}
    83  		return true
    84  	})
    85  	return dest.Bytes(), err
    86  }
    87  
    88  func (b encodingBase) Foreach(src []byte, op Op, fn func(from, to []byte, ok bool) bool) {
    89  	var tfm transform.Transformer
    90  	var peek func([]byte) []byte
    91  	if op&opFromUTF8 != 0 {
    92  		tfm = b.enc.NewEncoder()
    93  		peek = EncodingUTF8Impl.Peek
    94  	} else {
    95  		tfm = b.enc.NewDecoder()
    96  		peek = b.self.Peek
    97  	}
    98  	var buf [4]byte
    99  	for i, w := 0, 0; i < len(src); i += w {
   100  		w = len(peek(src[i:]))
   101  		nDst, _, err := tfm.Transform(buf[:], src[i:i+w], false)
   102  		meetErr := err != nil || (op&opToUTF8 != 0 && beginWithReplacementChar(buf[:nDst]))
   103  		if !fn(src[i:i+w], buf[:nDst], !meetErr) {
   104  			return
   105  		}
   106  	}
   107  }
   108  
   109  // replacementBytes are bytes for the replacement rune 0xfffd.
   110  var replacementBytes = []byte{0xEF, 0xBF, 0xBD}
   111  
   112  // beginWithReplacementChar check if dst has the prefix '0xEFBFBD'.
   113  func beginWithReplacementChar(dst []byte) bool {
   114  	return bytes.HasPrefix(dst, replacementBytes)
   115  }
   116  
   117  // generateEncodingErr generates an invalid string in charset error.
   118  func generateEncodingErr(name string, invalidBytes []byte) error {
   119  	arg := fmt.Sprintf("%X", invalidBytes)
   120  	return ErrInvalidCharacterString.FastGenByArgs(name, arg)
   121  }
   122  
   123  // HackSlice converts string to slice without copy.
   124  // Use at your own risk.
   125  func HackSlice(s string) (b []byte) {
   126  	pBytes := (*reflect.SliceHeader)(unsafe.Pointer(&b))
   127  	pString := (*reflect.StringHeader)(unsafe.Pointer(&s))
   128  	pBytes.Data = pString.Data
   129  	pBytes.Len = pString.Len
   130  	pBytes.Cap = pString.Len
   131  	return
   132  }
   133  
   134  // HackString converts slice to string without copy.
   135  // Use it at your own risk.
   136  func HackString(b []byte) (s string) {
   137  	if len(b) == 0 {
   138  		return ""
   139  	}
   140  	pbytes := (*reflect.SliceHeader)(unsafe.Pointer(&b))
   141  	pstring := (*reflect.StringHeader)(unsafe.Pointer(&s))
   142  	pstring.Data = pbytes.Data
   143  	pstring.Len = pbytes.Len
   144  	return
   145  }