vitess.io/vitess@v0.16.2/go/mysql/collations/internal/charset/convert.go (about)

     1  /*
     2  Copyright 2021 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package charset
    18  
    19  import (
    20  	"fmt"
    21  	"unicode/utf8"
    22  )
    23  
    24  func failedConversionError(from, to Charset, input []byte) error {
    25  	return fmt.Errorf("Cannot convert string %q from %s to %s", input, from.Name(), to.Name())
    26  }
    27  
    28  func convertFastFromUTF8(dst []byte, dstCharset Charset, src []byte) ([]byte, error) {
    29  	var failed, nDst int
    30  
    31  	if dst == nil {
    32  		dst = make([]byte, len(src)*3)
    33  	} else {
    34  		dst = dst[:cap(dst)]
    35  	}
    36  
    37  	for _, cp := range string(src) {
    38  		if len(dst)-nDst < 4 {
    39  			newDst := make([]byte, len(dst)*2)
    40  			copy(newDst, dst[:nDst])
    41  			dst = newDst
    42  		}
    43  		w := dstCharset.EncodeRune(dst[nDst:], cp)
    44  		if w < 0 {
    45  			failed++
    46  			if w = dstCharset.EncodeRune(dst[nDst:], '?'); w < 0 {
    47  				break
    48  			}
    49  		}
    50  		nDst += w
    51  	}
    52  
    53  	if failed > 0 {
    54  		return dst[:nDst], failedConversionError(&Charset_utf8mb4{}, dstCharset, src)
    55  	}
    56  	return dst[:nDst], nil
    57  }
    58  
    59  func convertSlow(dst []byte, dstCharset Charset, src []byte, srcCharset Charset) ([]byte, error) {
    60  	var failed, nDst int
    61  	var original = src
    62  
    63  	if dst == nil {
    64  		dst = make([]byte, len(src)*3)
    65  	} else {
    66  		dst = dst[:cap(dst)]
    67  	}
    68  
    69  	for len(src) > 0 {
    70  		cp, width := srcCharset.DecodeRune(src)
    71  		if cp == utf8.RuneError && width < 3 {
    72  			failed++
    73  			cp = '?'
    74  		}
    75  		src = src[width:]
    76  
    77  		if len(dst)-nDst < 4 {
    78  			newDst := make([]byte, len(dst)*2)
    79  			copy(newDst, dst[:nDst])
    80  			dst = newDst
    81  		}
    82  
    83  		w := dstCharset.EncodeRune(dst[nDst:], cp)
    84  		if w < 0 {
    85  			failed++
    86  			w = dstCharset.EncodeRune(dst[nDst:], '?')
    87  			if w < 0 {
    88  				break
    89  			}
    90  		}
    91  		nDst += w
    92  	}
    93  
    94  	if failed > 0 {
    95  		return dst[:nDst], failedConversionError(srcCharset, dstCharset, original)
    96  	}
    97  	return dst[:nDst], nil
    98  }
    99  
   100  type Convertible interface {
   101  	Charset
   102  	Convert(dst, src []byte, from Charset) ([]byte, error)
   103  }
   104  
   105  // Convert transforms `src`, encoded with Charset `srcCharset`, and
   106  // changes its encoding so that it becomes encoded with `dstCharset`.
   107  // The result is appended to `dst` if `dst` is not nil; otherwise
   108  // a new byte slice will be allocated to store the result.
   109  func Convert(dst []byte, dstCharset Charset, src []byte, srcCharset Charset) ([]byte, error) {
   110  	if dstCharset.IsSuperset(srcCharset) {
   111  		return src, nil
   112  	}
   113  	if trans, ok := dstCharset.(Convertible); ok {
   114  		return trans.Convert(dst, src, srcCharset)
   115  	}
   116  	switch srcCharset.(type) {
   117  	case Charset_binary:
   118  		return ConvertFromBinary(dst, dstCharset, src)
   119  	case Charset_utf8mb3, Charset_utf8mb4:
   120  		return convertFastFromUTF8(dst, dstCharset, src)
   121  	default:
   122  		return convertSlow(dst, dstCharset, src, srcCharset)
   123  	}
   124  }
   125  
   126  func ConvertFromUTF8(dst []byte, dstCharset Charset, src []byte) ([]byte, error) {
   127  	return Convert(dst, dstCharset, src, Charset_utf8mb4{})
   128  }
   129  
   130  func ConvertFromBinary(dst []byte, dstCharset Charset, src []byte) ([]byte, error) {
   131  	switch dstCharset.(type) {
   132  	case Charset_utf16, Charset_utf16le, Charset_ucs2:
   133  		if len(src)%2 == 1 {
   134  			dst = append(dst, 0)
   135  		}
   136  	case Charset_utf32:
   137  		// TODO: it doesn't look like mysql pads binary for 4-byte encodings
   138  	}
   139  	if dst == nil {
   140  		dst = src
   141  	} else {
   142  		dst = append(dst, src...)
   143  	}
   144  	if !Validate(dstCharset, dst) {
   145  		return nil, failedConversionError(&Charset_binary{}, dstCharset, src)
   146  	}
   147  	return dst, nil
   148  }