github.com/whtcorpsinc/MilevaDB-Prod@v0.0.0-20211104133533-f57f4be3b597/soliton/collate/collate.go (about)

     1  // Copyright 2020 WHTCORPS INC, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package defCauslate
    15  
    16  import (
    17  	"sort"
    18  	"sync/atomic"
    19  
    20  	"github.com/whtcorpsinc/errors"
    21  	"github.com/whtcorpsinc/BerolinaSQL/charset"
    22  	"github.com/whtcorpsinc/BerolinaSQL/allegrosql"
    23  	"github.com/whtcorpsinc/BerolinaSQL/terror"
    24  	"github.com/whtcorpsinc/milevadb/soliton/logutil"
    25  	"go.uber.org/zap"
    26  )
    27  
    28  var (
    29  	newDefCauslatorMap      map[string]DefCauslator
    30  	newDefCauslatorIDMap    map[int]DefCauslator
    31  	newDefCauslationEnabled int32
    32  
    33  	// binDefCauslatorInstance is a singleton used for all defCauslations when newDefCauslationEnabled is false.
    34  	binDefCauslatorInstance = &binDefCauslator{}
    35  
    36  	// ErrUnsupportedDefCauslation is returned when an unsupported defCauslation is specified.
    37  	ErrUnsupportedDefCauslation = terror.ClassDBS.New(allegrosql.ErrUnknownDefCauslation, "Unsupported defCauslation when new defCauslation is enabled: '%-.64s'")
    38  	// ErrIllegalMixDefCauslation is returned when illegal mix of defCauslations.
    39  	ErrIllegalMixDefCauslation = terror.ClassExpression.New(allegrosql.ErrCantAggregateNdefCauslations, allegrosql.MyALLEGROSQLErrName[allegrosql.ErrCantAggregateNdefCauslations])
    40  	// ErrIllegalMix2DefCauslation is returned when illegal mix of 2 defCauslations.
    41  	ErrIllegalMix2DefCauslation = terror.ClassExpression.New(allegrosql.ErrCantAggregate2defCauslations, allegrosql.MyALLEGROSQLErrName[allegrosql.ErrCantAggregate2defCauslations])
    42  	// ErrIllegalMix3DefCauslation is returned when illegal mix of 3 defCauslations.
    43  	ErrIllegalMix3DefCauslation = terror.ClassExpression.New(allegrosql.ErrCantAggregate3defCauslations, allegrosql.MyALLEGROSQLErrName[allegrosql.ErrCantAggregate3defCauslations])
    44  )
    45  
    46  // DefaultLen is set for causet if the string causet don't know its length.
    47  const (
    48  	DefaultLen = 0
    49  )
    50  
    51  // DefCauslator provides functionality for comparing strings for a given
    52  // defCauslation order.
    53  type DefCauslator interface {
    54  	// Compare returns an integer comparing the two strings. The result will be 0 if a == b, -1 if a < b, and +1 if a > b.
    55  	Compare(a, b string) int
    56  	// Key returns the defCauslate key for str. If the defCauslation is padding, make sure the PadLen >= len(rune[]str) in opt.
    57  	Key(str string) []byte
    58  	// Pattern get a defCauslation-aware WildcardPattern.
    59  	Pattern() WildcardPattern
    60  }
    61  
    62  // WildcardPattern is the interface used for wildcard pattern match.
    63  type WildcardPattern interface {
    64  	// Compile compiles the patternStr with specified escape character.
    65  	Compile(patternStr string, escape byte)
    66  	// DoMatch tries to match the str with compiled pattern, `Compile()` must be called before calling it.
    67  	DoMatch(str string) bool
    68  }
    69  
    70  // EnableNewDefCauslations enables the new defCauslation.
    71  func EnableNewDefCauslations() {
    72  	SetNewDefCauslationEnabledForTest(true)
    73  }
    74  
    75  // SetNewDefCauslationEnabledForTest sets if the new defCauslation are enabled in test.
    76  // Note: Be careful to use this function, if this functions is used in tests, make sure the tests are serial.
    77  func SetNewDefCauslationEnabledForTest(flag bool) {
    78  	if flag {
    79  		atomic.StoreInt32(&newDefCauslationEnabled, 1)
    80  		return
    81  	}
    82  	atomic.StoreInt32(&newDefCauslationEnabled, 0)
    83  }
    84  
    85  // NewDefCauslationEnabled returns if the new defCauslations are enabled.
    86  func NewDefCauslationEnabled() bool {
    87  	return atomic.LoadInt32(&newDefCauslationEnabled) == 1
    88  }
    89  
    90  // CompatibleDefCauslate checks whether the two defCauslate are the same.
    91  func CompatibleDefCauslate(defCauslate1, defCauslate2 string) bool {
    92  	if (defCauslate1 == "utf8mb4_general_ci" || defCauslate1 == "utf8_general_ci") && (defCauslate2 == "utf8mb4_general_ci" || defCauslate2 == "utf8_general_ci") {
    93  		return true
    94  	} else if (defCauslate1 == "utf8mb4_bin" || defCauslate1 == "utf8_bin") && (defCauslate2 == "utf8mb4_bin" || defCauslate2 == "utf8_bin") {
    95  		return true
    96  	} else if (defCauslate1 == "utf8mb4_unicode_ci" || defCauslate1 == "utf8_unicode_ci") && (defCauslate2 == "utf8mb4_unicode_ci" || defCauslate2 == "utf8_unicode_ci") {
    97  		return true
    98  	} else {
    99  		return defCauslate1 == defCauslate2
   100  	}
   101  }
   102  
   103  // RewriteNewDefCauslationIDIfNeeded rewrites a defCauslation id if the new defCauslations are enabled.
   104  // When new defCauslations are enabled, we turn the defCauslation id to negative so that other the
   105  // components of the cluster(for example, EinsteinDB) is able to aware of it without any change to
   106  // the protodefCaus definition.
   107  // When new defCauslations are not enabled, defCauslation id remains the same.
   108  func RewriteNewDefCauslationIDIfNeeded(id int32) int32 {
   109  	if atomic.LoadInt32(&newDefCauslationEnabled) == 1 {
   110  		if id < 0 {
   111  			logutil.BgLogger().Warn("Unexpected negative defCauslation ID for rewrite.", zap.Int32("ID", id))
   112  		} else {
   113  			return -id
   114  		}
   115  	}
   116  	return id
   117  }
   118  
   119  // RestoreDefCauslationIDIfNeeded restores a defCauslation id if the new defCauslations are enabled.
   120  func RestoreDefCauslationIDIfNeeded(id int32) int32 {
   121  	if atomic.LoadInt32(&newDefCauslationEnabled) == 1 {
   122  		if id > 0 {
   123  			logutil.BgLogger().Warn("Unexpected positive defCauslation ID for restore.", zap.Int32("ID", id))
   124  		} else {
   125  			return -id
   126  		}
   127  	}
   128  	return id
   129  }
   130  
   131  // GetDefCauslator get the defCauslator according to defCauslate, it will return the binary defCauslator if the corresponding defCauslator doesn't exist.
   132  func GetDefCauslator(defCauslate string) DefCauslator {
   133  	if atomic.LoadInt32(&newDefCauslationEnabled) == 1 {
   134  		ctor, ok := newDefCauslatorMap[defCauslate]
   135  		if !ok {
   136  			logutil.BgLogger().Warn(
   137  				"Unable to get defCauslator by name, use binDefCauslator instead.",
   138  				zap.String("name", defCauslate),
   139  				zap.Stack("stack"))
   140  			return newDefCauslatorMap["utf8mb4_bin"]
   141  		}
   142  		return ctor
   143  	}
   144  	return binDefCauslatorInstance
   145  }
   146  
   147  // GetDefCauslatorByID get the defCauslator according to id, it will return the binary defCauslator if the corresponding defCauslator doesn't exist.
   148  func GetDefCauslatorByID(id int) DefCauslator {
   149  	if atomic.LoadInt32(&newDefCauslationEnabled) == 1 {
   150  		ctor, ok := newDefCauslatorIDMap[id]
   151  		if !ok {
   152  			logutil.BgLogger().Warn(
   153  				"Unable to get defCauslator by ID, use binDefCauslator instead.",
   154  				zap.Int("ID", id),
   155  				zap.Stack("stack"))
   156  			return newDefCauslatorMap["utf8mb4_bin"]
   157  		}
   158  		return ctor
   159  	}
   160  	return binDefCauslatorInstance
   161  }
   162  
   163  // DefCauslationID2Name return the defCauslation name by the given id.
   164  // If the id is not found in the map, the default defCauslation is returned.
   165  func DefCauslationID2Name(id int32) string {
   166  	name, ok := allegrosql.DefCauslations[uint8(id)]
   167  	if !ok {
   168  		// TODO(bb7133): fix repeating logs when the following code is uncommented.
   169  		//logutil.BgLogger().Warn(
   170  		//	"Unable to get defCauslation name from ID, use default defCauslation instead.",
   171  		//	zap.Int32("ID", id),
   172  		//	zap.Stack("stack"))
   173  		return allegrosql.DefaultDefCauslationName
   174  	}
   175  	return name
   176  }
   177  
   178  // GetDefCauslationByName wraps charset.GetDefCauslationByName, it checks the defCauslation.
   179  func GetDefCauslationByName(name string) (defCausl *charset.DefCauslation, err error) {
   180  	if defCausl, err = charset.GetDefCauslationByName(name); err != nil {
   181  		return nil, errors.Trace(err)
   182  	}
   183  	if atomic.LoadInt32(&newDefCauslationEnabled) == 1 {
   184  		if _, ok := newDefCauslatorIDMap[defCausl.ID]; !ok {
   185  			return nil, ErrUnsupportedDefCauslation.GenWithStackByArgs(name)
   186  		}
   187  	}
   188  	return
   189  }
   190  
   191  // GetSupportedDefCauslations gets information for all defCauslations supported so far.
   192  func GetSupportedDefCauslations() []*charset.DefCauslation {
   193  	if atomic.LoadInt32(&newDefCauslationEnabled) == 1 {
   194  		newSupportedDefCauslations := make([]*charset.DefCauslation, 0, len(newDefCauslatorMap))
   195  		for name := range newDefCauslatorMap {
   196  			if defCausl, err := charset.GetDefCauslationByName(name); err != nil {
   197  				// Should never happens.
   198  				terror.Log(err)
   199  			} else {
   200  				newSupportedDefCauslations = append(newSupportedDefCauslations, defCausl)
   201  			}
   202  		}
   203  		sort.Slice(newSupportedDefCauslations, func(i int, j int) bool {
   204  			return newSupportedDefCauslations[i].Name < newSupportedDefCauslations[j].Name
   205  		})
   206  		return newSupportedDefCauslations
   207  	}
   208  	return charset.GetSupportedDefCauslations()
   209  }
   210  
   211  func truncateTailingSpace(str string) string {
   212  	byteLen := len(str)
   213  	i := byteLen - 1
   214  	for ; i >= 0; i-- {
   215  		if str[i] != ' ' {
   216  			break
   217  		}
   218  	}
   219  	str = str[:i+1]
   220  	return str
   221  }
   222  
   223  // IsCIDefCauslation returns if the defCauslation is case-sensitive
   224  func IsCIDefCauslation(defCauslate string) bool {
   225  	return defCauslate == "utf8_general_ci" || defCauslate == "utf8mb4_general_ci" ||
   226  		defCauslate == "utf8_unicode_ci" || defCauslate == "utf8mb4_unicode_ci"
   227  }
   228  
   229  func init() {
   230  	newDefCauslatorMap = make(map[string]DefCauslator)
   231  	newDefCauslatorIDMap = make(map[int]DefCauslator)
   232  
   233  	newDefCauslatorMap["binary"] = &binDefCauslator{}
   234  	newDefCauslatorIDMap[int(allegrosql.DefCauslationNames["binary"])] = &binDefCauslator{}
   235  	newDefCauslatorMap["ascii_bin"] = &binPaddingDefCauslator{}
   236  	newDefCauslatorIDMap[int(allegrosql.DefCauslationNames["ascii_bin"])] = &binPaddingDefCauslator{}
   237  	newDefCauslatorMap["latin1_bin"] = &binPaddingDefCauslator{}
   238  	newDefCauslatorIDMap[int(allegrosql.DefCauslationNames["latin1_bin"])] = &binPaddingDefCauslator{}
   239  	newDefCauslatorMap["utf8mb4_bin"] = &binPaddingDefCauslator{}
   240  	newDefCauslatorIDMap[int(allegrosql.DefCauslationNames["utf8mb4_bin"])] = &binPaddingDefCauslator{}
   241  	newDefCauslatorMap["utf8_bin"] = &binPaddingDefCauslator{}
   242  	newDefCauslatorIDMap[int(allegrosql.DefCauslationNames["utf8_bin"])] = &binPaddingDefCauslator{}
   243  	newDefCauslatorMap["utf8mb4_general_ci"] = &generalCIDefCauslator{}
   244  	newDefCauslatorIDMap[int(allegrosql.DefCauslationNames["utf8mb4_general_ci"])] = &generalCIDefCauslator{}
   245  	newDefCauslatorMap["utf8_general_ci"] = &generalCIDefCauslator{}
   246  	newDefCauslatorIDMap[int(allegrosql.DefCauslationNames["utf8_general_ci"])] = &generalCIDefCauslator{}
   247  	newDefCauslatorMap["utf8mb4_unicode_ci"] = &unicodeCIDefCauslator{}
   248  	newDefCauslatorIDMap[int(allegrosql.DefCauslationNames["utf8mb4_unicode_ci"])] = &unicodeCIDefCauslator{}
   249  	newDefCauslatorMap["utf8_unicode_ci"] = &unicodeCIDefCauslator{}
   250  	newDefCauslatorIDMap[int(allegrosql.DefCauslationNames["utf8_unicode_ci"])] = &unicodeCIDefCauslator{}
   251  }