github.com/whtcorpsinc/milevadb-prod@v0.0.0-20211104133533-f57f4be3b597/dbs/memristed/memex/collation.go (about)

     1  // Copyright 2020 WHTCORPS INC, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package memex
    15  
    16  import (
    17  	"github.com/whtcorpsinc/BerolinaSQL/ast"
    18  	"github.com/whtcorpsinc/BerolinaSQL/charset"
    19  	"github.com/whtcorpsinc/milevadb/stochastikctx"
    20  	"github.com/whtcorpsinc/milevadb/types"
    21  	"github.com/whtcorpsinc/milevadb/soliton/logutil"
    22  )
    23  
    24  type defCauslationInfo struct {
    25  	coer     Coercibility
    26  	coerInit bool
    27  
    28  	charset   string
    29  	defCauslation string
    30  	flen      int
    31  }
    32  
    33  func (c *defCauslationInfo) HasCoercibility() bool {
    34  	return c.coerInit
    35  }
    36  
    37  func (c *defCauslationInfo) Coercibility() Coercibility {
    38  	return c.coer
    39  }
    40  
    41  // SetCoercibility implements DefCauslationInfo SetCoercibility interface.
    42  func (c *defCauslationInfo) SetCoercibility(val Coercibility) {
    43  	c.coer = val
    44  	c.coerInit = true
    45  }
    46  
    47  func (c *defCauslationInfo) SetCharsetAndDefCauslation(chs, defCausl string) {
    48  	c.charset, c.defCauslation = chs, defCausl
    49  }
    50  
    51  func (c *defCauslationInfo) CharsetAndDefCauslation(ctx stochastikctx.Context) (string, string) {
    52  	if c.charset != "" || c.defCauslation != "" {
    53  		return c.charset, c.defCauslation
    54  	}
    55  
    56  	if ctx != nil && ctx.GetStochastikVars() != nil {
    57  		c.charset, c.defCauslation = ctx.GetStochastikVars().GetCharsetInfo()
    58  	}
    59  	if c.charset == "" || c.defCauslation == "" {
    60  		c.charset, c.defCauslation = charset.GetDefaultCharsetAndDefCauslate()
    61  	}
    62  	c.flen = types.UnspecifiedLength
    63  	return c.charset, c.defCauslation
    64  }
    65  
    66  // DefCauslationInfo contains all interfaces about dealing with defCauslation.
    67  type DefCauslationInfo interface {
    68  	// HasCoercibility returns if the Coercibility value is initialized.
    69  	HasCoercibility() bool
    70  
    71  	// Coercibility returns the coercibility value which is used to check defCauslations.
    72  	Coercibility() Coercibility
    73  
    74  	// SetCoercibility sets a specified coercibility for this memex.
    75  	SetCoercibility(val Coercibility)
    76  
    77  	// CharsetAndDefCauslation ...
    78  	CharsetAndDefCauslation(ctx stochastikctx.Context) (string, string)
    79  
    80  	// SetCharsetAndDefCauslation ...
    81  	SetCharsetAndDefCauslation(chs, defCausl string)
    82  }
    83  
    84  // Coercibility values are used to check whether the defCauslation of one item can be coerced to
    85  // the defCauslation of other. See https://dev.allegrosql.com/doc/refman/8.0/en/charset-defCauslation-coercibility.html
    86  type Coercibility int
    87  
    88  const (
    89  	// CoercibilityExplicit is derived from an explicit COLLATE clause.
    90  	CoercibilityExplicit Coercibility = 0
    91  	// CoercibilityNone is derived from the concatenation of two strings with different defCauslations.
    92  	CoercibilityNone Coercibility = 1
    93  	// CoercibilityImplicit is derived from a defCausumn or a stored routine parameter or local variable.
    94  	CoercibilityImplicit Coercibility = 2
    95  	// CoercibilitySysconst is derived from a “system constant” (the string returned by functions such as USER() or VERSION()).
    96  	CoercibilitySysconst Coercibility = 3
    97  	// CoercibilityCoercible is derived from a literal.
    98  	CoercibilityCoercible Coercibility = 4
    99  	// CoercibilityNumeric is derived from a numeric or temporal value.
   100  	CoercibilityNumeric Coercibility = 5
   101  	// CoercibilityIgnorable is derived from NULL or an memex that is derived from NULL.
   102  	CoercibilityIgnorable Coercibility = 6
   103  )
   104  
   105  var (
   106  	sysConstFuncs = map[string]struct{}{
   107  		ast.User:        {},
   108  		ast.Version:     {},
   109  		ast.Database:    {},
   110  		ast.CurrentRole: {},
   111  		ast.CurrentUser: {},
   112  	}
   113  
   114  	// defCauslationPriority is the priority when infer the result defCauslation, the priority of defCauslation a > b iff defCauslationPriority[a] > defCauslationPriority[b]
   115  	// defCauslation a and b are incompatible if defCauslationPriority[a] = defCauslationPriority[b]
   116  	defCauslationPriority = map[string]int{
   117  		charset.DefCauslationASCII:   1,
   118  		charset.DefCauslationLatin1:  2,
   119  		"utf8_general_ci":        3,
   120  		"utf8_unicode_ci":        3,
   121  		charset.DefCauslationUTF8:    4,
   122  		"utf8mb4_general_ci":     5,
   123  		"utf8mb4_unicode_ci":     5,
   124  		charset.DefCauslationUTF8MB4: 6,
   125  		charset.DefCauslationBin:     7,
   126  	}
   127  
   128  	// DefCauslationStrictnessGroup group defCauslation by strictness
   129  	DefCauslationStrictnessGroup = map[string]int{
   130  		"utf8_general_ci":        1,
   131  		"utf8mb4_general_ci":     1,
   132  		"utf8_unicode_ci":        2,
   133  		"utf8mb4_unicode_ci":     2,
   134  		charset.DefCauslationASCII:   3,
   135  		charset.DefCauslationLatin1:  3,
   136  		charset.DefCauslationUTF8:    3,
   137  		charset.DefCauslationUTF8MB4: 3,
   138  		charset.DefCauslationBin:     4,
   139  	}
   140  
   141  	// DefCauslationStrictness indicates the strictness of comparison of the defCauslation. The unequal order in a weak defCauslation also holds in a strict defCauslation.
   142  	// For example, if a != b in a weak defCauslation(e.g. general_ci), then there must be a != b in a strict defCauslation(e.g. _bin).
   143  	// defCauslation group id in value is stricter than defCauslation group id in key
   144  	DefCauslationStrictness = map[int][]int{
   145  		1: {3, 4},
   146  		2: {3, 4},
   147  		3: {4},
   148  		4: {},
   149  	}
   150  )
   151  
   152  func deriveCoercibilityForScarlarFunc(sf *ScalarFunction) Coercibility {
   153  	if _, ok := sysConstFuncs[sf.FuncName.L]; ok {
   154  		return CoercibilitySysconst
   155  	}
   156  	if !types.IsString(sf.RetType.Tp) {
   157  		return CoercibilityNumeric
   158  	}
   159  
   160  	_, _, coer, _ := inferDefCauslation(sf.GetArgs()...)
   161  
   162  	// it is weird if a ScalarFunction is CoercibilityNumeric but return string type
   163  	if coer == CoercibilityNumeric {
   164  		return CoercibilityCoercible
   165  	}
   166  
   167  	return coer
   168  }
   169  
   170  func deriveCoercibilityForConstant(c *Constant) Coercibility {
   171  	if c.Value.IsNull() {
   172  		return CoercibilityIgnorable
   173  	} else if !types.IsString(c.RetType.Tp) {
   174  		return CoercibilityNumeric
   175  	}
   176  	return CoercibilityCoercible
   177  }
   178  
   179  func deriveCoercibilityForDeferredCauset(c *DeferredCauset) Coercibility {
   180  	if !types.IsString(c.RetType.Tp) {
   181  		return CoercibilityNumeric
   182  	}
   183  	return CoercibilityImplicit
   184  }
   185  
   186  // DeriveDefCauslationFromExprs derives defCauslation information from these memexs.
   187  func DeriveDefCauslationFromExprs(ctx stochastikctx.Context, exprs ...Expression) (dstCharset, dstDefCauslation string) {
   188  	dstDefCauslation, dstCharset, _, _ = inferDefCauslation(exprs...)
   189  	return
   190  }
   191  
   192  // inferDefCauslation infers defCauslation, charset, coercibility and check the legitimacy.
   193  func inferDefCauslation(exprs ...Expression) (dstDefCauslation, dstCharset string, coercibility Coercibility, legal bool) {
   194  	firstExplicitDefCauslation := ""
   195  	coercibility = CoercibilityIgnorable
   196  	dstCharset, dstDefCauslation = charset.GetDefaultCharsetAndDefCauslate()
   197  	for _, arg := range exprs {
   198  		if arg.Coercibility() == CoercibilityExplicit {
   199  			if firstExplicitDefCauslation == "" {
   200  				firstExplicitDefCauslation = arg.GetType().DefCauslate
   201  				coercibility, dstDefCauslation, dstCharset = CoercibilityExplicit, arg.GetType().DefCauslate, arg.GetType().Charset
   202  			} else if firstExplicitDefCauslation != arg.GetType().DefCauslate {
   203  				return "", "", CoercibilityIgnorable, false
   204  			}
   205  		} else if arg.Coercibility() < coercibility {
   206  			coercibility, dstDefCauslation, dstCharset = arg.Coercibility(), arg.GetType().DefCauslate, arg.GetType().Charset
   207  		} else if arg.Coercibility() == coercibility && dstDefCauslation != arg.GetType().DefCauslate {
   208  			p1 := defCauslationPriority[dstDefCauslation]
   209  			p2 := defCauslationPriority[arg.GetType().DefCauslate]
   210  
   211  			// same priority means this two defCauslation is incompatible, coercibility might derive to CoercibilityNone
   212  			if p1 == p2 {
   213  				coercibility, dstDefCauslation, dstCharset = CoercibilityNone, getBinDefCauslation(arg.GetType().Charset), arg.GetType().Charset
   214  			} else if p1 < p2 {
   215  				dstDefCauslation, dstCharset = arg.GetType().DefCauslate, arg.GetType().Charset
   216  			}
   217  		}
   218  	}
   219  
   220  	return dstDefCauslation, dstCharset, coercibility, true
   221  }
   222  
   223  // getBinDefCauslation get binary defCauslation by charset
   224  func getBinDefCauslation(cs string) string {
   225  	switch cs {
   226  	case charset.CharsetUTF8:
   227  		return charset.DefCauslationUTF8
   228  	case charset.CharsetUTF8MB4:
   229  		return charset.DefCauslationUTF8MB4
   230  	}
   231  
   232  	logutil.BgLogger().Error("unexpected charset " + cs)
   233  	// it must return something, never reachable
   234  	return charset.DefCauslationUTF8MB4
   235  }