vitess.io/vitess@v0.16.2/go/vt/vtgate/vindexes/cfc.go (about)

     1  /*
     2  Copyright 2021 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package vindexes
    18  
    19  import (
    20  	"bytes"
    21  	"context"
    22  	"encoding/json"
    23  
    24  	"vitess.io/vitess/go/sqltypes"
    25  	"vitess.io/vitess/go/vt/key"
    26  	topodatapb "vitess.io/vitess/go/vt/proto/topodata"
    27  	"vitess.io/vitess/go/vt/proto/vtrpc"
    28  	"vitess.io/vitess/go/vt/vterrors"
    29  )
    30  
// CFC is Concatenated Fixed-width Composite Vindex.
//
// The purpose of this vindex is to shard the rows based on the prefix of the
// sharding key. Imagine the sharding key is defined as (s1, s2, ... sN); a
// prefix of this key is (s1, s2, ... sj) (j <= N). This vindex puts the rows
// with the same prefix in the same group of shards instead of scattering them
// across all the shards. The benefit of doing so is that prefix queries will
// only fan out to a subset of shards instead of all the shards. Specifically,
// this vindex maps the full key, i.e. (s1, s2, ... sN), to a
// `key.DestinationKeyspaceID` and a prefix of it, i.e. (s1, s2, ... sj)(j<N),
// to a `key.DestinationKeyRange`. Note that the prefix to key range mapping is
// only active in 'LIKE' expressions. When a column with CFC defined appears in
// other expressions, e.g. =, !=, IN etc, it behaves exactly like other
// functional unique vindexes.
//
// This provides the capability to model hierarchical data models. If we
// consider the prefix as the 'parent' key and the full key as the 'child' key,
// all the child data is clustered within the same group of shards identified
// by the 'parent' key.
//
// Due to the prevalence of `vindexes.SingleColumn` in vindexes, it's way
// more complex to implement a true multi-column composite index (see github
// issue) than to implement it using a single column vindex where the
// components of the composite keys are concatenated together to form a single
// key. The user can use this single key directly as the keyspace id; one can
// also define a hash function so that the keyspace id is the concatenation of
// hash(s1), hash(s2), ... hash(sN). Using the concatenated key directly makes
// it easier to reason about the fanout but the data distribution depends on
// the key itself, while using the hash on components takes care of the
// randomness of the data distribution.
//
// Since the vindex is on a concatenated key, the offsets into the key are the
// only way to mark its components. Thus it implicitly requires each component
// to have a fixed width, except the last one. It's especially true when hash
// is defined. Because the hash is calculated component by component, only the
// prefix that aligns with the component boundary can be used to compute the
// key range. Although the misaligned part doesn't participate in the key range
// calculation, the SQL executed on each shard uses the unchanged prefix; thus
// the behavior is exactly the same as other vindexes' but just more efficient
// in controlling the fanout.
//
// # The expected format of the vindex definition is
//
//	"vindexes": {
//	  "cfc_md5": {
//	    "type": "cfc",
//	    "params": {
//	      "hash": "md5",
//	      "offsets": "[2,4]"
//	    }
//	  }
//	}
//
// 'offsets' only makes sense when hash is used. Offsets should be a sorted
// list of positive ints, each of which denotes the byte offset (from the
// beginning of key) of each component's boundary in the concatenated key.
// Specifically, offsets[0] is the byte offset of the first component,
// offsets[1] is the byte offset of the second component, etc.
type CFC struct {
	// The embedded cfcCommon backs CFC in all compare expressions other than
	// 'LIKE'.
	*cfcCommon
	// prefixCFC is only used in 'LIKE' compare expressions. It shares the
	// same cfcCommon instance as the embedded field (see NewCFC).
	prefixCFC *prefixCFC
}
    95  
// cfcCommon holds the configuration shared by the regular (CFC) and the
// prefix (prefixCFC) flavors of the vindex.
type cfcCommon struct {
	// name is the vindex name as passed to NewCFC; it is what String returns.
	name string
	// hash is applied to each component of the key. When nil, the key itself
	// is used as the keyspace id (see computeKsid).
	hash func([]byte) []byte
	// offsets are the byte offsets of the component boundaries within the
	// concatenated key. NewCFC only populates them when a hash is configured.
	offsets []int
}
   101  
   102  // NewCFC creates a new CFC vindex
   103  func NewCFC(name string, params map[string]string) (Vindex, error) {
   104  	ss := &cfcCommon{
   105  		name: name,
   106  	}
   107  	cfc := &CFC{
   108  		cfcCommon: ss,
   109  		prefixCFC: &prefixCFC{cfcCommon: ss},
   110  	}
   111  
   112  	if params == nil {
   113  		return cfc, nil
   114  	}
   115  
   116  	switch h := params["hash"]; h {
   117  	case "":
   118  		return cfc, nil
   119  	case "md5":
   120  		ss.hash = md5hash
   121  	case "xxhash64":
   122  		ss.hash = xxhash64
   123  	default:
   124  		return nil, vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "invalid hash %s to CFC vindex %s", h, name)
   125  	}
   126  
   127  	var offsets []int
   128  	if p := params["offsets"]; p == "" {
   129  		return nil, vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "CFC vindex requires offsets when hash is defined")
   130  	} else if err := json.Unmarshal([]byte(p), &offsets); err != nil || !validOffsets(offsets) {
   131  		return nil, vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "invalid offsets %s to CFC vindex %s. expected sorted positive ints in brackets", p, name)
   132  	}
   133  	// remove duplicates
   134  	prev := -1
   135  	for _, off := range offsets {
   136  		if off != prev {
   137  			ss.offsets = append(ss.offsets, off)
   138  		}
   139  		prev = off
   140  	}
   141  
   142  	return cfc, nil
   143  }
   144  
   145  func validOffsets(offsets []int) bool {
   146  	n := len(offsets)
   147  	if n == 0 {
   148  		return false
   149  	}
   150  	if offsets[0] <= 0 {
   151  		return false
   152  	}
   153  
   154  	for i := 1; i < n; i++ {
   155  		if offsets[i] <= offsets[i-1] {
   156  			return false
   157  		}
   158  	}
   159  	return true
   160  }
   161  
// String returns the name of the vindex.
func (vind *CFC) String() string {
	return vind.name
}
   165  
// Cost returns the cost as 1. In regular mode, i.e. not in a LIKE op, CFC has
// pretty much the same cost as other unique vindexes such as 'binary' or
// 'md5'.
func (vind *CFC) Cost() int {
	return 1
}
   171  
// IsUnique returns true since the Vindex is unique: in regular mode each id
// maps to a single keyspace id (see Map).
func (vind *CFC) IsUnique() bool {
	return true
}
   176  
// NeedsVCursor satisfies the Vindex interface. It returns false; Map and
// Verify ignore their VCursor argument.
func (vind *CFC) NeedsVCursor() bool {
	return false
}
   181  
   182  // computeKsid returns the corresponding keyspace id of a key.
   183  func (vind *cfcCommon) computeKsid(v []byte, prefix bool) ([]byte, error) {
   184  
   185  	if vind.hash == nil {
   186  		return v, nil
   187  	}
   188  	n := len(v)
   189  	m := len(vind.offsets)
   190  	// if we are not working on a prefix, the key has to have all the components,
   191  	// that is, it has to be longer than the largest offset.
   192  	if !prefix && n < vind.offsets[m-1] {
   193  		return nil, vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "insufficient size for cfc vindex %s. need %d, got %d", vind.name, vind.offsets[m-1], n)
   194  	}
   195  	prev := 0
   196  	offset := 0
   197  	buf := new(bytes.Buffer)
   198  	for _, offset = range vind.offsets {
   199  		if n < offset {
   200  			// the given prefix doesn't align with the component boundaries,
   201  			// return the hashed prefix that's currently available
   202  			return buf.Bytes(), nil
   203  		}
   204  
   205  		if _, err := buf.Write(vind.hash(v[prev:offset])); err != nil {
   206  			return nil, err
   207  		}
   208  		prev = offset
   209  	}
   210  	if offset < n {
   211  		if _, err := buf.Write(vind.hash(v[offset:n])); err != nil {
   212  			return nil, err
   213  		}
   214  	}
   215  	return buf.Bytes(), nil
   216  }
   217  
   218  func (vind *cfcCommon) verify(ids []sqltypes.Value, ksids [][]byte) ([]bool, error) {
   219  	out := make([]bool, len(ids))
   220  	for i := range ids {
   221  		idBytes, err := ids[i].ToBytes()
   222  		if err != nil {
   223  			return out, err
   224  		}
   225  		v, err := vind.computeKsid(idBytes, false)
   226  		if err != nil {
   227  			return nil, err
   228  		}
   229  		out[i] = bytes.Equal(v, ksids[i])
   230  	}
   231  	return out, nil
   232  }
   233  
// Verify returns true for every position i where ids[i] maps to ksids[i].
// The context and VCursor arguments are unused.
func (vind *CFC) Verify(_ context.Context, _ VCursor, ids []sqltypes.Value, ksids [][]byte) ([]bool, error) {
	return vind.verify(ids, ksids)
}
   238  
   239  // Map can map ids to key.Destination objects.
   240  func (vind *CFC) Map(_ context.Context, _ VCursor, ids []sqltypes.Value) ([]key.Destination, error) {
   241  	out := make([]key.Destination, len(ids))
   242  	for i, id := range ids {
   243  		idBytes, err := id.ToBytes()
   244  		if err != nil {
   245  			return out, err
   246  		}
   247  		v, err := vind.computeKsid(idBytes, false)
   248  		if err != nil {
   249  			return nil, err
   250  		}
   251  		out[i] = key.DestinationKeyspaceID(v)
   252  	}
   253  	return out, nil
   254  }
   255  
// PrefixVindex switches the vindex to prefix mode by returning the prefixCFC
// that shares this vindex's configuration.
func (vind *CFC) PrefixVindex() SingleColumn {
	return vind.prefixCFC
}
   260  
   261  // NewKeyRangeFromPrefix creates a keyspace range from a prefix of keyspace id.
   262  func NewKeyRangeFromPrefix(begin []byte) key.Destination {
   263  	if len(begin) == 0 {
   264  		return key.DestinationAllShards{}
   265  	}
   266  	// the prefix maps to a keyspace range corresponding to its value and plus one.
   267  	// that is [ keyspace_id, keyspace_id + 1 ).
   268  	end := make([]byte, len(begin))
   269  	copy(end, begin)
   270  	end = addOne(end)
   271  	return key.DestinationKeyRange{
   272  		KeyRange: &topodatapb.KeyRange{
   273  			Start: begin,
   274  			End:   end,
   275  		},
   276  	}
   277  }
   278  
   279  func addOne(value []byte) []byte {
   280  	n := len(value)
   281  	overflow := true
   282  	for i := n - 1; i >= 0; i-- {
   283  		if value[i] < 255 {
   284  			value[i]++
   285  			overflow = false
   286  			break
   287  		} else {
   288  			value[i] = 0
   289  		}
   290  	}
   291  	if overflow {
   292  		return nil
   293  	}
   294  	return value
   295  }
   296  
// prefixCFC is the prefix-mode flavor of the CFC vindex. It maps a LIKE
// pattern to a key range instead of a single keyspace id (see Map) and shares
// its configuration with the CFC it was created for.
type prefixCFC struct {
	*cfcCommon
}
   300  
// String returns the name of the vindex.
func (vind *prefixCFC) String() string {
	return vind.name
}
   304  
// NeedsVCursor returns false; Map and Verify ignore their VCursor argument.
func (vind *prefixCFC) NeedsVCursor() bool {
	return false
}
   308  
// Verify returns true for every position i where ids[i] maps to ksids[i]. It
// delegates to the shared verify, which computes keyspace ids in regular
// (non-prefix) mode. The context and VCursor arguments are unused.
func (vind *prefixCFC) Verify(_ context.Context, _ VCursor, ids []sqltypes.Value, ksids [][]byte) ([]bool, error) {
	return vind.verify(ids, ksids)
}
   312  
   313  // In prefix mode, i.e. within a LIKE op, the cost is higher than regular mode.
   314  // Ideally the cost should be the number of shards we resolved to but the current
   315  // framework doesn't do dynamic cost evaluation.
   316  func (vind *prefixCFC) Cost() int {
   317  	if n := len(vind.offsets); n > 0 {
   318  		return n
   319  	}
   320  	return 2
   321  }
   322  
// IsUnique returns false: in prefix mode an id maps to a key range rather
// than to a single keyspace id (see Map).
func (vind *prefixCFC) IsUnique() bool {
	return false
}
   326  
   327  // Map can map ids to key.Destination objects.
   328  func (vind *prefixCFC) Map(_ context.Context, _ VCursor, ids []sqltypes.Value) ([]key.Destination, error) {
   329  	out := make([]key.Destination, len(ids))
   330  	for i, id := range ids {
   331  		value, err := id.ToBytes()
   332  		if err != nil {
   333  			return out, err
   334  		}
   335  		prefix := findPrefix(value)
   336  		begin, err := vind.computeKsid(prefix, true)
   337  		if err != nil {
   338  			return nil, err
   339  		}
   340  		out[i] = NewKeyRangeFromPrefix(begin)
   341  	}
   342  	return out, nil
   343  }
   344  
   345  // findPrefix returns the 'prefix' of the string literal in LIKE expression.
   346  // The prefix is the prefix of the string literal up until the first unescaped
   347  // meta character (% and _). Other escape sequences are escaped according to
   348  // https://dev.mysql.com/doc/refman/8.0/en/string-literals.html.
   349  func findPrefix(str []byte) []byte {
   350  	buf := new(bytes.Buffer)
   351  L:
   352  	for len(str) > 0 {
   353  		n := len(str)
   354  		p := bytes.IndexAny(str, `%_\`)
   355  		if p < 0 {
   356  			buf.Write(str)
   357  			break
   358  		}
   359  		buf.Write(str[:p])
   360  		switch str[p] {
   361  		case '%', '_':
   362  			// prefix found
   363  			break L
   364  		// The following is not very efficient in dealing with too many
   365  		// continuous backslash characters, e.g. '\\\\\\\\\\\\\%', but
   366  		// hopefully it's the less common case.
   367  		case '\\':
   368  			if p == n-1 {
   369  				// backslash is the very last character of a string, typically
   370  				// this is an invalid string argument. We write the backslash
   371  				// anyway because Mysql can deal with it.
   372  				buf.WriteByte(str[p])
   373  				break L
   374  			} else if decoded := sqltypes.SQLDecodeMap[str[p+1]]; decoded != sqltypes.DontEscape {
   375  				buf.WriteByte(decoded)
   376  			} else {
   377  				buf.WriteByte(str[p+1])
   378  			}
   379  			str = str[(p + 2):n]
   380  		}
   381  	}
   382  	return buf.Bytes()
   383  }
   384  
   385  // we don't use the full hashed value because it's very long.
   386  // keyrange resolution is done via comparing []byte so longer
   387  // keyspace ids have performance impact.
   388  func md5hash(in []byte) []byte {
   389  	n := len(in)
   390  	out := vMD5Hash(in)
   391  	if n < len(out) {
   392  		return out[:n]
   393  	}
   394  	return out
   395  
   396  }
   397  
   398  // same here
   399  func xxhash64(in []byte) []byte {
   400  	out := vXXHash(in)
   401  	n := len(in)
   402  	if n < len(out) {
   403  		return out[:n]
   404  	}
   405  	return out
   406  }
   407  
// init registers NewCFC under the vindex type name "cfc".
func init() {
	Register("cfc", NewCFC)
}