vitess.io/vitess@v0.16.2/go/vt/vtgate/vindexes/cfc.go (about) 1 /* 2 Copyright 2021 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package vindexes 18 19 import ( 20 "bytes" 21 "context" 22 "encoding/json" 23 24 "vitess.io/vitess/go/sqltypes" 25 "vitess.io/vitess/go/vt/key" 26 topodatapb "vitess.io/vitess/go/vt/proto/topodata" 27 "vitess.io/vitess/go/vt/proto/vtrpc" 28 "vitess.io/vitess/go/vt/vterrors" 29 ) 30 31 // CFC is Concatenated Fixed-width Composite Vindex. 32 // 33 // The purpose of this vindex is to shard the rows based on the prefix of 34 // sharding key. Imagine the sharding key is defined as (s1, s2, ... sN), a 35 // prefix of this key is (s1, s2, ... sj) (j <= N). This vindex puts the rows 36 // with the same prefix among a same group of shards instead of scatter them 37 // around all the shards. The benefit of doing so is that prefix queries will 38 // only fanout to a subset of shards instead of all the shards. Specifically 39 // this vindex maps the full key, i.e. (s1, s2, ... sN) to a 40 // `key.DestinationKeyspaceID` and the prefix of it, i.e. (s1, s2, ... sj)(j<N) 41 // to a `key.DestinationKeyRange`. Note that the prefix to key range mapping is 42 // only active in 'LIKE' expression. When a column with CFC defined appears in 43 // other expressions, e.g. =, !=, IN etc, it behaves exactly as other 44 // functional unique vindexes. 45 // 46 // This provides the capability to model hierarchical data models. 
// If we consider the prefix as the 'parent' key and the full key as the
// 'child' key, all the child data is clustered within the same group of
// shards identified by the 'parent' key.
//
// Due to the prevalence of using `vindexes.SingleColumn` in vindexes, it's way
// more complex to implement a true multi-column composite index (see github
// issue) than to implement it using a single column vindex where the
// components of the composite keys are concatenated together to form a single
// key. The user can use this single key directly as the keyspace id; one can
// also define a hash function so that the keyspace id is the concatenation of
// hash(s1), hash(s2), ... hash(sN). Using the concatenated key directly makes
// it easier to reason about the fanout but the data distribution depends on
// the key itself; while using the hash on components takes care of the
// randomness of the data distribution.
//
// Since the vindex is on a concatenated key, the offsets into the key are the
// only way to mark its components. Thus it implicitly requires each component
// to have a fixed width, except the last one. It's especially true when hash
// is defined. Because the hash is calculated component by component, only the
// prefix that aligns with the component boundary can be used to compute the
// key range. Although the misaligned part doesn't participate in the key range
// calculation, the SQL executed on each shard uses the unchanged prefix; thus
// the behavior is exactly the same as other vindexes' but just more efficient
// in controlling the fanout.
//
// # The expected format of the vindex definition is
//
//	"vindexes": {
//	  "cfc_md5": {
//	    "type": "cfc",
//	    "params": {
//	      "hash": "md5",
//	      "offsets": "[2,4]"
//	    }
//	  }
//	}
//
// 'offsets' only makes sense when hash is used.
Offsets should be a sorted 85 // list of positive ints, each of which denotes the byte offset (from the 86 // beginning of key) of each component's boundary in the concatenated key. 87 // Specifically, offsets[0] is the byte offset of the first component, 88 // offsets[1] is the byte offset of the second component, etc. 89 type CFC struct { 90 // CFC is used in all compare expressions other than 'LIKE'. 91 *cfcCommon 92 // prefixCFC is only used in 'LIKE' compare expressions. 93 prefixCFC *prefixCFC 94 } 95 96 type cfcCommon struct { 97 name string 98 hash func([]byte) []byte 99 offsets []int 100 } 101 102 // NewCFC creates a new CFC vindex 103 func NewCFC(name string, params map[string]string) (Vindex, error) { 104 ss := &cfcCommon{ 105 name: name, 106 } 107 cfc := &CFC{ 108 cfcCommon: ss, 109 prefixCFC: &prefixCFC{cfcCommon: ss}, 110 } 111 112 if params == nil { 113 return cfc, nil 114 } 115 116 switch h := params["hash"]; h { 117 case "": 118 return cfc, nil 119 case "md5": 120 ss.hash = md5hash 121 case "xxhash64": 122 ss.hash = xxhash64 123 default: 124 return nil, vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "invalid hash %s to CFC vindex %s", h, name) 125 } 126 127 var offsets []int 128 if p := params["offsets"]; p == "" { 129 return nil, vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "CFC vindex requires offsets when hash is defined") 130 } else if err := json.Unmarshal([]byte(p), &offsets); err != nil || !validOffsets(offsets) { 131 return nil, vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "invalid offsets %s to CFC vindex %s. 
expected sorted positive ints in brackets", p, name) 132 } 133 // remove duplicates 134 prev := -1 135 for _, off := range offsets { 136 if off != prev { 137 ss.offsets = append(ss.offsets, off) 138 } 139 prev = off 140 } 141 142 return cfc, nil 143 } 144 145 func validOffsets(offsets []int) bool { 146 n := len(offsets) 147 if n == 0 { 148 return false 149 } 150 if offsets[0] <= 0 { 151 return false 152 } 153 154 for i := 1; i < n; i++ { 155 if offsets[i] <= offsets[i-1] { 156 return false 157 } 158 } 159 return true 160 } 161 162 func (vind *CFC) String() string { 163 return vind.name 164 } 165 166 // Cost returns the cost as 1. In regular mode, i.e. not in a LIKE op, CFC has 167 // pretty much the same cost as other unique vindexes like 'binary', 'md5' etc. 168 func (vind *CFC) Cost() int { 169 return 1 170 } 171 172 // IsUnique returns true since the Vindex is unique. 173 func (vind *CFC) IsUnique() bool { 174 return true 175 } 176 177 // NeedsVCursor satisfies the Vindex interface. 178 func (vind *CFC) NeedsVCursor() bool { 179 return false 180 } 181 182 // computeKsid returns the corresponding keyspace id of a key. 183 func (vind *cfcCommon) computeKsid(v []byte, prefix bool) ([]byte, error) { 184 185 if vind.hash == nil { 186 return v, nil 187 } 188 n := len(v) 189 m := len(vind.offsets) 190 // if we are not working on a prefix, the key has to have all the components, 191 // that is, it has to be longer than the largest offset. 192 if !prefix && n < vind.offsets[m-1] { 193 return nil, vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "insufficient size for cfc vindex %s. 
need %d, got %d", vind.name, vind.offsets[m-1], n) 194 } 195 prev := 0 196 offset := 0 197 buf := new(bytes.Buffer) 198 for _, offset = range vind.offsets { 199 if n < offset { 200 // the given prefix doesn't align with the component boundaries, 201 // return the hashed prefix that's currently available 202 return buf.Bytes(), nil 203 } 204 205 if _, err := buf.Write(vind.hash(v[prev:offset])); err != nil { 206 return nil, err 207 } 208 prev = offset 209 } 210 if offset < n { 211 if _, err := buf.Write(vind.hash(v[offset:n])); err != nil { 212 return nil, err 213 } 214 } 215 return buf.Bytes(), nil 216 } 217 218 func (vind *cfcCommon) verify(ids []sqltypes.Value, ksids [][]byte) ([]bool, error) { 219 out := make([]bool, len(ids)) 220 for i := range ids { 221 idBytes, err := ids[i].ToBytes() 222 if err != nil { 223 return out, err 224 } 225 v, err := vind.computeKsid(idBytes, false) 226 if err != nil { 227 return nil, err 228 } 229 out[i] = bytes.Equal(v, ksids[i]) 230 } 231 return out, nil 232 } 233 234 // Verify returns true if ids maps to ksids. 235 func (vind *CFC) Verify(_ context.Context, _ VCursor, ids []sqltypes.Value, ksids [][]byte) ([]bool, error) { 236 return vind.verify(ids, ksids) 237 } 238 239 // Map can map ids to key.Destination objects. 240 func (vind *CFC) Map(_ context.Context, _ VCursor, ids []sqltypes.Value) ([]key.Destination, error) { 241 out := make([]key.Destination, len(ids)) 242 for i, id := range ids { 243 idBytes, err := id.ToBytes() 244 if err != nil { 245 return out, err 246 } 247 v, err := vind.computeKsid(idBytes, false) 248 if err != nil { 249 return nil, err 250 } 251 out[i] = key.DestinationKeyspaceID(v) 252 } 253 return out, nil 254 } 255 256 // PrefixVindex switches the vindex to prefix mode 257 func (vind *CFC) PrefixVindex() SingleColumn { 258 return vind.prefixCFC 259 } 260 261 // NewKeyRangeFromPrefix creates a keyspace range from a prefix of keyspace id. 
262 func NewKeyRangeFromPrefix(begin []byte) key.Destination { 263 if len(begin) == 0 { 264 return key.DestinationAllShards{} 265 } 266 // the prefix maps to a keyspace range corresponding to its value and plus one. 267 // that is [ keyspace_id, keyspace_id + 1 ). 268 end := make([]byte, len(begin)) 269 copy(end, begin) 270 end = addOne(end) 271 return key.DestinationKeyRange{ 272 KeyRange: &topodatapb.KeyRange{ 273 Start: begin, 274 End: end, 275 }, 276 } 277 } 278 279 func addOne(value []byte) []byte { 280 n := len(value) 281 overflow := true 282 for i := n - 1; i >= 0; i-- { 283 if value[i] < 255 { 284 value[i]++ 285 overflow = false 286 break 287 } else { 288 value[i] = 0 289 } 290 } 291 if overflow { 292 return nil 293 } 294 return value 295 } 296 297 type prefixCFC struct { 298 *cfcCommon 299 } 300 301 func (vind *prefixCFC) String() string { 302 return vind.name 303 } 304 305 func (vind *prefixCFC) NeedsVCursor() bool { 306 return false 307 } 308 309 func (vind *prefixCFC) Verify(_ context.Context, _ VCursor, ids []sqltypes.Value, ksids [][]byte) ([]bool, error) { 310 return vind.verify(ids, ksids) 311 } 312 313 // In prefix mode, i.e. within a LIKE op, the cost is higher than regular mode. 314 // Ideally the cost should be the number of shards we resolved to but the current 315 // framework doesn't do dynamic cost evaluation. 316 func (vind *prefixCFC) Cost() int { 317 if n := len(vind.offsets); n > 0 { 318 return n 319 } 320 return 2 321 } 322 323 func (vind *prefixCFC) IsUnique() bool { 324 return false 325 } 326 327 // Map can map ids to key.Destination objects. 
328 func (vind *prefixCFC) Map(_ context.Context, _ VCursor, ids []sqltypes.Value) ([]key.Destination, error) { 329 out := make([]key.Destination, len(ids)) 330 for i, id := range ids { 331 value, err := id.ToBytes() 332 if err != nil { 333 return out, err 334 } 335 prefix := findPrefix(value) 336 begin, err := vind.computeKsid(prefix, true) 337 if err != nil { 338 return nil, err 339 } 340 out[i] = NewKeyRangeFromPrefix(begin) 341 } 342 return out, nil 343 } 344 345 // findPrefix returns the 'prefix' of the string literal in LIKE expression. 346 // The prefix is the prefix of the string literal up until the first unescaped 347 // meta character (% and _). Other escape sequences are escaped according to 348 // https://dev.mysql.com/doc/refman/8.0/en/string-literals.html. 349 func findPrefix(str []byte) []byte { 350 buf := new(bytes.Buffer) 351 L: 352 for len(str) > 0 { 353 n := len(str) 354 p := bytes.IndexAny(str, `%_\`) 355 if p < 0 { 356 buf.Write(str) 357 break 358 } 359 buf.Write(str[:p]) 360 switch str[p] { 361 case '%', '_': 362 // prefix found 363 break L 364 // The following is not very efficient in dealing with too many 365 // continuous backslash characters, e.g. '\\\\\\\\\\\\\%', but 366 // hopefully it's the less common case. 367 case '\\': 368 if p == n-1 { 369 // backslash is the very last character of a string, typically 370 // this is an invalid string argument. We write the backslash 371 // anyway because Mysql can deal with it. 372 buf.WriteByte(str[p]) 373 break L 374 } else if decoded := sqltypes.SQLDecodeMap[str[p+1]]; decoded != sqltypes.DontEscape { 375 buf.WriteByte(decoded) 376 } else { 377 buf.WriteByte(str[p+1]) 378 } 379 str = str[(p + 2):n] 380 } 381 } 382 return buf.Bytes() 383 } 384 385 // we don't use the full hashed value because it's very long. 386 // keyrange resolution is done via comparing []byte so longer 387 // keyspace ids have performance impact. 
388 func md5hash(in []byte) []byte { 389 n := len(in) 390 out := vMD5Hash(in) 391 if n < len(out) { 392 return out[:n] 393 } 394 return out 395 396 } 397 398 // same here 399 func xxhash64(in []byte) []byte { 400 out := vXXHash(in) 401 n := len(in) 402 if n < len(out) { 403 return out[:n] 404 } 405 return out 406 } 407 408 func init() { 409 Register("cfc", NewCFC) 410 }