github.com/whtcorpsinc/MilevaDB-Prod@v0.0.0-20211104133533-f57f4be3b597/allegrosql/tablecodec/tablecodec.go (about) 1 // Copyright 2020 WHTCORPS INC, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package blockcodec 15 16 import ( 17 "bytes" 18 "encoding/binary" 19 "math" 20 "time" 21 "unicode/utf8" 22 23 "github.com/whtcorpsinc/BerolinaSQL/allegrosql" 24 "github.com/whtcorpsinc/BerolinaSQL/charset" 25 "github.com/whtcorpsinc/BerolinaSQL/perceptron" 26 "github.com/whtcorpsinc/BerolinaSQL/terror" 27 "github.com/whtcorpsinc/errors" 28 "github.com/whtcorpsinc/milevadb/ekv" 29 "github.com/whtcorpsinc/milevadb/errno" 30 "github.com/whtcorpsinc/milevadb/soliton/codec" 31 "github.com/whtcorpsinc/milevadb/soliton/collate" 32 "github.com/whtcorpsinc/milevadb/soliton/rowcodec" 33 "github.com/whtcorpsinc/milevadb/stochastikctx/stmtctx" 34 "github.com/whtcorpsinc/milevadb/structure" 35 "github.com/whtcorpsinc/milevadb/types" 36 ) 37 38 var ( 39 errInvalidKey = terror.ClassXEval.New(errno.ErrInvalidKey, errno.MyALLEGROSQLErrName[errno.ErrInvalidKey]) 40 errInvalidRecordKey = terror.ClassXEval.New(errno.ErrInvalidRecordKey, errno.MyALLEGROSQLErrName[errno.ErrInvalidRecordKey]) 41 errInvalidIndexKey = terror.ClassXEval.New(errno.ErrInvalidIndexKey, errno.MyALLEGROSQLErrName[errno.ErrInvalidIndexKey]) 42 ) 43 44 var ( 45 blockPrefix = []byte{'t'} 46 recordPrefixSep = []byte("_r") 47 indexPrefixSep = []byte("_i") 48 spacetimePrefix = []byte{'m'} 49 ) 50 51 const ( 52 idLen = 8 53 prefixLen = 1 + idLen /*blockID*/ + 2 54 // RecordRowKeyLen is public for calculating avgerage event size. 55 RecordRowKeyLen = prefixLen + idLen /*handle*/ 56 blockPrefixLength = 1 57 recordPrefixSepLength = 2 58 spacetimePrefixLength = 1 59 // MaxOldEncodeValueLen is the maximum len of the old encoding of index value. 60 MaxOldEncodeValueLen = 9 61 62 // CommonHandleFlag is the flag used to decode the common handle in an unique index value. 63 CommonHandleFlag byte = 127 64 // PartitionIDFlag is the flag used to decode the partition ID in global index value. 65 PartitionIDFlag byte = 126 66 // RestoreDataFlag is the flag that RestoreData begin with. 67 // See rowcodec.CausetEncoder.Encode and rowcodec.event.toBytes 68 RestoreDataFlag byte = rowcodec.CodecVer 69 ) 70 71 // TableSplitKeyLen is the length of key 't{block_id}' which is used for causet split. 72 const TableSplitKeyLen = 1 + idLen 73 74 // TablePrefix returns causet's prefix 't'. 75 func TablePrefix() []byte { 76 return blockPrefix 77 } 78 79 // EncodeRowKey encodes the causet id and record handle into a ekv.Key 80 func EncodeRowKey(blockID int64, encodedHandle []byte) ekv.Key { 81 buf := make([]byte, 0, prefixLen+len(encodedHandle)) 82 buf = appendTableRecordPrefix(buf, blockID) 83 buf = append(buf, encodedHandle...) 84 return buf 85 } 86 87 // EncodeRowKeyWithHandle encodes the causet id, event handle into a ekv.Key 88 func EncodeRowKeyWithHandle(blockID int64, handle ekv.Handle) ekv.Key { 89 return EncodeRowKey(blockID, handle.Encoded()) 90 } 91 92 // CutRowKeyPrefix cuts the event key prefix. 93 func CutRowKeyPrefix(key ekv.Key) []byte { 94 return key[prefixLen:] 95 } 96 97 // EncodeRecordKey encodes the recordPrefix, event handle into a ekv.Key. 98 func EncodeRecordKey(recordPrefix ekv.Key, h ekv.Handle) ekv.Key { 99 buf := make([]byte, 0, len(recordPrefix)+h.Len()) 100 buf = append(buf, recordPrefix...) 101 buf = append(buf, h.Encoded()...) 102 return buf 103 } 104 105 func hasTablePrefix(key ekv.Key) bool { 106 return key[0] == blockPrefix[0] 107 } 108 109 func hasRecordPrefixSep(key ekv.Key) bool { 110 return key[0] == recordPrefixSep[0] && key[1] == recordPrefixSep[1] 111 } 112 113 // DecodeRecordKey decodes the key and gets the blockID, handle. 114 func DecodeRecordKey(key ekv.Key) (blockID int64, handle ekv.Handle, err error) { 115 if len(key) <= prefixLen { 116 return 0, nil, errInvalidRecordKey.GenWithStack("invalid record key - %q", key) 117 } 118 119 k := key 120 if !hasTablePrefix(key) { 121 return 0, nil, errInvalidRecordKey.GenWithStack("invalid record key - %q", k) 122 } 123 124 key = key[blockPrefixLength:] 125 key, blockID, err = codec.DecodeInt(key) 126 if err != nil { 127 return 0, nil, errors.Trace(err) 128 } 129 130 if !hasRecordPrefixSep(key) { 131 return 0, nil, errInvalidRecordKey.GenWithStack("invalid record key - %q", k) 132 } 133 134 key = key[recordPrefixSepLength:] 135 if len(key) == 8 { 136 var intHandle int64 137 key, intHandle, err = codec.DecodeInt(key) 138 if err != nil { 139 return 0, nil, errors.Trace(err) 140 } 141 return blockID, ekv.IntHandle(intHandle), nil 142 } 143 h, err := ekv.NewCommonHandle(key) 144 if err != nil { 145 return 0, nil, errInvalidRecordKey.GenWithStack("invalid record key - %q %v", k, err) 146 } 147 return blockID, h, nil 148 } 149 150 // DecodeIndexKey decodes the key and gets the blockID, indexID, indexValues. 151 func DecodeIndexKey(key ekv.Key) (blockID int64, indexID int64, indexValues []string, err error) { 152 k := key 153 154 blockID, indexID, isRecord, err := DecodeKeyHead(key) 155 if err != nil { 156 return 0, 0, nil, errors.Trace(err) 157 } 158 if isRecord { 159 err = errInvalidIndexKey.GenWithStack("invalid index key - %q", k) 160 return 0, 0, nil, err 161 } 162 indexKey := key[prefixLen+idLen:] 163 indexValues, err = DecodeValuesBytesToStrings(indexKey) 164 if err != nil { 165 err = errInvalidIndexKey.GenWithStack("invalid index key - %q %v", k, err) 166 return 0, 0, nil, err 167 } 168 return blockID, indexID, indexValues, nil 169 } 170 171 // DecodeValuesBytesToStrings decode the raw bytes to strings for each columns. 172 // FIXME: Without the schemaReplicant information, we can only decode the raw HoTT of 173 // the column. For instance, MysqlTime is internally saved as uint64. 174 func DecodeValuesBytesToStrings(b []byte) ([]string, error) { 175 var datumValues []string 176 for len(b) > 0 { 177 remain, d, e := codec.DecodeOne(b) 178 if e != nil { 179 return nil, e 180 } 181 str, e1 := d.ToString() 182 if e1 != nil { 183 return nil, e 184 } 185 datumValues = append(datumValues, str) 186 b = remain 187 } 188 return datumValues, nil 189 } 190 191 // DecodeMetaKey decodes the key and get the spacetime key and spacetime field. 192 func DecodeMetaKey(ek ekv.Key) (key []byte, field []byte, err error) { 193 var tp uint64 194 if !bytes.HasPrefix(ek, spacetimePrefix) { 195 return nil, nil, errors.New("invalid encoded hash data key prefix") 196 } 197 ek = ek[spacetimePrefixLength:] 198 ek, key, err = codec.DecodeBytes(ek, nil) 199 if err != nil { 200 return nil, nil, errors.Trace(err) 201 } 202 ek, tp, err = codec.DecodeUint(ek) 203 if err != nil { 204 return nil, nil, errors.Trace(err) 205 } else if structure.TypeFlag(tp) != structure.HashData { 206 return nil, nil, errors.Errorf("invalid encoded hash data key flag %c", byte(tp)) 207 } 208 _, field, err = codec.DecodeBytes(ek, nil) 209 return key, field, errors.Trace(err) 210 } 211 212 // DecodeKeyHead decodes the key's head and gets the blockID, indexID. isRecordKey is true when is a record key. 213 func DecodeKeyHead(key ekv.Key) (blockID int64, indexID int64, isRecordKey bool, err error) { 214 isRecordKey = false 215 k := key 216 if !key.HasPrefix(blockPrefix) { 217 err = errInvalidKey.GenWithStack("invalid key - %q", k) 218 return 219 } 220 221 key = key[len(blockPrefix):] 222 key, blockID, err = codec.DecodeInt(key) 223 if err != nil { 224 err = errors.Trace(err) 225 return 226 } 227 228 if key.HasPrefix(recordPrefixSep) { 229 isRecordKey = true 230 return 231 } 232 if !key.HasPrefix(indexPrefixSep) { 233 err = errInvalidKey.GenWithStack("invalid key - %q", k) 234 return 235 } 236 237 key = key[len(indexPrefixSep):] 238 239 key, indexID, err = codec.DecodeInt(key) 240 if err != nil { 241 err = errors.Trace(err) 242 return 243 } 244 return 245 } 246 247 // DecodeTableID decodes the causet ID of the key, if the key is not causet key, returns 0. 248 func DecodeTableID(key ekv.Key) int64 { 249 if !key.HasPrefix(blockPrefix) { 250 return 0 251 } 252 key = key[len(blockPrefix):] 253 _, blockID, err := codec.DecodeInt(key) 254 // TODO: return error. 255 terror.Log(errors.Trace(err)) 256 return blockID 257 } 258 259 // DecodeRowKey decodes the key and gets the handle. 260 func DecodeRowKey(key ekv.Key) (ekv.Handle, error) { 261 if len(key) < RecordRowKeyLen || !hasTablePrefix(key) || !hasRecordPrefixSep(key[prefixLen-2:]) { 262 return ekv.IntHandle(0), errInvalidKey.GenWithStack("invalid key - %q", key) 263 } 264 if len(key) == RecordRowKeyLen { 265 u := binary.BigEndian.Uint64(key[prefixLen:]) 266 return ekv.IntHandle(codec.DecodeCmpUintToInt(u)), nil 267 } 268 return ekv.NewCommonHandle(key[prefixLen:]) 269 } 270 271 // EncodeValue encodes a go value to bytes. 272 func EncodeValue(sc *stmtctx.StatementContext, b []byte, raw types.Causet) ([]byte, error) { 273 var v types.Causet 274 err := flatten(sc, raw, &v) 275 if err != nil { 276 return nil, err 277 } 278 return codec.EncodeValue(sc, b, v) 279 } 280 281 // EncodeRow encode event data and column ids into a slice of byte. 282 // valBuf and values pass by caller, for reducing EncodeRow allocates temporary bufs. If you pass valBuf and values as nil, 283 // EncodeRow will allocate it. 284 func EncodeRow(sc *stmtctx.StatementContext, event []types.Causet, colIDs []int64, valBuf []byte, values []types.Causet, e *rowcodec.CausetEncoder) ([]byte, error) { 285 if len(event) != len(colIDs) { 286 return nil, errors.Errorf("EncodeRow error: data and columnID count not match %d vs %d", len(event), len(colIDs)) 287 } 288 if e.Enable { 289 return e.Encode(sc, colIDs, event, valBuf) 290 } 291 return EncodeOldRow(sc, event, colIDs, valBuf, values) 292 } 293 294 // EncodeOldRow encode event data and column ids into a slice of byte. 295 // Row layout: colID1, value1, colID2, value2, ..... 296 // valBuf and values pass by caller, for reducing EncodeOldRow allocates temporary bufs. If you pass valBuf and values as nil, 297 // EncodeOldRow will allocate it. 298 func EncodeOldRow(sc *stmtctx.StatementContext, event []types.Causet, colIDs []int64, valBuf []byte, values []types.Causet) ([]byte, error) { 299 if len(event) != len(colIDs) { 300 return nil, errors.Errorf("EncodeRow error: data and columnID count not match %d vs %d", len(event), len(colIDs)) 301 } 302 valBuf = valBuf[:0] 303 if values == nil { 304 values = make([]types.Causet, len(event)*2) 305 } 306 for i, c := range event { 307 id := colIDs[i] 308 values[2*i].SetInt64(id) 309 err := flatten(sc, c, &values[2*i+1]) 310 if err != nil { 311 return valBuf, errors.Trace(err) 312 } 313 } 314 if len(values) == 0 { 315 // We could not set nil value into ekv. 316 return append(valBuf, codec.NilFlag), nil 317 } 318 return codec.EncodeValue(sc, valBuf, values...) 319 } 320 321 func flatten(sc *stmtctx.StatementContext, data types.Causet, ret *types.Causet) error { 322 switch data.HoTT() { 323 case types.HoTTMysqlTime: 324 // for allegrosql datetime, timestamp and date type 325 t := data.GetMysqlTime() 326 if t.Type() == allegrosql.TypeTimestamp && sc.TimeZone != time.UTC { 327 err := t.ConvertTimeZone(sc.TimeZone, time.UTC) 328 if err != nil { 329 return errors.Trace(err) 330 } 331 } 332 v, err := t.ToPackedUint() 333 ret.SetUint64(v) 334 return errors.Trace(err) 335 case types.HoTTMysqlDuration: 336 // for allegrosql time type 337 ret.SetInt64(int64(data.GetMysqlDuration().Duration)) 338 return nil 339 case types.HoTTMysqlEnum: 340 ret.SetUint64(data.GetMysqlEnum().Value) 341 return nil 342 case types.HoTTMysqlSet: 343 ret.SetUint64(data.GetMysqlSet().Value) 344 return nil 345 case types.HoTTBinaryLiteral, types.HoTTMysqlBit: 346 // We don't need to handle errors here since the literal is ensured to be able to causetstore in uint64 in convertToMysqlBit. 347 val, err := data.GetBinaryLiteral().ToInt(sc) 348 if err != nil { 349 return errors.Trace(err) 350 } 351 ret.SetUint64(val) 352 return nil 353 default: 354 *ret = data 355 return nil 356 } 357 } 358 359 // DecodeDeferredCausetValue decodes data to a Causet according to the column info. 360 func DecodeDeferredCausetValue(data []byte, ft *types.FieldType, loc *time.Location) (types.Causet, error) { 361 _, d, err := codec.DecodeOne(data) 362 if err != nil { 363 return types.Causet{}, errors.Trace(err) 364 } 365 colCauset, err := Unflatten(d, ft, loc) 366 if err != nil { 367 return types.Causet{}, errors.Trace(err) 368 } 369 return colCauset, nil 370 } 371 372 // DecodeRowWithMapNew decode a event to causet map. 373 func DecodeRowWithMapNew(b []byte, defcaus map[int64]*types.FieldType, 374 loc *time.Location, event map[int64]types.Causet) (map[int64]types.Causet, error) { 375 if event == nil { 376 event = make(map[int64]types.Causet, len(defcaus)) 377 } 378 if b == nil { 379 return event, nil 380 } 381 if len(b) == 1 && b[0] == codec.NilFlag { 382 return event, nil 383 } 384 385 reqDefCauss := make([]rowcodec.DefCausInfo, len(defcaus)) 386 var idx int 387 for id, tp := range defcaus { 388 reqDefCauss[idx] = rowcodec.DefCausInfo{ 389 ID: id, 390 Ft: tp, 391 } 392 idx++ 393 } 394 rd := rowcodec.NewCausetFIDelioecoder(reqDefCauss, loc) 395 return rd.DecodeToCausetMap(b, event) 396 } 397 398 // DecodeRowWithMap decodes a byte slice into datums with a existing event map. 399 // Row layout: colID1, value1, colID2, value2, ..... 400 func DecodeRowWithMap(b []byte, defcaus map[int64]*types.FieldType, loc *time.Location, event map[int64]types.Causet) (map[int64]types.Causet, error) { 401 if event == nil { 402 event = make(map[int64]types.Causet, len(defcaus)) 403 } 404 if b == nil { 405 return event, nil 406 } 407 if len(b) == 1 && b[0] == codec.NilFlag { 408 return event, nil 409 } 410 cnt := 0 411 var ( 412 data []byte 413 err error 414 ) 415 for len(b) > 0 { 416 // Get col id. 417 data, b, err = codec.CutOne(b) 418 if err != nil { 419 return nil, errors.Trace(err) 420 } 421 _, cid, err := codec.DecodeOne(data) 422 if err != nil { 423 return nil, errors.Trace(err) 424 } 425 // Get col value. 426 data, b, err = codec.CutOne(b) 427 if err != nil { 428 return nil, errors.Trace(err) 429 } 430 id := cid.GetInt64() 431 ft, ok := defcaus[id] 432 if ok { 433 _, v, err := codec.DecodeOne(data) 434 if err != nil { 435 return nil, errors.Trace(err) 436 } 437 v, err = Unflatten(v, ft, loc) 438 if err != nil { 439 return nil, errors.Trace(err) 440 } 441 event[id] = v 442 cnt++ 443 if cnt == len(defcaus) { 444 // Get enough data. 445 break 446 } 447 } 448 } 449 return event, nil 450 } 451 452 // DecodeRowToCausetMap decodes a byte slice into datums. 453 // Row layout: colID1, value1, colID2, value2, ..... 454 // Default value columns, generated columns and handle columns are unprocessed. 455 func DecodeRowToCausetMap(b []byte, defcaus map[int64]*types.FieldType, loc *time.Location) (map[int64]types.Causet, error) { 456 if !rowcodec.IsNewFormat(b) { 457 return DecodeRowWithMap(b, defcaus, loc, nil) 458 } 459 return DecodeRowWithMapNew(b, defcaus, loc, nil) 460 } 461 462 // DecodeHandleToCausetMap decodes a handle into causet map. 463 func DecodeHandleToCausetMap(handle ekv.Handle, handleDefCausIDs []int64, 464 defcaus map[int64]*types.FieldType, loc *time.Location, event map[int64]types.Causet) (map[int64]types.Causet, error) { 465 if handle == nil || len(handleDefCausIDs) == 0 { 466 return event, nil 467 } 468 if event == nil { 469 event = make(map[int64]types.Causet, len(defcaus)) 470 } 471 for id, ft := range defcaus { 472 for idx, hid := range handleDefCausIDs { 473 if id != hid { 474 continue 475 } 476 d, err := decodeHandleToCauset(handle, ft, idx) 477 if err != nil { 478 return event, err 479 } 480 d, err = Unflatten(d, ft, loc) 481 if err != nil { 482 return event, err 483 } 484 if _, exists := event[id]; !exists { 485 event[id] = d 486 } 487 break 488 } 489 } 490 return event, nil 491 } 492 493 // decodeHandleToCauset decodes a handle to a specific column causet. 494 func decodeHandleToCauset(handle ekv.Handle, ft *types.FieldType, idx int) (types.Causet, error) { 495 var d types.Causet 496 var err error 497 if handle.IsInt() { 498 if allegrosql.HasUnsignedFlag(ft.Flag) { 499 d = types.NewUintCauset(uint64(handle.IntValue())) 500 } else { 501 d = types.NewIntCauset(handle.IntValue()) 502 } 503 return d, nil 504 } 505 // Decode common handle to Causet. 506 _, d, err = codec.DecodeOne(handle.EncodedDefCaus(idx)) 507 return d, err 508 } 509 510 // CutRowNew cuts encoded event into byte slices and return columns' byte slice. 511 // Row layout: colID1, value1, colID2, value2, ..... 512 func CutRowNew(data []byte, colIDs map[int64]int) ([][]byte, error) { 513 if data == nil { 514 return nil, nil 515 } 516 if len(data) == 1 && data[0] == codec.NilFlag { 517 return nil, nil 518 } 519 520 var ( 521 cnt int 522 b []byte 523 err error 524 cid int64 525 ) 526 event := make([][]byte, len(colIDs)) 527 for len(data) > 0 && cnt < len(colIDs) { 528 // Get col id. 529 data, cid, err = codec.CutDeferredCausetID(data) 530 if err != nil { 531 return nil, errors.Trace(err) 532 } 533 534 // Get col value. 535 b, data, err = codec.CutOne(data) 536 if err != nil { 537 return nil, errors.Trace(err) 538 } 539 540 offset, ok := colIDs[cid] 541 if ok { 542 event[offset] = b 543 cnt++ 544 } 545 } 546 return event, nil 547 } 548 549 // UnflattenCausets converts raw datums to column datums. 550 func UnflattenCausets(datums []types.Causet, fts []*types.FieldType, loc *time.Location) ([]types.Causet, error) { 551 for i, causet := range datums { 552 ft := fts[i] 553 uCauset, err := Unflatten(causet, ft, loc) 554 if err != nil { 555 return datums, errors.Trace(err) 556 } 557 datums[i] = uCauset 558 } 559 return datums, nil 560 } 561 562 // Unflatten converts a raw causet to a column causet. 563 func Unflatten(causet types.Causet, ft *types.FieldType, loc *time.Location) (types.Causet, error) { 564 if causet.IsNull() { 565 return causet, nil 566 } 567 switch ft.Tp { 568 case allegrosql.TypeFloat: 569 causet.SetFloat32(float32(causet.GetFloat64())) 570 return causet, nil 571 case allegrosql.TypeVarchar, allegrosql.TypeString, allegrosql.TypeVarString: 572 causet.SetString(causet.GetString(), ft.DefCauslate) 573 case allegrosql.TypeTiny, allegrosql.TypeShort, allegrosql.TypeYear, allegrosql.TypeInt24, 574 allegrosql.TypeLong, allegrosql.TypeLonglong, allegrosql.TypeDouble, allegrosql.TypeTinyBlob, 575 allegrosql.TypeMediumBlob, allegrosql.TypeBlob, allegrosql.TypeLongBlob: 576 return causet, nil 577 case allegrosql.TypeDate, allegrosql.TypeDatetime, allegrosql.TypeTimestamp: 578 t := types.NewTime(types.ZeroCoreTime, ft.Tp, int8(ft.Decimal)) 579 var err error 580 err = t.FromPackedUint(causet.GetUint64()) 581 if err != nil { 582 return causet, errors.Trace(err) 583 } 584 if ft.Tp == allegrosql.TypeTimestamp && !t.IsZero() { 585 err = t.ConvertTimeZone(time.UTC, loc) 586 if err != nil { 587 return causet, errors.Trace(err) 588 } 589 } 590 causet.SetUint64(0) 591 causet.SetMysqlTime(t) 592 return causet, nil 593 case allegrosql.TypeDuration: // duration should read fsp from column spacetime data 594 dur := types.Duration{Duration: time.Duration(causet.GetInt64()), Fsp: int8(ft.Decimal)} 595 causet.SetMysqlDuration(dur) 596 return causet, nil 597 case allegrosql.TypeEnum: 598 // ignore error deliberately, to read empty enum value. 599 enum, err := types.ParseEnumValue(ft.Elems, causet.GetUint64()) 600 if err != nil { 601 enum = types.Enum{} 602 } 603 causet.SetMysqlEnum(enum, ft.DefCauslate) 604 return causet, nil 605 case allegrosql.TypeSet: 606 set, err := types.ParseSetValue(ft.Elems, causet.GetUint64()) 607 if err != nil { 608 return causet, errors.Trace(err) 609 } 610 causet.SetMysqlSet(set, ft.DefCauslate) 611 return causet, nil 612 case allegrosql.TypeBit: 613 val := causet.GetUint64() 614 byteSize := (ft.Flen + 7) >> 3 615 causet.SetUint64(0) 616 causet.SetMysqlBit(types.NewBinaryLiteralFromUint(val, byteSize)) 617 } 618 return causet, nil 619 } 620 621 // EncodeIndexSeekKey encodes an index value to ekv.Key. 622 func EncodeIndexSeekKey(blockID int64, idxID int64, encodedValue []byte) ekv.Key { 623 key := make([]byte, 0, RecordRowKeyLen+len(encodedValue)) 624 key = appendTableIndexPrefix(key, blockID) 625 key = codec.EncodeInt(key, idxID) 626 key = append(key, encodedValue...) 627 return key 628 } 629 630 // CutIndexKey cuts encoded index key into colIDs to bytes slices map. 631 // The returned value b is the remaining bytes of the key which would be empty if it is unique index or handle data 632 // if it is non-unique index. 633 func CutIndexKey(key ekv.Key, colIDs []int64) (values map[int64][]byte, b []byte, err error) { 634 b = key[prefixLen+idLen:] 635 values = make(map[int64][]byte, len(colIDs)) 636 for _, id := range colIDs { 637 var val []byte 638 val, b, err = codec.CutOne(b) 639 if err != nil { 640 return nil, nil, errors.Trace(err) 641 } 642 values[id] = val 643 } 644 return 645 } 646 647 // CutIndexPrefix cuts the index prefix. 648 func CutIndexPrefix(key ekv.Key) []byte { 649 return key[prefixLen+idLen:] 650 } 651 652 // CutIndexKeyNew cuts encoded index key into colIDs to bytes slices. 653 // The returned value b is the remaining bytes of the key which would be empty if it is unique index or handle data 654 // if it is non-unique index. 655 func CutIndexKeyNew(key ekv.Key, length int) (values [][]byte, b []byte, err error) { 656 b = key[prefixLen+idLen:] 657 values = make([][]byte, 0, length) 658 for i := 0; i < length; i++ { 659 var val []byte 660 val, b, err = codec.CutOne(b) 661 if err != nil { 662 return nil, nil, errors.Trace(err) 663 } 664 values = append(values, val) 665 } 666 return 667 } 668 669 // CutCommonHandle cuts encoded common handle key into colIDs to bytes slices. 670 // The returned value b is the remaining bytes of the key which would be empty if it is unique index or handle data 671 // if it is non-unique index. 672 func CutCommonHandle(key ekv.Key, length int) (values [][]byte, b []byte, err error) { 673 b = key[prefixLen:] 674 values = make([][]byte, 0, length) 675 for i := 0; i < length; i++ { 676 var val []byte 677 val, b, err = codec.CutOne(b) 678 if err != nil { 679 return nil, nil, errors.Trace(err) 680 } 681 values = append(values, val) 682 } 683 return 684 } 685 686 // HandleStatus is the handle status in index. 687 type HandleStatus int 688 689 const ( 690 // HandleDefault means decode handle value as int64 or bytes when DecodeIndexKV. 691 HandleDefault HandleStatus = iota 692 // HandleIsUnsigned means decode handle value as uint64 when DecodeIndexKV. 693 HandleIsUnsigned 694 // HandleNotNeeded means no need to decode handle value when DecodeIndexKV. 695 HandleNotNeeded 696 ) 697 698 // reEncodeHandle encodes the handle as a Causet so it can be properly decoded later. 699 // If it is common handle, it returns the encoded column values. 700 // If it is int handle, it is encoded as int Causet or uint Causet decided by the unsigned. 701 func reEncodeHandle(handle ekv.Handle, unsigned bool) ([][]byte, error) { 702 if !handle.IsInt() { 703 handleDefCausLen := handle.NumDefCauss() 704 cHandleBytes := make([][]byte, 0, handleDefCausLen) 705 for i := 0; i < handleDefCausLen; i++ { 706 cHandleBytes = append(cHandleBytes, handle.EncodedDefCaus(i)) 707 } 708 return cHandleBytes, nil 709 } 710 handleCauset := types.NewIntCauset(handle.IntValue()) 711 if unsigned { 712 handleCauset.SetUint64(handleCauset.GetUint64()) 713 } 714 intHandleBytes, err := codec.EncodeValue(nil, nil, handleCauset) 715 return [][]byte{intHandleBytes}, err 716 } 717 718 func decodeRestoredValues(columns []rowcodec.DefCausInfo, restoredVal []byte) ([][]byte, error) { 719 colIDs := make(map[int64]int, len(columns)) 720 for i, col := range columns { 721 colIDs[col.ID] = i 722 } 723 // We don't need to decode handle here, and colIDs >= 0 always. 724 rd := rowcodec.NewByteCausetDecoder(columns, []int64{-1}, nil, nil) 725 resultValues, err := rd.DecodeToBytesNoHandle(colIDs, restoredVal) 726 if err != nil { 727 return nil, errors.Trace(err) 728 } 729 return resultValues, nil 730 } 731 732 func decodeIndexEkvOldDefCauslation(key, value []byte, defcausLen int, hdStatus HandleStatus) ([][]byte, error) { 733 resultValues, b, err := CutIndexKeyNew(key, defcausLen) 734 if err != nil { 735 return nil, errors.Trace(err) 736 } 737 if hdStatus == HandleNotNeeded { 738 return resultValues, nil 739 } 740 var handle ekv.Handle 741 if len(b) > 0 { 742 // non-unique index 743 handle, err = decodeHandleInIndexKey(b) 744 if err != nil { 745 return nil, err 746 } 747 handleBytes, err := reEncodeHandle(handle, hdStatus == HandleIsUnsigned) 748 if err != nil { 749 return nil, errors.Trace(err) 750 } 751 resultValues = append(resultValues, handleBytes...) 752 } else { 753 // In unique int handle index. 754 handle = decodeIntHandleInIndexValue(value) 755 handleBytes, err := reEncodeHandle(handle, hdStatus == HandleIsUnsigned) 756 if err != nil { 757 return nil, errors.Trace(err) 758 } 759 resultValues = append(resultValues, handleBytes...) 760 } 761 return resultValues, nil 762 } 763 764 // DecodeIndexKV uses to decode index key values. 765 func DecodeIndexKV(key, value []byte, defcausLen int, hdStatus HandleStatus, columns []rowcodec.DefCausInfo) ([][]byte, error) { 766 if len(value) <= MaxOldEncodeValueLen { 767 return decodeIndexEkvOldDefCauslation(key, value, defcausLen, hdStatus) 768 } 769 return decodeIndexEkvGeneral(key, value, defcausLen, hdStatus, columns) 770 } 771 772 // DecodeIndexHandle uses to decode the handle from index key/value. 773 func DecodeIndexHandle(key, value []byte, defcausLen int) (ekv.Handle, error) { 774 _, b, err := CutIndexKeyNew(key, defcausLen) 775 if err != nil { 776 return nil, errors.Trace(err) 777 } 778 if len(b) > 0 { 779 return decodeHandleInIndexKey(b) 780 } else if len(value) >= 8 { 781 return decodeHandleInIndexValue(value) 782 } 783 // Should never execute to here. 784 return nil, errors.Errorf("no handle in index key: %v, value: %v", key, value) 785 } 786 787 func decodeHandleInIndexKey(keySuffix []byte) (ekv.Handle, error) { 788 remain, d, err := codec.DecodeOne(keySuffix) 789 if err != nil { 790 return nil, errors.Trace(err) 791 } 792 if len(remain) == 0 && d.HoTT() == types.HoTTInt64 { 793 return ekv.IntHandle(d.GetInt64()), nil 794 } 795 return ekv.NewCommonHandle(keySuffix) 796 } 797 798 func decodeHandleInIndexValue(value []byte) (ekv.Handle, error) { 799 if len(value) > MaxOldEncodeValueLen { 800 tailLen := value[0] 801 if tailLen >= 8 { 802 return decodeIntHandleInIndexValue(value[len(value)-int(tailLen):]), nil 803 } 804 handleLen := uint16(value[2])<<8 + uint16(value[3]) 805 return ekv.NewCommonHandle(value[4 : 4+handleLen]) 806 } 807 return decodeIntHandleInIndexValue(value), nil 808 } 809 810 // decodeIntHandleInIndexValue uses to decode index value as int handle id. 811 func decodeIntHandleInIndexValue(data []byte) ekv.Handle { 812 return ekv.IntHandle(binary.BigEndian.Uint64(data)) 813 } 814 815 // EncodeTableIndexPrefix encodes index prefix with blockID and idxID. 816 func EncodeTableIndexPrefix(blockID, idxID int64) ekv.Key { 817 key := make([]byte, 0, prefixLen) 818 key = appendTableIndexPrefix(key, blockID) 819 key = codec.EncodeInt(key, idxID) 820 return key 821 } 822 823 // EncodeTablePrefix encodes causet prefix with causet ID. 824 func EncodeTablePrefix(blockID int64) ekv.Key { 825 var key ekv.Key 826 key = append(key, blockPrefix...) 827 key = codec.EncodeInt(key, blockID) 828 return key 829 } 830 831 // appendTableRecordPrefix appends causet record prefix "t[blockID]_r". 832 func appendTableRecordPrefix(buf []byte, blockID int64) []byte { 833 buf = append(buf, blockPrefix...) 834 buf = codec.EncodeInt(buf, blockID) 835 buf = append(buf, recordPrefixSep...) 836 return buf 837 } 838 839 // appendTableIndexPrefix appends causet index prefix "t[blockID]_i". 840 func appendTableIndexPrefix(buf []byte, blockID int64) []byte { 841 buf = append(buf, blockPrefix...) 842 buf = codec.EncodeInt(buf, blockID) 843 buf = append(buf, indexPrefixSep...) 844 return buf 845 } 846 847 // GenTableRecordPrefix composes record prefix with blockID: "t[blockID]_r". 848 func GenTableRecordPrefix(blockID int64) ekv.Key { 849 buf := make([]byte, 0, len(blockPrefix)+8+len(recordPrefixSep)) 850 return appendTableRecordPrefix(buf, blockID) 851 } 852 853 // GenTableIndexPrefix composes index prefix with blockID: "t[blockID]_i". 854 func GenTableIndexPrefix(blockID int64) ekv.Key { 855 buf := make([]byte, 0, len(blockPrefix)+8+len(indexPrefixSep)) 856 return appendTableIndexPrefix(buf, blockID) 857 } 858 859 // IsIndexKey is used to check whether the key is an index key. 860 func IsIndexKey(k []byte) bool { 861 return len(k) > 11 && k[0] == 't' && k[10] == 'i' 862 } 863 864 // IsUntouchedIndexKValue uses to check whether the key is index key, and the value is untouched, 865 // since the untouched index key/value is no need to commit. 866 func IsUntouchedIndexKValue(k, v []byte) bool { 867 if !IsIndexKey(k) { 868 return false 869 } 870 vLen := len(v) 871 if vLen <= MaxOldEncodeValueLen { 872 return (vLen == 1 || vLen == 9) && v[vLen-1] == ekv.UnCommitIndexKVFlag 873 } 874 // New index value format 875 tailLen := int(v[0]) 876 if tailLen < 8 { 877 // Non-unique index. 878 return tailLen >= 1 && v[vLen-1] == ekv.UnCommitIndexKVFlag 879 } 880 // Unique index 881 return tailLen == 9 882 } 883 884 // GenTablePrefix composes causet record and index prefix: "t[blockID]". 885 func GenTablePrefix(blockID int64) ekv.Key { 886 buf := make([]byte, 0, len(blockPrefix)+8) 887 buf = append(buf, blockPrefix...) 888 buf = codec.EncodeInt(buf, blockID) 889 return buf 890 } 891 892 // TruncateToRowKeyLen truncates the key to event key length if the key is longer than event key. 893 func TruncateToRowKeyLen(key ekv.Key) ekv.Key { 894 if len(key) > RecordRowKeyLen { 895 return key[:RecordRowKeyLen] 896 } 897 return key 898 } 899 900 // GetTableHandleKeyRange returns causet handle's key range with blockID. 901 func GetTableHandleKeyRange(blockID int64) (startKey, endKey []byte) { 902 startKey = EncodeRowKeyWithHandle(blockID, ekv.IntHandle(math.MinInt64)) 903 endKey = EncodeRowKeyWithHandle(blockID, ekv.IntHandle(math.MaxInt64)) 904 return 905 } 906 907 // GetTableIndexKeyRange returns causet index's key range with blockID and indexID. 908 func GetTableIndexKeyRange(blockID, indexID int64) (startKey, endKey []byte) { 909 startKey = EncodeIndexSeekKey(blockID, indexID, nil) 910 endKey = EncodeIndexSeekKey(blockID, indexID, []byte{255}) 911 return 912 } 913 914 // GetIndexKeyBuf reuse or allocate buffer 915 func GetIndexKeyBuf(buf []byte, defaultCap int) []byte { 916 if buf != nil { 917 return buf[:0] 918 } 919 return make([]byte, 0, defaultCap) 920 } 921 922 // GenIndexKey generates index key using input physical causet id 923 func GenIndexKey(sc *stmtctx.StatementContext, tblInfo *perceptron.TableInfo, idxInfo *perceptron.IndexInfo, 924 phyTblID int64, indexedValues []types.Causet, h ekv.Handle, buf []byte) (key []byte, distinct bool, err error) { 925 if idxInfo.Unique { 926 // See https://dev.allegrosql.com/doc/refman/5.7/en/create-index.html 927 // A UNIQUE index creates a constraint such that all values in the index must be distinct. 928 // An error occurs if you try to add a new event with a key value that matches an existing event. 929 // For all engines, a UNIQUE index permits multiple NULL values for columns that can contain NULL. 930 distinct = true 931 for _, cv := range indexedValues { 932 if cv.IsNull() { 933 distinct = false 934 break 935 } 936 } 937 } 938 // For string columns, indexes can be created using only the leading part of column values, 939 // using col_name(length) syntax to specify an index prefix length. 940 TruncateIndexValues(tblInfo, idxInfo, indexedValues) 941 key = GetIndexKeyBuf(buf, RecordRowKeyLen+len(indexedValues)*9+9) 942 key = appendTableIndexPrefix(key, phyTblID) 943 key = codec.EncodeInt(key, idxInfo.ID) 944 key, err = codec.EncodeKey(sc, key, indexedValues...) 945 if err != nil { 946 return nil, false, err 947 } 948 if !distinct && h != nil { 949 if h.IsInt() { 950 key, err = codec.EncodeKey(sc, key, types.NewCauset(h.IntValue())) 951 } else { 952 key = append(key, h.Encoded()...) 953 } 954 } 955 return 956 } 957 958 // GenIndexValue creates encoded index value and returns the result, only support local index 959 func GenIndexValue(sc *stmtctx.StatementContext, tblInfo *perceptron.TableInfo, idxInfo *perceptron.IndexInfo, containNonBinaryString bool, 960 distinct bool, untouched bool, indexedValues []types.Causet, h ekv.Handle) ([]byte, error) { 961 return GenIndexValueNew(sc, tblInfo, idxInfo, containNonBinaryString, distinct, untouched, indexedValues, h, 0) 962 } 963 964 // GenIndexValueNew create index value for both local and global index. 965 func GenIndexValueNew(sc *stmtctx.StatementContext, tblInfo *perceptron.TableInfo, idxInfo *perceptron.IndexInfo, containNonBinaryString bool, 966 distinct bool, untouched bool, indexedValues []types.Causet, h ekv.Handle, partitionID int64) ([]byte, error) { 967 idxVal := make([]byte, 1) 968 newEncode := false 969 tailLen := 0 970 if !h.IsInt() && distinct { 971 idxVal = encodeCommonHandle(idxVal, h) 972 newEncode = true 973 } 974 if idxInfo.Global { 975 idxVal = encodePartitionID(idxVal, partitionID) 976 newEncode = true 977 } 978 if collate.NewDefCauslationEnabled() && containNonBinaryString { 979 colIds := make([]int64, len(idxInfo.DeferredCausets)) 980 for i, col := range idxInfo.DeferredCausets { 981 colIds[i] = tblInfo.DeferredCausets[col.Offset].ID 982 } 983 rd := rowcodec.CausetEncoder{Enable: true} 984 rowRestoredValue, err := rd.Encode(sc, colIds, indexedValues, nil) 985 if err != nil { 986 return nil, err 987 } 988 idxVal = append(idxVal, rowRestoredValue...) 989 newEncode = true 990 } 991 992 if newEncode { 993 if h.IsInt() && distinct { 994 // The len of the idxVal is always >= 10 since len (restoredValue) > 0. 995 tailLen += 8 996 idxVal = append(idxVal, EncodeHandleInUniqueIndexValue(h, false)...) 997 } else if len(idxVal) < 10 { 998 // Padding the len to 10 999 paddingLen := 10 - len(idxVal) 1000 tailLen += paddingLen 1001 idxVal = append(idxVal, bytes.Repeat([]byte{0x0}, paddingLen)...) 1002 } 1003 if untouched { 1004 // If index is untouched and fetch here means the key is exists in EinsteinDB, but not in txn mem-buffer, 1005 // then should also write the untouched index key/value to mem-buffer to make sure the data 1006 // is consistent with the index in txn mem-buffer. 1007 tailLen += 1 1008 idxVal = append(idxVal, ekv.UnCommitIndexKVFlag) 1009 } 1010 idxVal[0] = byte(tailLen) 1011 } else { 1012 // Old index value encoding. 1013 idxVal = make([]byte, 0) 1014 if distinct { 1015 idxVal = EncodeHandleInUniqueIndexValue(h, untouched) 1016 } 1017 if untouched { 1018 // If index is untouched and fetch here means the key is exists in EinsteinDB, but not in txn mem-buffer, 1019 // then should also write the untouched index key/value to mem-buffer to make sure the data 1020 // is consistent with the index in txn mem-buffer. 1021 idxVal = append(idxVal, ekv.UnCommitIndexKVFlag) 1022 } 1023 if len(idxVal) == 0 { 1024 idxVal = []byte{'0'} 1025 } 1026 } 1027 return idxVal, nil 1028 } 1029 1030 // TruncateIndexValues truncates the index values created using only the leading part of column values. 1031 func TruncateIndexValues(tblInfo *perceptron.TableInfo, idxInfo *perceptron.IndexInfo, indexedValues []types.Causet) { 1032 for i := 0; i < len(indexedValues); i++ { 1033 v := &indexedValues[i] 1034 idxDefCaus := idxInfo.DeferredCausets[i] 1035 noPrefixIndex := idxDefCaus.Length == types.UnspecifiedLength 1036 if noPrefixIndex { 1037 continue 1038 } 1039 notStringType := v.HoTT() != types.HoTTString && v.HoTT() != types.HoTTBytes 1040 if notStringType { 1041 continue 1042 } 1043 1044 colInfo := tblInfo.DeferredCausets[idxDefCaus.Offset] 1045 isUTF8Charset := colInfo.Charset == charset.CharsetUTF8 || colInfo.Charset == charset.CharsetUTF8MB4 1046 if isUTF8Charset && utf8.RuneCount(v.GetBytes()) > idxDefCaus.Length { 1047 rs := bytes.Runes(v.GetBytes()) 1048 truncateStr := string(rs[:idxDefCaus.Length]) 1049 // truncate value and limit its length 1050 v.SetString(truncateStr, colInfo.DefCauslate) 1051 if v.HoTT() == types.HoTTBytes { 1052 v.SetBytes(v.GetBytes()) 1053 } 1054 } else if !isUTF8Charset && len(v.GetBytes()) > idxDefCaus.Length { 1055 v.SetBytes(v.GetBytes()[:idxDefCaus.Length]) 1056 if v.HoTT() == types.HoTTString { 1057 v.SetString(v.GetString(), colInfo.DefCauslate) 1058 } 1059 } 1060 } 1061 } 1062 1063 // EncodeHandleInUniqueIndexValue encodes handle in data. 1064 func EncodeHandleInUniqueIndexValue(h ekv.Handle, isUntouched bool) []byte { 1065 if h.IsInt() { 1066 var data [8]byte 1067 binary.BigEndian.PutUint64(data[:], uint64(h.IntValue())) 1068 return data[:] 1069 } 1070 var untouchedFlag byte 1071 if isUntouched { 1072 untouchedFlag = 1 1073 } 1074 return encodeCommonHandle([]byte{untouchedFlag}, h) 1075 } 1076 1077 func encodeCommonHandle(idxVal []byte, h ekv.Handle) []byte { 1078 idxVal = append(idxVal, CommonHandleFlag) 1079 hLen := uint16(len(h.Encoded())) 1080 idxVal = append(idxVal, byte(hLen>>8), byte(hLen)) 1081 idxVal = append(idxVal, h.Encoded()...) 1082 return idxVal 1083 } 1084 1085 // DecodeHandleInUniqueIndexValue decodes handle in data. 1086 func DecodeHandleInUniqueIndexValue(data []byte, isCommonHandle bool) (ekv.Handle, error) { 1087 if !isCommonHandle { 1088 dLen := len(data) 1089 if dLen <= MaxOldEncodeValueLen { 1090 return ekv.IntHandle(int64(binary.BigEndian.Uint64(data))), nil 1091 } 1092 return ekv.IntHandle(int64(binary.BigEndian.Uint64(data[dLen-int(data[0]):]))), nil 1093 } 1094 tailLen := int(data[0]) 1095 data = data[:len(data)-tailLen] 1096 handleLen := uint16(data[2])<<8 + uint16(data[3]) 1097 handleEndOff := 4 + handleLen 1098 h, err := ekv.NewCommonHandle(data[4:handleEndOff]) 1099 if err != nil { 1100 return nil, err 1101 } 1102 return h, nil 1103 } 1104 1105 func encodePartitionID(idxVal []byte, partitionID int64) []byte { 1106 idxVal = append(idxVal, PartitionIDFlag) 1107 idxVal = codec.EncodeInt(idxVal, partitionID) 1108 return idxVal 1109 } 1110 1111 type indexValueSegments struct { 1112 commonHandle []byte 1113 partitionID []byte 1114 restoredValues []byte 1115 intHandle []byte 1116 } 1117 1118 // splitIndexValue splits index value into segments. 1119 func splitIndexValue(value []byte) (segs indexValueSegments) { 1120 tailLen := int(value[0]) 1121 tail := value[len(value)-tailLen:] 1122 value = value[1 : len(value)-tailLen] 1123 if len(tail) >= 8 { 1124 segs.intHandle = tail[:8] 1125 } 1126 if len(value) > 0 && value[0] == CommonHandleFlag { 1127 handleLen := uint16(value[1])<<8 + uint16(value[2]) 1128 handleEndOff := 3 + handleLen 1129 segs.commonHandle = value[3:handleEndOff] 1130 value = value[handleEndOff:] 1131 } 1132 if len(value) > 0 && value[0] == PartitionIDFlag { 1133 segs.partitionID = value[1:9] 1134 value = value[9:] 1135 } 1136 if len(value) > 0 && value[0] == RestoreDataFlag { 1137 segs.restoredValues = value 1138 } 1139 return 1140 } 1141 1142 // decodeIndexEkvGeneral decodes index key value pair of new layout in an extensible way. 1143 func decodeIndexEkvGeneral(key, value []byte, defcausLen int, hdStatus HandleStatus, columns []rowcodec.DefCausInfo) ([][]byte, error) { 1144 var resultValues [][]byte 1145 var keySuffix []byte 1146 var handle ekv.Handle 1147 var err error 1148 segs := splitIndexValue(value) 1149 resultValues, keySuffix, err = CutIndexKeyNew(key, defcausLen) 1150 if err != nil { 1151 return nil, err 1152 } 1153 if segs.restoredValues != nil { // new collation 1154 resultValues, err = decodeRestoredValues(columns[:defcausLen], segs.restoredValues) 1155 if err != nil { 1156 return nil, err 1157 } 1158 } 1159 if hdStatus == HandleNotNeeded { 1160 return resultValues, nil 1161 } 1162 1163 if segs.intHandle != nil { 1164 // In unique int handle index. 1165 handle = decodeIntHandleInIndexValue(segs.intHandle) 1166 } else if segs.commonHandle != nil { 1167 // In unique common handle index. 1168 handle, err = decodeHandleInIndexKey(segs.commonHandle) 1169 if err != nil { 1170 return nil, err 1171 } 1172 } else { 1173 // In non-unique index, decode handle in keySuffix 1174 handle, err = decodeHandleInIndexKey(keySuffix) 1175 if err != nil { 1176 return nil, err 1177 } 1178 } 1179 handleBytes, err := reEncodeHandle(handle, hdStatus == HandleIsUnsigned) 1180 if err != nil { 1181 return nil, err 1182 } 1183 resultValues = append(resultValues, handleBytes...) 1184 if segs.partitionID != nil { 1185 _, pid, err := codec.DecodeInt(segs.partitionID) 1186 if err != nil { 1187 return nil, err 1188 } 1189 causet := types.NewIntCauset(pid) 1190 pidBytes, err := codec.EncodeValue(nil, nil, causet) 1191 if err != nil { 1192 return nil, err 1193 } 1194 resultValues = append(resultValues, pidBytes) 1195 } 1196 return resultValues, nil 1197 }