github.com/matrixorigin/matrixone@v0.7.0/pkg/container/types/tuple.go (about) 1 /* 2 * tuple.go 3 * 4 * This source file is part of the FoundationDB open source project 5 * 6 * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors 7 * 8 * Licensed under the Apache License, Version 2.0 (the "License"); 9 * you may not use this file except in compliance with the License. 10 * You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 * 20 * Portions of this file are additionally subject to the following 21 * copyright. 22 * 23 * Copyright (C) 2022 Matrix Origin. 24 * 25 * Modified the behavior of the tuple. 26 */ 27 28 package types 29 30 import ( 31 "bytes" 32 "encoding/binary" 33 "fmt" 34 "github.com/matrixorigin/matrixone/pkg/common/mpool" 35 "math" 36 37 "github.com/matrixorigin/matrixone/pkg/common/moerr" 38 ) 39 40 /* 41 * Tuple type is used for encoding multiColumns to single column 42 * for example: 43 * we create table (a int8, b int8, primary key(a, b)) 44 * we need to create composite primary key to combine a and b 45 * we have one method to generate the primary key([]byte): 46 * var a int8 = 1, var b int8 = 1 47 * packer := newPacker() 48 * packer.EncodeInt8(a) 49 * packer.EncodeInt8(b) 50 * var byteArr []byte 51 * byteArr = packer.GetBuf() 52 * we have one method recover from []byte to tuple 53 * var tuple Tuple 54 * tuple, err = Unpack(byteArr) 55 * tuple[0] = 1 56 * tuple[1] = 1 57 * 58 * in the composite_primary_key_util.go, we default use method2 to encode tupleElement 59 */ 60 61 type TupleElement interface{} 62 63 type Tuple []TupleElement 64 65 func (t Tuple) String() string { 66 return printTuple(t) 67 } 68 69 func printTuple(tuple Tuple) string { 70 res := "(" 71 for i, t := range tuple { 72 switch t := t.(type) { 73 case bool: 74 res += fmt.Sprintf("(bool: %v)", t) 75 case int8: 76 res += fmt.Sprintf("(int8: %v)", t) 77 case int16: 78 res += fmt.Sprintf("(int16: %v)", t) 79 case int32: 80 res += fmt.Sprintf("(int32: %v)", t) 81 case int64: 82 res += fmt.Sprintf("(int64: %v)", t) 83 case uint8: 84 res += fmt.Sprintf("(uint8: %v)", t) 85 case uint16: 86 res += fmt.Sprintf("(uint16: %v)", t) 87 case uint32: 88 res += fmt.Sprintf("(uint32: %v)", t) 89 case uint64: 90 res += fmt.Sprintf("(uint64: %v)", t) 91 case Date: 92 res += fmt.Sprintf("(date: %v)", t.String()) 93 case Time: 94 res += fmt.Sprintf("(time: %v)", t.String()) 95 case Datetime: 96 res += fmt.Sprintf("(datetime: %v)", t.String()) 97 case Timestamp: 98 res += fmt.Sprintf("(timestamp: %v)", t.String()) 99 case Decimal64: 100 res += fmt.Sprintf("(decimal64: %v)", t.String()) 101 case Decimal128: 102 res += fmt.Sprintf("(decimal128: %v)", t.String()) 103 case []byte: 104 res += fmt.Sprintf("([]byte: %v)", t) 105 case float32: 106 res += fmt.Sprintf("(float32: %v)", t) 107 case float64: 108 res += fmt.Sprintf("(float64: %v)", t) 109 default: 110 res += fmt.Sprintf("(unorganizedType: %v)", t) 111 } 112 if i != len(tuple)-1 { 113 res += "," 114 } 115 } 116 res += ")" 117 return res 118 } 119 120 const nilCode = 0x00 121 const bytesCode = 0x01 122 const intZeroCode = 0x14 123 const float32Code = 0x20 124 const float64Code = 0x21 125 const falseCode = 0x26 126 const trueCode = 0x27 127 const int8Code = 0x28 128 const int16Code = 0x29 129 const int32Code = 0x3a 130 const int64Code = 0x3b 131 const uint8Code = 0x3c 132 const uint16Code = 0x3d 133 const uint32Code = 0x3e 134 const uint64Code = 0x40 135 const dateCode = 0x41 136 const datetimeCode = 0x42 137 const timestampCode = 0x43 138 const decimal64Code = 0x44 139 const decimal128Code = 0x45 140 const stringTypeCode = 0x46 141 const timeCode = 0x47 // TODO: reorder the list to put timeCode next to date type code? 142 143 var sizeLimits = []uint64{ 144 1<<(0*8) - 1, 145 1<<(1*8) - 1, 146 1<<(2*8) - 1, 147 1<<(3*8) - 1, 148 1<<(4*8) - 1, 149 1<<(5*8) - 1, 150 1<<(6*8) - 1, 151 1<<(7*8) - 1, 152 1<<(8*8) - 1, 153 } 154 155 func bisectLeft(u uint64) int { 156 var n int 157 for sizeLimits[n] < u { 158 n++ 159 } 160 return n 161 } 162 163 func adjustFloatBytes(b []byte, encode bool) { 164 if (encode && b[0]&0x80 != 0x00) || (!encode && b[0]&0x80 == 0x00) { 165 // Negative numbers: flip all of the bytes. 166 for i := 0; i < len(b); i++ { 167 b[i] = b[i] ^ 0xff 168 } 169 } else { 170 // Positive number: flip just the sign bit. 171 b[0] = b[0] ^ 0x80 172 } 173 } 174 175 const PackerMemUnit = 64 176 177 type packer struct { 178 buf []byte 179 size int 180 capacity int 181 mp *mpool.MPool 182 } 183 184 func NewPacker(mp *mpool.MPool) *packer { 185 bytes, err := mp.Alloc(PackerMemUnit) 186 if err != nil { 187 panic(err) 188 } 189 return &packer{ 190 buf: bytes, 191 size: 0, 192 capacity: PackerMemUnit, 193 mp: mp, 194 } 195 } 196 197 func NewPackerArray(length int, mp *mpool.MPool) []*packer { 198 packerArr := make([]*packer, length) 199 for num := range packerArr { 200 bytes, err := mp.Alloc(PackerMemUnit) 201 if err != nil { 202 panic(err) 203 } 204 packerArr[num] = &packer{ 205 buf: bytes, 206 size: 0, 207 capacity: PackerMemUnit, 208 mp: mp, 209 } 210 } 211 return packerArr 212 } 213 214 func (p *packer) FreeMem() { 215 if p.buf != nil { 216 p.mp.Free(p.buf) 217 p.size = 0 218 p.capacity = 0 219 p.buf = nil 220 } 221 } 222 223 func (p *packer) putByte(b byte) { 224 if p.size < p.capacity { 225 p.buf[p.size] = b 226 p.size++ 227 } else { 228 p.buf, _ = p.mp.Grow(p.buf, p.capacity+PackerMemUnit) 229 p.capacity += PackerMemUnit 230 p.buf[p.size] = b 231 p.size++ 232 } 233 } 234 235 func (p *packer) putBytes(bs []byte) { 236 if p.size+len(bs) < p.capacity { 237 for _, b := range bs { 238 p.buf[p.size] = b 239 p.size++ 240 } 241 } else { 242 incrementSize := ((len(bs) / PackerMemUnit) + 1) * PackerMemUnit 243 p.buf, _ = p.mp.Grow(p.buf, p.capacity+incrementSize) 244 p.capacity += incrementSize 245 for _, b := range bs { 246 p.buf[p.size] = b 247 p.size++ 248 } 249 } 250 } 251 252 func (p *packer) putBytesNil(b []byte, i int) { 253 for i >= 0 { 254 p.putBytes(b[:i+1]) 255 p.putByte(0xFF) 256 b = b[i+1:] 257 i = bytes.IndexByte(b, 0x00) 258 } 259 p.putBytes(b) 260 } 261 262 func (p *packer) encodeBytes(code byte, b []byte) { 263 p.putByte(code) 264 if i := bytes.IndexByte(b, 0x00); i >= 0 { 265 p.putBytesNil(b, i) 266 } else { 267 p.putBytes(b) 268 } 269 p.putByte(0x00) 270 } 271 272 func (p *packer) encodeUint(i uint64) { 273 if i == 0 { 274 p.putByte(intZeroCode) 275 return 276 } 277 278 n := bisectLeft(i) 279 var scratch [8]byte 280 281 p.putByte(byte(intZeroCode + n)) 282 binary.BigEndian.PutUint64(scratch[:], i) 283 284 p.putBytes(scratch[8-n:]) 285 } 286 287 func (p *packer) encodeInt(i int64) { 288 if i >= 0 { 289 p.encodeUint(uint64(i)) 290 return 291 } 292 293 n := bisectLeft(uint64(-i)) 294 var scratch [8]byte 295 296 p.putByte(byte(intZeroCode - n)) 297 offsetEncoded := int64(sizeLimits[n]) + i 298 binary.BigEndian.PutUint64(scratch[:], uint64(offsetEncoded)) 299 300 p.putBytes(scratch[8-n:]) 301 } 302 303 func (p *packer) encodeFloat32(f float32) { 304 var scratch [4]byte 305 binary.BigEndian.PutUint32(scratch[:], math.Float32bits(f)) 306 adjustFloatBytes(scratch[:], true) 307 308 p.putByte(float32Code) 309 p.putBytes(scratch[:]) 310 } 311 312 func (p *packer) encodeFloat64(d float64) { 313 var scratch [8]byte 314 binary.BigEndian.PutUint64(scratch[:], math.Float64bits(d)) 315 adjustFloatBytes(scratch[:], true) 316 317 p.putByte(float64Code) 318 p.putBytes(scratch[:]) 319 } 320 321 func (p *packer) EncodeInt8(e int8) { 322 p.putByte(int8Code) 323 p.encodeInt(int64(e)) 324 } 325 326 func (p *packer) EncodeInt16(e int16) { 327 p.putByte(int16Code) 328 p.encodeInt(int64(e)) 329 } 330 331 func (p *packer) EncodeInt32(e int32) { 332 p.putByte(int32Code) 333 p.encodeInt(int64(e)) 334 } 335 336 func (p *packer) EncodeInt64(e int64) { 337 p.putByte(int64Code) 338 p.encodeInt(e) 339 } 340 341 func (p *packer) EncodeUint8(e uint8) { 342 p.putByte(uint8Code) 343 p.encodeUint(uint64(e)) 344 } 345 346 func (p *packer) EncodeUint16(e uint16) { 347 p.putByte(uint16Code) 348 p.encodeUint(uint64(e)) 349 } 350 351 func (p *packer) EncodeUint32(e uint32) { 352 p.putByte(uint32Code) 353 p.encodeUint(uint64(e)) 354 } 355 356 func (p *packer) EncodeUint64(e uint64) { 357 p.putByte(uint64Code) 358 p.encodeUint(e) 359 } 360 361 func (p *packer) EncodeFloat32(e float32) { 362 p.encodeFloat32(e) 363 } 364 365 func (p *packer) EncodeFloat64(e float64) { 366 p.encodeFloat64(e) 367 } 368 369 func (p *packer) EncodeBool(e bool) { 370 if e { 371 p.putByte(trueCode) 372 } else { 373 p.putByte(falseCode) 374 } 375 } 376 377 func (p *packer) EncodeDate(e Date) { 378 p.putByte(dateCode) 379 p.encodeInt(int64(e)) 380 } 381 382 func (p *packer) EncodeTime(e Time) { 383 p.putByte(timeCode) 384 p.encodeInt(int64(e)) 385 } 386 387 func (p *packer) EncodeDatetime(e Datetime) { 388 p.putByte(datetimeCode) 389 p.encodeInt(int64(e)) 390 } 391 392 func (p *packer) EncodeTimestamp(e Timestamp) { 393 p.putByte(timestampCode) 394 p.encodeInt(int64(e)) 395 } 396 397 func (p *packer) EncodeDecimal64(e Decimal64) { 398 p.putByte(decimal64Code) 399 b := [8]byte(e) 400 p.encodeBytes(bytesCode, b[:]) 401 } 402 403 func (p *packer) EncodeDecimal128(e Decimal128) { 404 p.putByte(decimal128Code) 405 b := [16]byte(e) 406 p.encodeBytes(bytesCode, b[:]) 407 } 408 409 func (p *packer) EncodeStringType(e []byte) { 410 p.putByte(stringTypeCode) 411 p.encodeBytes(bytesCode, e) 412 } 413 414 func (p *packer) GetBuf() []byte { 415 return p.buf[:p.size] 416 } 417 418 func findTerminator(b []byte) int { 419 bp := b 420 var length int 421 422 for { 423 idx := bytes.IndexByte(bp, 0x00) 424 length += idx 425 if idx+1 == len(bp) || bp[idx+1] != 0xFF { 426 break 427 } 428 length += 2 429 bp = bp[idx+2:] 430 } 431 432 return length 433 } 434 435 func decodeBytes(b []byte) ([]byte, int) { 436 idx := findTerminator(b[1:]) 437 return bytes.Replace(b[1:idx+1], []byte{0x00, 0xFF}, []byte{0x00}, -1), idx + 2 438 } 439 440 func decodeInt(code byte, b []byte) (interface{}, int) { 441 if b[0] == intZeroCode { 442 switch code { 443 case int8Code: 444 return int8(0), 1 445 case int16Code: 446 return int16(0), 1 447 case int32Code: 448 return int32(0), 1 449 case dateCode: 450 return Date(0), 1 451 case datetimeCode: 452 return Datetime(0), 1 453 case timestampCode: 454 return Timestamp(0), 1 455 default: 456 return int64(0), 1 457 } 458 } 459 460 var neg bool 461 462 n := int(b[0]) - intZeroCode 463 if n < 0 { 464 n = -n 465 neg = true 466 } 467 468 bp := make([]byte, 8) 469 copy(bp[8-n:], b[1:n+1]) 470 471 var ret int64 472 binary.Read(bytes.NewBuffer(bp), binary.BigEndian, &ret) 473 474 if neg { 475 switch code { 476 case int8Code: 477 return int8(ret - int64(sizeLimits[n])), n + 1 478 case int16Code: 479 return int16(ret - int64(sizeLimits[n])), n + 1 480 case int32Code: 481 return int32(ret - int64(sizeLimits[n])), n + 1 482 case dateCode: 483 return Date(ret - int64(sizeLimits[n])), n + 1 484 case datetimeCode: 485 return Datetime(ret - int64(sizeLimits[n])), n + 1 486 case timestampCode: 487 return Timestamp(ret - int64(sizeLimits[n])), n + 1 488 default: 489 return ret - int64(sizeLimits[n]), n + 1 490 } 491 } 492 switch code { 493 case int8Code: 494 return int8(ret), n + 1 495 case int16Code: 496 return int16(ret), n + 1 497 case int32Code: 498 return int32(ret), n + 1 499 case dateCode: 500 return Date(ret), n + 1 501 case datetimeCode: 502 return Datetime(ret), n + 1 503 case timestampCode: 504 return Timestamp(ret), n + 1 505 default: 506 return ret, n + 1 507 } 508 } 509 510 func decodeUint(code byte, b []byte) (interface{}, int) { 511 if b[0] == intZeroCode { 512 switch code { 513 case uint8Code: 514 return uint8(0), 1 515 case uint16Code: 516 return uint16(0), 1 517 case uint32Code: 518 return uint32(0), 1 519 } 520 return uint64(0), 1 521 } 522 n := int(b[0]) - intZeroCode 523 524 bp := make([]byte, 8) 525 copy(bp[8-n:], b[1:n+1]) 526 527 var ret uint64 528 binary.Read(bytes.NewBuffer(bp), binary.BigEndian, &ret) 529 530 switch code { 531 case uint8Code: 532 return uint8(ret), n + 1 533 case uint16Code: 534 return uint16(ret), n + 1 535 case uint32Code: 536 return uint32(ret), n + 1 537 default: 538 return ret, n + 1 539 } 540 } 541 542 func decodeFloat32(b []byte) (float32, int) { 543 bp := make([]byte, 4) 544 copy(bp, b[1:]) 545 adjustFloatBytes(bp, false) 546 var ret float32 547 binary.Read(bytes.NewBuffer(bp), binary.BigEndian, &ret) 548 return ret, 5 549 } 550 551 func decodeFloat64(b []byte) (float64, int) { 552 bp := make([]byte, 8) 553 copy(bp, b[1:]) 554 adjustFloatBytes(bp, false) 555 var ret float64 556 binary.Read(bytes.NewBuffer(bp), binary.BigEndian, &ret) 557 return ret, 9 558 } 559 560 func decodeTuple(b []byte) (Tuple, int, error) { 561 var t Tuple 562 563 var i int 564 565 for i < len(b) { 566 var el interface{} 567 // used for type decimal64/128 568 var dEl []byte 569 var off int 570 571 switch { 572 case b[i] == nilCode: 573 el = nil 574 off = 1 575 case b[i] == int8Code: 576 el, off = decodeInt(int8Code, b[i+1:]) 577 off += 1 578 case b[i] == int16Code: 579 el, off = decodeInt(int16Code, b[i+1:]) 580 off += 1 581 case b[i] == int32Code: 582 el, off = decodeInt(int32Code, b[i+1:]) 583 off += 1 584 case b[i] == int64Code: 585 el, off = decodeInt(int64Code, b[i+1:]) 586 off += 1 587 case b[i] == uint8Code: 588 el, off = decodeUint(uint8Code, b[i+1:]) 589 off += 1 590 case b[i] == uint16Code: 591 el, off = decodeUint(uint16Code, b[i+1:]) 592 off += 1 593 case b[i] == uint32Code: 594 el, off = decodeUint(uint32Code, b[i+1:]) 595 off += 1 596 case b[i] == uint64Code: 597 el, off = decodeUint(uint64Code, b[i+1:]) 598 off += 1 599 case b[i] == trueCode: 600 el = true 601 off = 1 602 case b[i] == falseCode: 603 el = false 604 off = 1 605 case b[i] == float32Code: 606 el, off = decodeFloat32(b[i:]) 607 case b[i] == float64Code: 608 el, off = decodeFloat64(b[i:]) 609 case b[i] == dateCode: 610 el, off = decodeInt(dateCode, b[i+1:]) 611 off += 1 612 case b[i] == datetimeCode: 613 el, off = decodeInt(datetimeCode, b[i+1:]) 614 off += 1 615 case b[i] == timestampCode: 616 el, off = decodeInt(timestampCode, b[i+1:]) 617 off += 1 618 case b[i] == decimal64Code: 619 dEl, off = decodeBytes(b[i+1:]) 620 var bb [8]byte 621 copy(bb[:], dEl[:8]) 622 el = Decimal64(bb) 623 off += 1 624 case b[i] == decimal128Code: 625 dEl, off = decodeBytes(b[i+1:]) 626 var bb [16]byte 627 copy(bb[:], dEl[:16]) 628 el = Decimal128(bb) 629 off += 1 630 case b[i] == stringTypeCode: 631 el, off = decodeBytes(b[i+1:]) 632 off += 1 633 default: 634 return nil, i, moerr.NewInternalErrorNoCtx("unable to decode tuple element with unknown typecode %02x", b[i]) 635 } 636 t = append(t, el) 637 i += off 638 } 639 640 return t, i, nil 641 } 642 643 func Unpack(b []byte) (Tuple, error) { 644 t, _, err := decodeTuple(b) 645 return t, err 646 }