github.com/matrixorigin/matrixone@v0.7.0/pkg/container/types/tuple.go

github.com/matrixorigin/matrixone@v0.7.0/pkg/container/types/tuple.go (about)

     1  /*
     2   * tuple.go
     3   *
     4   * This source file is part of the FoundationDB open source project
     5   *
     6   * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
     7   *
     8   * Licensed under the Apache License, Version 2.0 (the "License");
     9   * you may not use this file except in compliance with the License.
    10   * You may obtain a copy of the License at
    11   *
    12   *     http://www.apache.org/licenses/LICENSE-2.0
    13   *
    14   * Unless required by applicable law or agreed to in writing, software
    15   * distributed under the License is distributed on an "AS IS" BASIS,
    16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    17   * See the License for the specific language governing permissions and
    18   * limitations under the License.
    19   *
    20   * Portions of this file are additionally subject to the following
    21   * copyright.
    22   *
    23   * Copyright (C) 2022 Matrix Origin.
    24   *
    25   * Modified the behavior of the tuple.
    26   */
    27  
    28  package types
    29  
    30  import (
    31  	"bytes"
    32  	"encoding/binary"
    33  	"fmt"
    34  	"github.com/matrixorigin/matrixone/pkg/common/mpool"
    35  	"math"
    36  
    37  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    38  )
    39  
/*
 * The Tuple type is used for encoding multiple columns into a single column.
 * For example, given
 *    create table t (a int8, b int8, primary key(a, b))
 * a composite primary key has to be built that combines a and b.
 *
 * Generating the primary key ([]byte) from the column values:
 *    var a int8 = 1
 *    var b int8 = 1
 *    packer := NewPacker(mp)
 *    packer.EncodeInt8(a)
 *    packer.EncodeInt8(b)
 *    var byteArr []byte
 *    byteArr = packer.GetBuf()
 *
 * Recovering the tuple from the []byte:
 *    var tuple Tuple
 *    tuple, err = Unpack(byteArr)
 *    // tuple[0] == int8(1)
 *    // tuple[1] == int8(1)
 *
 * In composite_primary_key_util.go, method2 is used by default to encode tuple elements.
 */
    60  
    61  type TupleElement interface{}
    62  
    63  type Tuple []TupleElement
    64  
    65  func (t Tuple) String() string {
    66  	return printTuple(t)
    67  }
    68  
    69  func printTuple(tuple Tuple) string {
    70  	res := "("
    71  	for i, t := range tuple {
    72  		switch t := t.(type) {
    73  		case bool:
    74  			res += fmt.Sprintf("(bool: %v)", t)
    75  		case int8:
    76  			res += fmt.Sprintf("(int8: %v)", t)
    77  		case int16:
    78  			res += fmt.Sprintf("(int16: %v)", t)
    79  		case int32:
    80  			res += fmt.Sprintf("(int32: %v)", t)
    81  		case int64:
    82  			res += fmt.Sprintf("(int64: %v)", t)
    83  		case uint8:
    84  			res += fmt.Sprintf("(uint8: %v)", t)
    85  		case uint16:
    86  			res += fmt.Sprintf("(uint16: %v)", t)
    87  		case uint32:
    88  			res += fmt.Sprintf("(uint32: %v)", t)
    89  		case uint64:
    90  			res += fmt.Sprintf("(uint64: %v)", t)
    91  		case Date:
    92  			res += fmt.Sprintf("(date: %v)", t.String())
    93  		case Time:
    94  			res += fmt.Sprintf("(time: %v)", t.String())
    95  		case Datetime:
    96  			res += fmt.Sprintf("(datetime: %v)", t.String())
    97  		case Timestamp:
    98  			res += fmt.Sprintf("(timestamp: %v)", t.String())
    99  		case Decimal64:
   100  			res += fmt.Sprintf("(decimal64: %v)", t.String())
   101  		case Decimal128:
   102  			res += fmt.Sprintf("(decimal128: %v)", t.String())
   103  		case []byte:
   104  			res += fmt.Sprintf("([]byte: %v)", t)
   105  		case float32:
   106  			res += fmt.Sprintf("(float32: %v)", t)
   107  		case float64:
   108  			res += fmt.Sprintf("(float64: %v)", t)
   109  		default:
   110  			res += fmt.Sprintf("(unorganizedType: %v)", t)
   111  		}
   112  		if i != len(tuple)-1 {
   113  			res += ","
   114  		}
   115  	}
   116  	res += ")"
   117  	return res
   118  }
   119  
   120  const nilCode = 0x00
   121  const bytesCode = 0x01
   122  const intZeroCode = 0x14
   123  const float32Code = 0x20
   124  const float64Code = 0x21
   125  const falseCode = 0x26
   126  const trueCode = 0x27
   127  const int8Code = 0x28
   128  const int16Code = 0x29
   129  const int32Code = 0x3a
   130  const int64Code = 0x3b
   131  const uint8Code = 0x3c
   132  const uint16Code = 0x3d
   133  const uint32Code = 0x3e
   134  const uint64Code = 0x40
   135  const dateCode = 0x41
   136  const datetimeCode = 0x42
   137  const timestampCode = 0x43
   138  const decimal64Code = 0x44
   139  const decimal128Code = 0x45
   140  const stringTypeCode = 0x46
   141  const timeCode = 0x47 // TODO: reorder the list to put timeCode next to date type code?
   142  
   143  var sizeLimits = []uint64{
   144  	1<<(0*8) - 1,
   145  	1<<(1*8) - 1,
   146  	1<<(2*8) - 1,
   147  	1<<(3*8) - 1,
   148  	1<<(4*8) - 1,
   149  	1<<(5*8) - 1,
   150  	1<<(6*8) - 1,
   151  	1<<(7*8) - 1,
   152  	1<<(8*8) - 1,
   153  }
   154  
   155  func bisectLeft(u uint64) int {
   156  	var n int
   157  	for sizeLimits[n] < u {
   158  		n++
   159  	}
   160  	return n
   161  }
   162  
   163  func adjustFloatBytes(b []byte, encode bool) {
   164  	if (encode && b[0]&0x80 != 0x00) || (!encode && b[0]&0x80 == 0x00) {
   165  		// Negative numbers: flip all of the bytes.
   166  		for i := 0; i < len(b); i++ {
   167  			b[i] = b[i] ^ 0xff
   168  		}
   169  	} else {
   170  		// Positive number: flip just the sign bit.
   171  		b[0] = b[0] ^ 0x80
   172  	}
   173  }
   174  
   175  const PackerMemUnit = 64
   176  
   177  type packer struct {
   178  	buf      []byte
   179  	size     int
   180  	capacity int
   181  	mp       *mpool.MPool
   182  }
   183  
   184  func NewPacker(mp *mpool.MPool) *packer {
   185  	bytes, err := mp.Alloc(PackerMemUnit)
   186  	if err != nil {
   187  		panic(err)
   188  	}
   189  	return &packer{
   190  		buf:      bytes,
   191  		size:     0,
   192  		capacity: PackerMemUnit,
   193  		mp:       mp,
   194  	}
   195  }
   196  
   197  func NewPackerArray(length int, mp *mpool.MPool) []*packer {
   198  	packerArr := make([]*packer, length)
   199  	for num := range packerArr {
   200  		bytes, err := mp.Alloc(PackerMemUnit)
   201  		if err != nil {
   202  			panic(err)
   203  		}
   204  		packerArr[num] = &packer{
   205  			buf:      bytes,
   206  			size:     0,
   207  			capacity: PackerMemUnit,
   208  			mp:       mp,
   209  		}
   210  	}
   211  	return packerArr
   212  }
   213  
   214  func (p *packer) FreeMem() {
   215  	if p.buf != nil {
   216  		p.mp.Free(p.buf)
   217  		p.size = 0
   218  		p.capacity = 0
   219  		p.buf = nil
   220  	}
   221  }
   222  
   223  func (p *packer) putByte(b byte) {
   224  	if p.size < p.capacity {
   225  		p.buf[p.size] = b
   226  		p.size++
   227  	} else {
   228  		p.buf, _ = p.mp.Grow(p.buf, p.capacity+PackerMemUnit)
   229  		p.capacity += PackerMemUnit
   230  		p.buf[p.size] = b
   231  		p.size++
   232  	}
   233  }
   234  
   235  func (p *packer) putBytes(bs []byte) {
   236  	if p.size+len(bs) < p.capacity {
   237  		for _, b := range bs {
   238  			p.buf[p.size] = b
   239  			p.size++
   240  		}
   241  	} else {
   242  		incrementSize := ((len(bs) / PackerMemUnit) + 1) * PackerMemUnit
   243  		p.buf, _ = p.mp.Grow(p.buf, p.capacity+incrementSize)
   244  		p.capacity += incrementSize
   245  		for _, b := range bs {
   246  			p.buf[p.size] = b
   247  			p.size++
   248  		}
   249  	}
   250  }
   251  
   252  func (p *packer) putBytesNil(b []byte, i int) {
   253  	for i >= 0 {
   254  		p.putBytes(b[:i+1])
   255  		p.putByte(0xFF)
   256  		b = b[i+1:]
   257  		i = bytes.IndexByte(b, 0x00)
   258  	}
   259  	p.putBytes(b)
   260  }
   261  
   262  func (p *packer) encodeBytes(code byte, b []byte) {
   263  	p.putByte(code)
   264  	if i := bytes.IndexByte(b, 0x00); i >= 0 {
   265  		p.putBytesNil(b, i)
   266  	} else {
   267  		p.putBytes(b)
   268  	}
   269  	p.putByte(0x00)
   270  }
   271  
   272  func (p *packer) encodeUint(i uint64) {
   273  	if i == 0 {
   274  		p.putByte(intZeroCode)
   275  		return
   276  	}
   277  
   278  	n := bisectLeft(i)
   279  	var scratch [8]byte
   280  
   281  	p.putByte(byte(intZeroCode + n))
   282  	binary.BigEndian.PutUint64(scratch[:], i)
   283  
   284  	p.putBytes(scratch[8-n:])
   285  }
   286  
   287  func (p *packer) encodeInt(i int64) {
   288  	if i >= 0 {
   289  		p.encodeUint(uint64(i))
   290  		return
   291  	}
   292  
   293  	n := bisectLeft(uint64(-i))
   294  	var scratch [8]byte
   295  
   296  	p.putByte(byte(intZeroCode - n))
   297  	offsetEncoded := int64(sizeLimits[n]) + i
   298  	binary.BigEndian.PutUint64(scratch[:], uint64(offsetEncoded))
   299  
   300  	p.putBytes(scratch[8-n:])
   301  }
   302  
   303  func (p *packer) encodeFloat32(f float32) {
   304  	var scratch [4]byte
   305  	binary.BigEndian.PutUint32(scratch[:], math.Float32bits(f))
   306  	adjustFloatBytes(scratch[:], true)
   307  
   308  	p.putByte(float32Code)
   309  	p.putBytes(scratch[:])
   310  }
   311  
   312  func (p *packer) encodeFloat64(d float64) {
   313  	var scratch [8]byte
   314  	binary.BigEndian.PutUint64(scratch[:], math.Float64bits(d))
   315  	adjustFloatBytes(scratch[:], true)
   316  
   317  	p.putByte(float64Code)
   318  	p.putBytes(scratch[:])
   319  }
   320  
   321  func (p *packer) EncodeInt8(e int8) {
   322  	p.putByte(int8Code)
   323  	p.encodeInt(int64(e))
   324  }
   325  
   326  func (p *packer) EncodeInt16(e int16) {
   327  	p.putByte(int16Code)
   328  	p.encodeInt(int64(e))
   329  }
   330  
   331  func (p *packer) EncodeInt32(e int32) {
   332  	p.putByte(int32Code)
   333  	p.encodeInt(int64(e))
   334  }
   335  
   336  func (p *packer) EncodeInt64(e int64) {
   337  	p.putByte(int64Code)
   338  	p.encodeInt(e)
   339  }
   340  
   341  func (p *packer) EncodeUint8(e uint8) {
   342  	p.putByte(uint8Code)
   343  	p.encodeUint(uint64(e))
   344  }
   345  
   346  func (p *packer) EncodeUint16(e uint16) {
   347  	p.putByte(uint16Code)
   348  	p.encodeUint(uint64(e))
   349  }
   350  
   351  func (p *packer) EncodeUint32(e uint32) {
   352  	p.putByte(uint32Code)
   353  	p.encodeUint(uint64(e))
   354  }
   355  
   356  func (p *packer) EncodeUint64(e uint64) {
   357  	p.putByte(uint64Code)
   358  	p.encodeUint(e)
   359  }
   360  
   361  func (p *packer) EncodeFloat32(e float32) {
   362  	p.encodeFloat32(e)
   363  }
   364  
   365  func (p *packer) EncodeFloat64(e float64) {
   366  	p.encodeFloat64(e)
   367  }
   368  
   369  func (p *packer) EncodeBool(e bool) {
   370  	if e {
   371  		p.putByte(trueCode)
   372  	} else {
   373  		p.putByte(falseCode)
   374  	}
   375  }
   376  
   377  func (p *packer) EncodeDate(e Date) {
   378  	p.putByte(dateCode)
   379  	p.encodeInt(int64(e))
   380  }
   381  
   382  func (p *packer) EncodeTime(e Time) {
   383  	p.putByte(timeCode)
   384  	p.encodeInt(int64(e))
   385  }
   386  
   387  func (p *packer) EncodeDatetime(e Datetime) {
   388  	p.putByte(datetimeCode)
   389  	p.encodeInt(int64(e))
   390  }
   391  
   392  func (p *packer) EncodeTimestamp(e Timestamp) {
   393  	p.putByte(timestampCode)
   394  	p.encodeInt(int64(e))
   395  }
   396  
   397  func (p *packer) EncodeDecimal64(e Decimal64) {
   398  	p.putByte(decimal64Code)
   399  	b := [8]byte(e)
   400  	p.encodeBytes(bytesCode, b[:])
   401  }
   402  
   403  func (p *packer) EncodeDecimal128(e Decimal128) {
   404  	p.putByte(decimal128Code)
   405  	b := [16]byte(e)
   406  	p.encodeBytes(bytesCode, b[:])
   407  }
   408  
   409  func (p *packer) EncodeStringType(e []byte) {
   410  	p.putByte(stringTypeCode)
   411  	p.encodeBytes(bytesCode, e)
   412  }
   413  
   414  func (p *packer) GetBuf() []byte {
   415  	return p.buf[:p.size]
   416  }
   417  
   418  func findTerminator(b []byte) int {
   419  	bp := b
   420  	var length int
   421  
   422  	for {
   423  		idx := bytes.IndexByte(bp, 0x00)
   424  		length += idx
   425  		if idx+1 == len(bp) || bp[idx+1] != 0xFF {
   426  			break
   427  		}
   428  		length += 2
   429  		bp = bp[idx+2:]
   430  	}
   431  
   432  	return length
   433  }
   434  
   435  func decodeBytes(b []byte) ([]byte, int) {
   436  	idx := findTerminator(b[1:])
   437  	return bytes.Replace(b[1:idx+1], []byte{0x00, 0xFF}, []byte{0x00}, -1), idx + 2
   438  }
   439  
   440  func decodeInt(code byte, b []byte) (interface{}, int) {
   441  	if b[0] == intZeroCode {
   442  		switch code {
   443  		case int8Code:
   444  			return int8(0), 1
   445  		case int16Code:
   446  			return int16(0), 1
   447  		case int32Code:
   448  			return int32(0), 1
   449  		case dateCode:
   450  			return Date(0), 1
   451  		case datetimeCode:
   452  			return Datetime(0), 1
   453  		case timestampCode:
   454  			return Timestamp(0), 1
   455  		default:
   456  			return int64(0), 1
   457  		}
   458  	}
   459  
   460  	var neg bool
   461  
   462  	n := int(b[0]) - intZeroCode
   463  	if n < 0 {
   464  		n = -n
   465  		neg = true
   466  	}
   467  
   468  	bp := make([]byte, 8)
   469  	copy(bp[8-n:], b[1:n+1])
   470  
   471  	var ret int64
   472  	binary.Read(bytes.NewBuffer(bp), binary.BigEndian, &ret)
   473  
   474  	if neg {
   475  		switch code {
   476  		case int8Code:
   477  			return int8(ret - int64(sizeLimits[n])), n + 1
   478  		case int16Code:
   479  			return int16(ret - int64(sizeLimits[n])), n + 1
   480  		case int32Code:
   481  			return int32(ret - int64(sizeLimits[n])), n + 1
   482  		case dateCode:
   483  			return Date(ret - int64(sizeLimits[n])), n + 1
   484  		case datetimeCode:
   485  			return Datetime(ret - int64(sizeLimits[n])), n + 1
   486  		case timestampCode:
   487  			return Timestamp(ret - int64(sizeLimits[n])), n + 1
   488  		default:
   489  			return ret - int64(sizeLimits[n]), n + 1
   490  		}
   491  	}
   492  	switch code {
   493  	case int8Code:
   494  		return int8(ret), n + 1
   495  	case int16Code:
   496  		return int16(ret), n + 1
   497  	case int32Code:
   498  		return int32(ret), n + 1
   499  	case dateCode:
   500  		return Date(ret), n + 1
   501  	case datetimeCode:
   502  		return Datetime(ret), n + 1
   503  	case timestampCode:
   504  		return Timestamp(ret), n + 1
   505  	default:
   506  		return ret, n + 1
   507  	}
   508  }
   509  
   510  func decodeUint(code byte, b []byte) (interface{}, int) {
   511  	if b[0] == intZeroCode {
   512  		switch code {
   513  		case uint8Code:
   514  			return uint8(0), 1
   515  		case uint16Code:
   516  			return uint16(0), 1
   517  		case uint32Code:
   518  			return uint32(0), 1
   519  		}
   520  		return uint64(0), 1
   521  	}
   522  	n := int(b[0]) - intZeroCode
   523  
   524  	bp := make([]byte, 8)
   525  	copy(bp[8-n:], b[1:n+1])
   526  
   527  	var ret uint64
   528  	binary.Read(bytes.NewBuffer(bp), binary.BigEndian, &ret)
   529  
   530  	switch code {
   531  	case uint8Code:
   532  		return uint8(ret), n + 1
   533  	case uint16Code:
   534  		return uint16(ret), n + 1
   535  	case uint32Code:
   536  		return uint32(ret), n + 1
   537  	default:
   538  		return ret, n + 1
   539  	}
   540  }
   541  
   542  func decodeFloat32(b []byte) (float32, int) {
   543  	bp := make([]byte, 4)
   544  	copy(bp, b[1:])
   545  	adjustFloatBytes(bp, false)
   546  	var ret float32
   547  	binary.Read(bytes.NewBuffer(bp), binary.BigEndian, &ret)
   548  	return ret, 5
   549  }
   550  
   551  func decodeFloat64(b []byte) (float64, int) {
   552  	bp := make([]byte, 8)
   553  	copy(bp, b[1:])
   554  	adjustFloatBytes(bp, false)
   555  	var ret float64
   556  	binary.Read(bytes.NewBuffer(bp), binary.BigEndian, &ret)
   557  	return ret, 9
   558  }
   559  
   560  func decodeTuple(b []byte) (Tuple, int, error) {
   561  	var t Tuple
   562  
   563  	var i int
   564  
   565  	for i < len(b) {
   566  		var el interface{}
   567  		// used for type decimal64/128
   568  		var dEl []byte
   569  		var off int
   570  
   571  		switch {
   572  		case b[i] == nilCode:
   573  			el = nil
   574  			off = 1
   575  		case b[i] == int8Code:
   576  			el, off = decodeInt(int8Code, b[i+1:])
   577  			off += 1
   578  		case b[i] == int16Code:
   579  			el, off = decodeInt(int16Code, b[i+1:])
   580  			off += 1
   581  		case b[i] == int32Code:
   582  			el, off = decodeInt(int32Code, b[i+1:])
   583  			off += 1
   584  		case b[i] == int64Code:
   585  			el, off = decodeInt(int64Code, b[i+1:])
   586  			off += 1
   587  		case b[i] == uint8Code:
   588  			el, off = decodeUint(uint8Code, b[i+1:])
   589  			off += 1
   590  		case b[i] == uint16Code:
   591  			el, off = decodeUint(uint16Code, b[i+1:])
   592  			off += 1
   593  		case b[i] == uint32Code:
   594  			el, off = decodeUint(uint32Code, b[i+1:])
   595  			off += 1
   596  		case b[i] == uint64Code:
   597  			el, off = decodeUint(uint64Code, b[i+1:])
   598  			off += 1
   599  		case b[i] == trueCode:
   600  			el = true
   601  			off = 1
   602  		case b[i] == falseCode:
   603  			el = false
   604  			off = 1
   605  		case b[i] == float32Code:
   606  			el, off = decodeFloat32(b[i:])
   607  		case b[i] == float64Code:
   608  			el, off = decodeFloat64(b[i:])
   609  		case b[i] == dateCode:
   610  			el, off = decodeInt(dateCode, b[i+1:])
   611  			off += 1
   612  		case b[i] == datetimeCode:
   613  			el, off = decodeInt(datetimeCode, b[i+1:])
   614  			off += 1
   615  		case b[i] == timestampCode:
   616  			el, off = decodeInt(timestampCode, b[i+1:])
   617  			off += 1
   618  		case b[i] == decimal64Code:
   619  			dEl, off = decodeBytes(b[i+1:])
   620  			var bb [8]byte
   621  			copy(bb[:], dEl[:8])
   622  			el = Decimal64(bb)
   623  			off += 1
   624  		case b[i] == decimal128Code:
   625  			dEl, off = decodeBytes(b[i+1:])
   626  			var bb [16]byte
   627  			copy(bb[:], dEl[:16])
   628  			el = Decimal128(bb)
   629  			off += 1
   630  		case b[i] == stringTypeCode:
   631  			el, off = decodeBytes(b[i+1:])
   632  			off += 1
   633  		default:
   634  			return nil, i, moerr.NewInternalErrorNoCtx("unable to decode tuple element with unknown typecode %02x", b[i])
   635  		}
   636  		t = append(t, el)
   637  		i += off
   638  	}
   639  
   640  	return t, i, nil
   641  }
   642  
   643  func Unpack(b []byte) (Tuple, error) {
   644  	t, _, err := decodeTuple(b)
   645  	return t, err
   646  }