github.com/whtcorpsinc/milevadb-prod@v0.0.0-20211104133533-f57f4be3b597/soliton/chunk/codec.go (about)

     1  // Copyright 2020 WHTCORPS INC, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package chunk
    15  
    16  import (
    17  	"encoding/binary"
    18  	"reflect"
    19  	"unsafe"
    20  
    21  	"github.com/cznic/mathutil"
    22  	"github.com/whtcorpsinc/BerolinaSQL/allegrosql"
    23  	"github.com/whtcorpsinc/milevadb/types"
    24  )
    25  
    26  // Codec is used to:
    27  // 1. encode a Chunk to a byte slice.
    28  // 2. decode a Chunk from a byte slice.
    29  type Codec struct {
    30  	// defCausTypes is used to check whether a DeferredCauset is fixed sized and what the
    31  	// fixed size for every element.
    32  	// NOTE: It's only used for decoding.
    33  	defCausTypes []*types.FieldType
    34  }
    35  
    36  // NewCodec creates a new Codec object for encode or decode a Chunk.
    37  func NewCodec(defCausTypes []*types.FieldType) *Codec {
    38  	return &Codec{defCausTypes}
    39  }
    40  
    41  // Encode encodes a Chunk to a byte slice.
    42  func (c *Codec) Encode(chk *Chunk) []byte {
    43  	buffer := make([]byte, 0, chk.MemoryUsage())
    44  	for _, defCaus := range chk.defCausumns {
    45  		buffer = c.encodeDeferredCauset(buffer, defCaus)
    46  	}
    47  	return buffer
    48  }
    49  
    50  func (c *Codec) encodeDeferredCauset(buffer []byte, defCaus *DeferredCauset) []byte {
    51  	var lenBuffer [4]byte
    52  	// encode length.
    53  	binary.LittleEndian.PutUint32(lenBuffer[:], uint32(defCaus.length))
    54  	buffer = append(buffer, lenBuffer[:4]...)
    55  
    56  	// encode nullCount.
    57  	binary.LittleEndian.PutUint32(lenBuffer[:], uint32(defCaus.nullCount()))
    58  	buffer = append(buffer, lenBuffer[:4]...)
    59  
    60  	// encode nullBitmap.
    61  	if defCaus.nullCount() > 0 {
    62  		numNullBitmapBytes := (defCaus.length + 7) / 8
    63  		buffer = append(buffer, defCaus.nullBitmap[:numNullBitmapBytes]...)
    64  	}
    65  
    66  	// encode offsets.
    67  	if !defCaus.isFixed() {
    68  		numOffsetBytes := (defCaus.length + 1) * 8
    69  		offsetBytes := i64SliceToBytes(defCaus.offsets)
    70  		buffer = append(buffer, offsetBytes[:numOffsetBytes]...)
    71  	}
    72  
    73  	// encode data.
    74  	buffer = append(buffer, defCaus.data...)
    75  	return buffer
    76  }
    77  
    78  func i64SliceToBytes(i64s []int64) (b []byte) {
    79  	if len(i64s) == 0 {
    80  		return nil
    81  	}
    82  	hdr := (*reflect.SliceHeader)(unsafe.Pointer(&b))
    83  	hdr.Len = len(i64s) * 8
    84  	hdr.Cap = hdr.Len
    85  	hdr.Data = uintptr(unsafe.Pointer(&i64s[0]))
    86  	return b
    87  }
    88  
    89  // Decode decodes a Chunk from a byte slice, return the remained unused bytes.
    90  func (c *Codec) Decode(buffer []byte) (*Chunk, []byte) {
    91  	chk := &Chunk{}
    92  	for ordinal := 0; len(buffer) > 0; ordinal++ {
    93  		defCaus := &DeferredCauset{}
    94  		buffer = c.decodeDeferredCauset(buffer, defCaus, ordinal)
    95  		chk.defCausumns = append(chk.defCausumns, defCaus)
    96  	}
    97  	return chk, buffer
    98  }
    99  
   100  // DecodeToChunk decodes a Chunk from a byte slice, return the remained unused bytes.
   101  func (c *Codec) DecodeToChunk(buffer []byte, chk *Chunk) (remained []byte) {
   102  	for i := 0; i < len(chk.defCausumns); i++ {
   103  		buffer = c.decodeDeferredCauset(buffer, chk.defCausumns[i], i)
   104  	}
   105  	return buffer
   106  }
   107  
   108  // decodeDeferredCauset decodes a DeferredCauset from a byte slice, return the remained unused bytes.
   109  func (c *Codec) decodeDeferredCauset(buffer []byte, defCaus *DeferredCauset, ordinal int) (remained []byte) {
   110  	// Todo(Shenghui Wu): Optimize all data is null.
   111  	// decode length.
   112  	defCaus.length = int(binary.LittleEndian.Uint32(buffer))
   113  	buffer = buffer[4:]
   114  
   115  	// decode nullCount.
   116  	nullCount := int(binary.LittleEndian.Uint32(buffer))
   117  	buffer = buffer[4:]
   118  
   119  	// decode nullBitmap.
   120  	if nullCount > 0 {
   121  		numNullBitmapBytes := (defCaus.length + 7) / 8
   122  		defCaus.nullBitmap = buffer[:numNullBitmapBytes:numNullBitmapBytes]
   123  		buffer = buffer[numNullBitmapBytes:]
   124  	} else {
   125  		c.setAllNotNull(defCaus)
   126  	}
   127  
   128  	// decode offsets.
   129  	numFixedBytes := getFixedLen(c.defCausTypes[ordinal])
   130  	numDataBytes := int64(numFixedBytes * defCaus.length)
   131  	if numFixedBytes == -1 {
   132  		numOffsetBytes := (defCaus.length + 1) * 8
   133  		defCaus.offsets = bytesToI64Slice(buffer[:numOffsetBytes:numOffsetBytes])
   134  		buffer = buffer[numOffsetBytes:]
   135  		numDataBytes = defCaus.offsets[defCaus.length]
   136  	} else if cap(defCaus.elemBuf) < numFixedBytes {
   137  		defCaus.elemBuf = make([]byte, numFixedBytes)
   138  	}
   139  
   140  	// decode data.
   141  	defCaus.data = buffer[:numDataBytes:numDataBytes]
   142  	return buffer[numDataBytes:]
   143  }
   144  
   145  var allNotNullBitmap [128]byte
   146  
   147  func (c *Codec) setAllNotNull(defCaus *DeferredCauset) {
   148  	numNullBitmapBytes := (defCaus.length + 7) / 8
   149  	defCaus.nullBitmap = defCaus.nullBitmap[:0]
   150  	for i := 0; i < numNullBitmapBytes; {
   151  		numAppendBytes := mathutil.Min(numNullBitmapBytes-i, cap(allNotNullBitmap))
   152  		defCaus.nullBitmap = append(defCaus.nullBitmap, allNotNullBitmap[:numAppendBytes]...)
   153  		i += numAppendBytes
   154  	}
   155  }
   156  
   157  func bytesToI64Slice(b []byte) (i64s []int64) {
   158  	if len(b) == 0 {
   159  		return nil
   160  	}
   161  	hdr := (*reflect.SliceHeader)(unsafe.Pointer(&i64s))
   162  	hdr.Len = len(b) / 8
   163  	hdr.Cap = hdr.Len
   164  	hdr.Data = uintptr(unsafe.Pointer(&b[0]))
   165  	return i64s
   166  }
   167  
   168  // varElemLen indicates this DeferredCauset is a variable length DeferredCauset.
   169  const varElemLen = -1
   170  
   171  func getFixedLen(defCausType *types.FieldType) int {
   172  	switch defCausType.Tp {
   173  	case allegrosql.TypeFloat:
   174  		return 4
   175  	case allegrosql.TypeTiny, allegrosql.TypeShort, allegrosql.TypeInt24, allegrosql.TypeLong,
   176  		allegrosql.TypeLonglong, allegrosql.TypeDouble, allegrosql.TypeYear, allegrosql.TypeDuration:
   177  		return 8
   178  	case allegrosql.TypeDate, allegrosql.TypeDatetime, allegrosql.TypeTimestamp:
   179  		return sizeTime
   180  	case allegrosql.TypeNewDecimal:
   181  		return types.MyDecimalStructSize
   182  	default:
   183  		return varElemLen
   184  	}
   185  }
   186  
   187  // GetFixedLen get the memory size of a fixed-length type.
   188  // if defCausType is not fixed-length, it returns varElemLen, aka -1.
   189  func GetFixedLen(defCausType *types.FieldType) int {
   190  	return getFixedLen(defCausType)
   191  }
   192  
   193  // EstimateTypeWidth estimates the average width of values of the type.
   194  // This is used by the causet, which doesn't require absolutely correct results;
   195  // it's OK (and expected) to guess if we don't know for sure.
   196  //
   197  // mostly study from https://github.com/postgres/postgres/blob/REL_12_STABLE/src/backend/utils/cache/lsyscache.c#L2356
   198  func EstimateTypeWidth(defCausType *types.FieldType) int {
   199  	defCausLen := getFixedLen(defCausType)
   200  	// Easy if it's a fixed-width type
   201  	if defCausLen != varElemLen {
   202  		return defCausLen
   203  	}
   204  
   205  	defCausLen = defCausType.Flen
   206  	if defCausLen > 0 {
   207  		if defCausLen <= 32 {
   208  			return defCausLen
   209  		}
   210  		if defCausLen < 1000 {
   211  			return 32 + (defCausLen-32)/2 // assume 50%
   212  		}
   213  		/*
   214  		 * Beyond 1000, assume we're looking at something like
   215  		 * "varchar(10000)" where the limit isn't actually reached often, and
   216  		 * use a fixed estimate.
   217  		 */
   218  		return 32 + (1000-32)/2
   219  	}
   220  	// Oops, we have no idea ... wild guess time.
   221  	return 32
   222  }
   223  
   224  func init() {
   225  	for i := 0; i < 128; i++ {
   226  		allNotNullBitmap[i] = 0xFF
   227  	}
   228  }
   229  
   230  // CausetDecoder decodes the data returned from the interlock and stores the result in Chunk.
   231  // How CausetDecoder works:
   232  // 1. Initialization phase: Decode a whole input byte slice to CausetDecoder.intermChk(intermediate chunk) using Codec.Decode.
   233  //    intermChk is introduced to simplify the implementation of decode phase. This phase uses pointer operations with
   234  //    less CPU and memory cost.
   235  // 2. Decode phase:
   236  //    2.1 Set the number of rows to be decoded to a value that is a multiple of 8 and greater than
   237  //        `chk.RequiredRows() - chk.NumRows()`. This reduces the overhead of copying the srcDefCaus.nullBitMap into
   238  //        destDefCaus.nullBitMap.
   239  //    2.2 Append srcDefCaus.offsets to destDefCaus.offsets when the elements is of var-length type. And further adjust the
   240  //        offsets according to descDefCaus.offsets[destDefCaus.length]-srcDefCaus.offsets[0].
   241  //    2.3 Append srcDefCaus.nullBitMap to destDefCaus.nullBitMap.
   242  // 3. Go to step 1 when the input byte slice is consumed.
   243  type CausetDecoder struct {
   244  	intermChk    *Chunk
   245  	codec        *Codec
   246  	remainedRows int
   247  }
   248  
   249  // NewCausetDecoder creates a new CausetDecoder object for decode a Chunk.
   250  func NewCausetDecoder(chk *Chunk, defCausTypes []*types.FieldType) *CausetDecoder {
   251  	return &CausetDecoder{intermChk: chk, codec: NewCodec(defCausTypes), remainedRows: 0}
   252  }
   253  
   254  // Decode decodes multiple rows of CausetDecoder.intermChk and stores the result in chk.
   255  func (c *CausetDecoder) Decode(chk *Chunk) {
   256  	requiredRows := chk.RequiredRows() - chk.NumRows()
   257  	// Set the requiredRows to a multiple of 8.
   258  	requiredRows = (requiredRows + 7) >> 3 << 3
   259  	if requiredRows > c.remainedRows {
   260  		requiredRows = c.remainedRows
   261  	}
   262  	for i := 0; i < chk.NumDefCauss(); i++ {
   263  		c.decodeDeferredCauset(chk, i, requiredRows)
   264  	}
   265  	c.remainedRows -= requiredRows
   266  }
   267  
   268  // Reset decodes data and causetstore the result in CausetDecoder.intermChk. This decode phase uses pointer operations with less
   269  // CPU and memory costs.
   270  func (c *CausetDecoder) Reset(data []byte) {
   271  	c.codec.DecodeToChunk(data, c.intermChk)
   272  	c.remainedRows = c.intermChk.NumRows()
   273  }
   274  
   275  // IsFinished indicates whether CausetDecoder.intermChk has been dried up.
   276  func (c *CausetDecoder) IsFinished() bool {
   277  	return c.remainedRows == 0
   278  }
   279  
   280  // RemainedRows indicates CausetDecoder.intermChk has remained rows.
   281  func (c *CausetDecoder) RemainedRows() int {
   282  	return c.remainedRows
   283  }
   284  
   285  // ReuseIntermChk swaps `CausetDecoder.intermChk` with `chk` directly when `CausetDecoder.intermChk.NumRows()` is no less
   286  // than `chk.requiredRows * factor` where `factor` is 0.8 now. This can avoid the overhead of appending the
   287  // data from `CausetDecoder.intermChk` to `chk`. Moreover, the defCausumn.offsets needs to be further adjusted
   288  // according to defCausumn.offset[0].
   289  func (c *CausetDecoder) ReuseIntermChk(chk *Chunk) {
   290  	for i, defCaus := range c.intermChk.defCausumns {
   291  		defCaus.length = c.remainedRows
   292  		elemLen := getFixedLen(c.codec.defCausTypes[i])
   293  		if elemLen == varElemLen {
   294  			// For var-length types, we need to adjust the offsets before reuse.
   295  			if deltaOffset := defCaus.offsets[0]; deltaOffset != 0 {
   296  				for j := 0; j < len(defCaus.offsets); j++ {
   297  					defCaus.offsets[j] -= deltaOffset
   298  				}
   299  			}
   300  		}
   301  	}
   302  	chk.SwapDeferredCausets(c.intermChk)
   303  	c.remainedRows = 0
   304  }
   305  
   306  func (c *CausetDecoder) decodeDeferredCauset(chk *Chunk, ordinal int, requiredRows int) {
   307  	elemLen := getFixedLen(c.codec.defCausTypes[ordinal])
   308  	numDataBytes := int64(elemLen * requiredRows)
   309  	srcDefCaus := c.intermChk.defCausumns[ordinal]
   310  	destDefCaus := chk.defCausumns[ordinal]
   311  
   312  	if elemLen == varElemLen {
   313  		// For var-length types, we need to adjust the offsets after appending to destDefCaus.
   314  		numDataBytes = srcDefCaus.offsets[requiredRows] - srcDefCaus.offsets[0]
   315  		deltaOffset := destDefCaus.offsets[destDefCaus.length] - srcDefCaus.offsets[0]
   316  		destDefCaus.offsets = append(destDefCaus.offsets, srcDefCaus.offsets[1:requiredRows+1]...)
   317  		for i := destDefCaus.length + 1; i <= destDefCaus.length+requiredRows; i++ {
   318  			destDefCaus.offsets[i] = destDefCaus.offsets[i] + deltaOffset
   319  		}
   320  		srcDefCaus.offsets = srcDefCaus.offsets[requiredRows:]
   321  	}
   322  
   323  	numNullBitmapBytes := (requiredRows + 7) >> 3
   324  	if destDefCaus.length%8 == 0 {
   325  		destDefCaus.nullBitmap = append(destDefCaus.nullBitmap, srcDefCaus.nullBitmap[:numNullBitmapBytes]...)
   326  	} else {
   327  		destDefCaus.appendMultiSameNullBitmap(false, requiredRows)
   328  		bitMapLen := len(destDefCaus.nullBitmap)
   329  		// bitOffset indicates the number of valid bits in destDefCaus.nullBitmap's last byte.
   330  		bitOffset := destDefCaus.length % 8
   331  		startIdx := (destDefCaus.length - 1) >> 3
   332  		for i := 0; i < numNullBitmapBytes; i++ {
   333  			destDefCaus.nullBitmap[startIdx+i] |= srcDefCaus.nullBitmap[i] << bitOffset
   334  			// The high order 8-bitOffset bits in `srcDefCaus.nullBitmap[i]` should be appended to the low order of the next slot.
   335  			if startIdx+i+1 < bitMapLen {
   336  				destDefCaus.nullBitmap[startIdx+i+1] |= srcDefCaus.nullBitmap[i] >> (8 - bitOffset)
   337  			}
   338  		}
   339  	}
   340  	// Set all the redundant bits in the last slot of destDefCaus.nullBitmap to 0.
   341  	numRedundantBits := uint(len(destDefCaus.nullBitmap)*8 - destDefCaus.length - requiredRows)
   342  	bitMask := byte(1<<(8-numRedundantBits)) - 1
   343  	destDefCaus.nullBitmap[len(destDefCaus.nullBitmap)-1] &= bitMask
   344  
   345  	srcDefCaus.nullBitmap = srcDefCaus.nullBitmap[numNullBitmapBytes:]
   346  	destDefCaus.length += requiredRows
   347  
   348  	destDefCaus.data = append(destDefCaus.data, srcDefCaus.data[:numDataBytes]...)
   349  	srcDefCaus.data = srcDefCaus.data[numDataBytes:]
   350  }