github.com/whtcorpsinc/milevadb-prod@v0.0.0-20211104133533-f57f4be3b597/soliton/chunk/chunk.go (about)

     1  // Copyright 2020 WHTCORPS INC, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package chunk
    15  
    16  import (
    17  	"reflect"
    18  	"unsafe"
    19  
    20  	"github.com/cznic/mathutil"
    21  	"github.com/whtcorpsinc/errors"
    22  	"github.com/whtcorpsinc/milevadb/types"
    23  	"github.com/whtcorpsinc/milevadb/types/json"
    24  )
    25  
// msgErrSelNotNil is the error text returned by operations in this file that
// refuse to run while a selection vector is set on either Chunk involved
// (MakeRefTo and SwapDeferredCauset).
var msgErrSelNotNil = "The selection vector of Chunk is not nil. Please file a bug to the MilevaDB Team"
    27  
// Chunk stores multiple rows of data in Apache Arrow format.
// See https://arrow.apache.org/docs/format/Columnar.html#physical-memory-layout
// Values are appended in compact format and can be directly accessed without decoding.
// When the chunk is done processing, we can reuse the allocated memory by resetting it.
type Chunk struct {
	// sel is the selection vector: it lists the physical indexes of the rows
	// that are selected. If it is nil, all rows are selected.
	sel []int

	// defCausumns holds the DeferredCausets of the Chunk. Several entries may
	// point at the same DeferredCauset (see MakeRef).
	defCausumns []*DeferredCauset
	// numVirtualRows indicates the number of virtual rows, which have zero DeferredCauset.
	// It is used only when this Chunk doesn't hold any data, i.e. "len(defCausumns)==0".
	numVirtualRows int
	// capacity indicates the max number of rows this chunk can hold.
	// TODO: replace all usages of capacity to requiredRows and remove this field
	capacity int

	// requiredRows indicates how many rows the parent interlock wants.
	// IsFull reports true once NumRows reaches this value.
	requiredRows int
}
    48  
// Capacity constants that callers can pass as the row capacity when creating a Chunk.
const (
	// InitialCapacity is a capacity of 32 rows.
	InitialCapacity = 32
	// ZeroCapacity is a capacity of 0 rows.
	ZeroCapacity    = 0
)
    54  
// NewChunkWithCapacity creates a new chunk with field types and capacity.
// It calls New with cap used both as the initial capacity and as maxChunkSize,
// so the returned Chunk has capacity == requiredRows == cap.
func NewChunkWithCapacity(fields []*types.FieldType, cap int) *Chunk {
	return New(fields, cap, cap) //FIXME: in following PR.
}
    59  
    60  // New creates a new chunk.
    61  //  cap: the limit for the max number of rows.
    62  //  maxChunkSize: the max limit for the number of rows.
    63  func New(fields []*types.FieldType, cap, maxChunkSize int) *Chunk {
    64  	chk := &Chunk{
    65  		defCausumns:  make([]*DeferredCauset, 0, len(fields)),
    66  		capacity: mathutil.Min(cap, maxChunkSize),
    67  		// set the default value of requiredRows to maxChunkSize to let chk.IsFull() behave
    68  		// like how we judge whether a chunk is full now, then the memex
    69  		// "chk.NumRows() < maxChunkSize"
    70  		// equals to "!chk.IsFull()".
    71  		requiredRows: maxChunkSize,
    72  	}
    73  
    74  	for _, f := range fields {
    75  		chk.defCausumns = append(chk.defCausumns, NewDeferredCauset(f, chk.capacity))
    76  	}
    77  
    78  	return chk
    79  }
    80  
    81  // renewWithCapacity creates a new Chunk based on an existing Chunk with capacity. The newly
    82  // created Chunk has the same data schemaReplicant with the old Chunk.
    83  func renewWithCapacity(chk *Chunk, cap, maxChunkSize int) *Chunk {
    84  	newChk := new(Chunk)
    85  	if chk.defCausumns == nil {
    86  		return newChk
    87  	}
    88  	newChk.defCausumns = renewDeferredCausets(chk.defCausumns, cap)
    89  	newChk.numVirtualRows = 0
    90  	newChk.capacity = cap
    91  	newChk.requiredRows = maxChunkSize
    92  	return newChk
    93  }
    94  
    95  // Renew creates a new Chunk based on an existing Chunk. The newly created Chunk
    96  // has the same data schemaReplicant with the old Chunk. The capacity of the new Chunk
    97  // might be doubled based on the capacity of the old Chunk and the maxChunkSize.
    98  //  chk: old chunk(often used in previous call).
    99  //  maxChunkSize: the limit for the max number of rows.
   100  func Renew(chk *Chunk, maxChunkSize int) *Chunk {
   101  	newCap := reCalcCapacity(chk, maxChunkSize)
   102  	return renewWithCapacity(chk, newCap, maxChunkSize)
   103  }
   104  
   105  // renewDeferredCausets creates the defCausumns of a Chunk. The capacity of the newly
   106  // created defCausumns is equal to cap.
   107  func renewDeferredCausets(oldDefCaus []*DeferredCauset, cap int) []*DeferredCauset {
   108  	defCausumns := make([]*DeferredCauset, 0, len(oldDefCaus))
   109  	for _, defCaus := range oldDefCaus {
   110  		defCausumns = append(defCausumns, newDeferredCauset(defCaus.typeSize(), cap))
   111  	}
   112  	return defCausumns
   113  }
   114  
   115  // renewEmpty creates a new Chunk based on an existing Chunk
   116  // but keep defCausumns empty.
   117  func renewEmpty(chk *Chunk) *Chunk {
   118  	newChk := &Chunk{
   119  		defCausumns:        nil,
   120  		numVirtualRows: chk.numVirtualRows,
   121  		capacity:       chk.capacity,
   122  		requiredRows:   chk.requiredRows,
   123  	}
   124  	if chk.sel != nil {
   125  		newChk.sel = make([]int, len(chk.sel))
   126  		copy(newChk.sel, chk.sel)
   127  	}
   128  	return newChk
   129  }
   130  
   131  // MemoryUsage returns the total memory usage of a Chunk in B.
   132  // We ignore the size of DeferredCauset.length and DeferredCauset.nullCount
   133  // since they have little effect of the total memory usage.
   134  func (c *Chunk) MemoryUsage() (sum int64) {
   135  	for _, defCaus := range c.defCausumns {
   136  		curDefCausMemUsage := int64(unsafe.Sizeof(*defCaus)) + int64(cap(defCaus.nullBitmap)) + int64(cap(defCaus.offsets)*4) + int64(cap(defCaus.data)) + int64(cap(defCaus.elemBuf))
   137  		sum += curDefCausMemUsage
   138  	}
   139  	return
   140  }
   141  
   142  // newFixedLenDeferredCauset creates a fixed length DeferredCauset with elemLen and initial data capacity.
   143  func newFixedLenDeferredCauset(elemLen, cap int) *DeferredCauset {
   144  	return &DeferredCauset{
   145  		elemBuf:    make([]byte, elemLen),
   146  		data:       make([]byte, 0, cap*elemLen),
   147  		nullBitmap: make([]byte, 0, (cap+7)>>3),
   148  	}
   149  }
   150  
   151  // newVarLenDeferredCauset creates a variable length DeferredCauset with initial data capacity.
   152  func newVarLenDeferredCauset(cap int, old *DeferredCauset) *DeferredCauset {
   153  	estimatedElemLen := 8
   154  	// For varLenDeferredCauset (e.g. varchar), the accurate length of an element is unknown.
   155  	// Therefore, in the first interlock.Next we use an experience value -- 8 (so it may make runtime.growslice)
   156  	// but in the following Next call we estimate the length as AVG x 1.125 elemLen of the previous call.
   157  	if old != nil && old.length != 0 {
   158  		estimatedElemLen = (len(old.data) + len(old.data)/8) / old.length
   159  	}
   160  	return &DeferredCauset{
   161  		offsets:    make([]int64, 1, cap+1),
   162  		data:       make([]byte, 0, cap*estimatedElemLen),
   163  		nullBitmap: make([]byte, 0, (cap+7)>>3),
   164  	}
   165  }
   166  
// RequiredRows returns the number of rows at which this Chunk is considered
// full (the value compared against NumRows by IsFull).
func (c *Chunk) RequiredRows() int {
	return c.requiredRows
}
   171  
   172  // SetRequiredRows sets the number of required rows.
   173  func (c *Chunk) SetRequiredRows(requiredRows, maxChunkSize int) *Chunk {
   174  	if requiredRows <= 0 || requiredRows > maxChunkSize {
   175  		requiredRows = maxChunkSize
   176  	}
   177  	c.requiredRows = requiredRows
   178  	return c
   179  }
   180  
// IsFull reports whether this chunk is considered full, i.e. whether NumRows
// has reached requiredRows.
func (c *Chunk) IsFull() bool {
	return c.NumRows() >= c.requiredRows
}
   185  
   186  // Prune creates a new Chunk according to `c` and prunes the defCausumns
   187  // whose index is not in `usedDefCausIdxs`
   188  func (c *Chunk) Prune(usedDefCausIdxs []int) *Chunk {
   189  	chk := renewEmpty(c)
   190  	chk.defCausumns = make([]*DeferredCauset, len(usedDefCausIdxs))
   191  	for i, idx := range usedDefCausIdxs {
   192  		chk.defCausumns[i] = c.defCausumns[idx]
   193  	}
   194  	return chk
   195  }
   196  
// MakeRef makes DeferredCauset in "dstDefCausIdx" reference to DeferredCauset in "srcDefCausIdx".
// After the call both slots point at the same DeferredCauset; no data is copied.
func (c *Chunk) MakeRef(srcDefCausIdx, dstDefCausIdx int) {
	c.defCausumns[dstDefCausIdx] = c.defCausumns[srcDefCausIdx]
}
   201  
   202  // MakeRefTo copies defCausumns `src.defCausumns[srcDefCausIdx]` to `c.defCausumns[dstDefCausIdx]`.
   203  func (c *Chunk) MakeRefTo(dstDefCausIdx int, src *Chunk, srcDefCausIdx int) error {
   204  	if c.sel != nil || src.sel != nil {
   205  		return errors.New(msgErrSelNotNil)
   206  	}
   207  	c.defCausumns[dstDefCausIdx] = src.defCausumns[srcDefCausIdx]
   208  	return nil
   209  }
   210  
   211  // SwapDeferredCauset swaps DeferredCauset "c.defCausumns[defCausIdx]" with DeferredCauset
   212  // "other.defCausumns[otherIdx]". If there exists defCausumns refer to the DeferredCauset to be
   213  // swapped, we need to re-build the reference.
   214  func (c *Chunk) SwapDeferredCauset(defCausIdx int, other *Chunk, otherIdx int) error {
   215  	if c.sel != nil || other.sel != nil {
   216  		return errors.New(msgErrSelNotNil)
   217  	}
   218  	// Find the leftmost DeferredCauset of the reference which is the actual DeferredCauset to
   219  	// be swapped.
   220  	for i := 0; i < defCausIdx; i++ {
   221  		if c.defCausumns[i] == c.defCausumns[defCausIdx] {
   222  			defCausIdx = i
   223  		}
   224  	}
   225  	for i := 0; i < otherIdx; i++ {
   226  		if other.defCausumns[i] == other.defCausumns[otherIdx] {
   227  			otherIdx = i
   228  		}
   229  	}
   230  
   231  	// Find the defCausumns which refer to the actual DeferredCauset to be swapped.
   232  	refDefCaussIdx := make([]int, 0, len(c.defCausumns)-defCausIdx)
   233  	for i := defCausIdx; i < len(c.defCausumns); i++ {
   234  		if c.defCausumns[i] == c.defCausumns[defCausIdx] {
   235  			refDefCaussIdx = append(refDefCaussIdx, i)
   236  		}
   237  	}
   238  	refDefCaussIdx4Other := make([]int, 0, len(other.defCausumns)-otherIdx)
   239  	for i := otherIdx; i < len(other.defCausumns); i++ {
   240  		if other.defCausumns[i] == other.defCausumns[otherIdx] {
   241  			refDefCaussIdx4Other = append(refDefCaussIdx4Other, i)
   242  		}
   243  	}
   244  
   245  	// Swap defCausumns from two chunks.
   246  	c.defCausumns[defCausIdx], other.defCausumns[otherIdx] = other.defCausumns[otherIdx], c.defCausumns[defCausIdx]
   247  
   248  	// Rebuild the reference.
   249  	for _, i := range refDefCaussIdx {
   250  		c.MakeRef(defCausIdx, i)
   251  	}
   252  	for _, i := range refDefCaussIdx4Other {
   253  		other.MakeRef(otherIdx, i)
   254  	}
   255  	return nil
   256  }
   257  
// SwapDeferredCausets swaps defCausumns with another Chunk.
// The selection vectors and the virtual row counts are swapped as well;
// capacity and requiredRows stay with their original Chunk.
func (c *Chunk) SwapDeferredCausets(other *Chunk) {
	c.sel, other.sel = other.sel, c.sel
	c.defCausumns, other.defCausumns = other.defCausumns, c.defCausumns
	c.numVirtualRows, other.numVirtualRows = other.numVirtualRows, c.numVirtualRows
}
   264  
// SetNumVirtualRows sets the number of virtual rows for a Chunk.
// It should only be used when there exists no DeferredCauset in the Chunk;
// NumRows reports this value only in that case.
func (c *Chunk) SetNumVirtualRows(numVirtualRows int) {
	c.numVirtualRows = numVirtualRows
}
   270  
   271  // Reset resets the chunk, so the memory it allocated can be reused.
   272  // Make sure all the data in the chunk is not used anymore before you reuse this chunk.
   273  func (c *Chunk) Reset() {
   274  	c.sel = nil
   275  	if c.defCausumns == nil {
   276  		return
   277  	}
   278  	for _, defCaus := range c.defCausumns {
   279  		defCaus.reset()
   280  	}
   281  	c.numVirtualRows = 0
   282  }
   283  
   284  // CopyConstruct creates a new chunk and copies this chunk's data into it.
   285  func (c *Chunk) CopyConstruct() *Chunk {
   286  	newChk := renewEmpty(c)
   287  	newChk.defCausumns = make([]*DeferredCauset, len(c.defCausumns))
   288  	for i := range c.defCausumns {
   289  		newChk.defCausumns[i] = c.defCausumns[i].CopyConstruct(nil)
   290  	}
   291  	return newChk
   292  }
   293  
   294  // GrowAndReset resets the Chunk and doubles the capacity of the Chunk.
   295  // The doubled capacity should not be larger than maxChunkSize.
   296  // TODO: this method will be used in following PR.
   297  func (c *Chunk) GrowAndReset(maxChunkSize int) {
   298  	c.sel = nil
   299  	if c.defCausumns == nil {
   300  		return
   301  	}
   302  	newCap := reCalcCapacity(c, maxChunkSize)
   303  	if newCap <= c.capacity {
   304  		c.Reset()
   305  		return
   306  	}
   307  	c.capacity = newCap
   308  	c.defCausumns = renewDeferredCausets(c.defCausumns, newCap)
   309  	c.numVirtualRows = 0
   310  	c.requiredRows = maxChunkSize
   311  }
   312  
   313  // reCalcCapacity calculates the capacity for another Chunk based on the current
   314  // Chunk. The new capacity is doubled only when the current Chunk is full.
   315  func reCalcCapacity(c *Chunk, maxChunkSize int) int {
   316  	if c.NumRows() < c.capacity {
   317  		return c.capacity
   318  	}
   319  	return mathutil.Min(c.capacity*2, maxChunkSize)
   320  }
   321  
// Capacity returns the capacity of the Chunk, i.e. the value of its capacity
// field (the max number of rows the chunk was sized for).
func (c *Chunk) Capacity() int {
	return c.capacity
}
   326  
// NumDefCauss returns the number of defCausumns in the chunk.
// Slots that reference the same DeferredCauset are each counted.
func (c *Chunk) NumDefCauss() int {
	return len(c.defCausumns)
}
   331  
   332  // NumRows returns the number of rows in the chunk.
   333  func (c *Chunk) NumRows() int {
   334  	if c.sel != nil {
   335  		return len(c.sel)
   336  	}
   337  	if c.NumDefCauss() == 0 {
   338  		return c.numVirtualRows
   339  	}
   340  	return c.defCausumns[0].length
   341  }
   342  
   343  // GetRow gets the Row in the chunk with the event index.
   344  func (c *Chunk) GetRow(idx int) Row {
   345  	if c.sel != nil {
   346  		// mapping the logical RowIdx to the actual physical RowIdx;
   347  		// for example, if the Sel is [1, 5, 6], then
   348  		//	logical 0 -> physical 1,
   349  		//	logical 1 -> physical 5,
   350  		//	logical 2 -> physical 6.
   351  		// Then when we iterate this Chunk according to Row, only selected rows will be
   352  		// accessed while all filtered rows will be ignored.
   353  		return Row{c: c, idx: c.sel[idx]}
   354  	}
   355  	return Row{c: c, idx: idx}
   356  }
   357  
// AppendRow appends a event to the chunk.
// The event's DeferredCausets are appended starting at DeferredCauset 0 of the
// chunk (see AppendPartialRow), and the virtual row counter is incremented.
func (c *Chunk) AppendRow(event Row) {
	c.AppendPartialRow(0, event)
	c.numVirtualRows++
}
   363  
   364  // AppendPartialRow appends a event to the chunk.
   365  func (c *Chunk) AppendPartialRow(defCausOff int, event Row) {
   366  	c.appendSel(defCausOff)
   367  	for i, rowDefCaus := range event.c.defCausumns {
   368  		chkDefCaus := c.defCausumns[defCausOff+i]
   369  		appendCellByCell(chkDefCaus, rowDefCaus, event.idx)
   370  	}
   371  }
   372  
// AppendRowByDefCausIdxs appends a event by its defCausIdxs to the chunk.
// 1. every defCausumns are used if defCausIdxs is nil.
// 2. no defCausumns are used if defCausIdxs is not nil but the size of defCausIdxs is 0.
// The cells are written starting at DeferredCauset 0 of the chunk and the
// virtual row counter is incremented. The returned wide is the value reported
// by AppendPartialRowByDefCausIdxs, i.e. the number of DeferredCausets appended.
func (c *Chunk) AppendRowByDefCausIdxs(event Row, defCausIdxs []int) (wide int) {
	wide = c.AppendPartialRowByDefCausIdxs(0, event, defCausIdxs)
	c.numVirtualRows++
	return
}
   381  
   382  // AppendPartialRowByDefCausIdxs appends a event by its defCausIdxs to the chunk.
   383  // 1. every defCausumns are used if defCausIdxs is nil.
   384  // 2. no defCausumns are used if defCausIdxs is not nil but the size of defCausIdxs is 0.
   385  func (c *Chunk) AppendPartialRowByDefCausIdxs(defCausOff int, event Row, defCausIdxs []int) (wide int) {
   386  	if defCausIdxs == nil {
   387  		c.AppendPartialRow(defCausOff, event)
   388  		return event.Len()
   389  	}
   390  
   391  	c.appendSel(defCausOff)
   392  	for i, defCausIdx := range defCausIdxs {
   393  		rowDefCaus := event.c.defCausumns[defCausIdx]
   394  		chkDefCaus := c.defCausumns[defCausOff+i]
   395  		appendCellByCell(chkDefCaus, rowDefCaus, event.idx)
   396  	}
   397  	return len(defCausIdxs)
   398  }
   399  
   400  // appendCellByCell appends the cell with rowIdx of src into dst.
   401  func appendCellByCell(dst *DeferredCauset, src *DeferredCauset, rowIdx int) {
   402  	dst.appendNullBitmap(!src.IsNull(rowIdx))
   403  	if src.isFixed() {
   404  		elemLen := len(src.elemBuf)
   405  		offset := rowIdx * elemLen
   406  		dst.data = append(dst.data, src.data[offset:offset+elemLen]...)
   407  	} else {
   408  		start, end := src.offsets[rowIdx], src.offsets[rowIdx+1]
   409  		dst.data = append(dst.data, src.data[start:end]...)
   410  		dst.offsets = append(dst.offsets, int64(len(dst.data)))
   411  	}
   412  	dst.length++
   413  }
   414  
// preAlloc pre-allocates the memory space in a Chunk to store the Row.
// It returns the row index (the value of NumRows before the call) that the
// pre-allocated row occupies.
// NOTE: only used in test.
// 1. The Chunk must be empty or holds no useful data.
// 2. The schemaReplicant of the Row must be the same with the Chunk.
// 3. This API is paired with the `Insert()` function, which inserts all the
//    rows data into the Chunk after the pre-allocation.
// 4. We set the null bitmap here instead of in the Insert() function because
//    when the Insert() function is called parallelly, the data race on a byte
//    can not be avoided although the manipulated bits are different inside a
//    byte.
func (c *Chunk) preAlloc(event Row) (rowIdx uint32) {
	rowIdx = uint32(c.NumRows())
	for i, srcDefCaus := range event.c.defCausumns {
		dstDefCaus := c.defCausumns[i]
		dstDefCaus.appendNullBitmap(!srcDefCaus.IsNull(event.idx))
		// elemLen is the number of data bytes to reserve for this cell: the
		// fixed element width, or the span between the cell's two offsets.
		elemLen := len(srcDefCaus.elemBuf)
		if !srcDefCaus.isFixed() {
			elemLen = int(srcDefCaus.offsets[event.idx+1] - srcDefCaus.offsets[event.idx])
			dstDefCaus.offsets = append(dstDefCaus.offsets, int64(len(dstDefCaus.data)+elemLen))
		}
		dstDefCaus.length++
		needCap := len(dstDefCaus.data) + elemLen
		if needCap <= cap(dstDefCaus.data) {
			// Enough capacity: only extend the length of data in place,
			// without writing any bytes.
			(*reflect.SliceHeader)(unsafe.Pointer(&dstDefCaus.data)).Len = len(dstDefCaus.data) + elemLen
			continue
		}
		// Grow the capacity according to golang.growslice.
		// Implementation differences with golang:
		// 1. We double the capacity when `dstDefCaus.data < 1024*elemLen bytes` but
		// not `1024 bytes`.
		// 2. We expand the capacity to 1.5*originCap rather than 1.25*originCap
		// during the slow-increasing phase.
		newCap := cap(dstDefCaus.data)
		doubleCap := newCap << 1
		if needCap > doubleCap {
			newCap = needCap
		} else {
			avgElemLen := elemLen
			if !srcDefCaus.isFixed() {
				avgElemLen = len(dstDefCaus.data) / len(dstDefCaus.offsets)
			}
			// slowIncThreshold indicates the threshold exceeding which the
			// dstDefCaus.data capacity increase fold decreases from 2 to 1.5.
			slowIncThreshold := 1024 * avgElemLen
			if len(dstDefCaus.data) < slowIncThreshold {
				newCap = doubleCap
			} else {
				// Grow by 1.5x until the cell fits; "0 < newCap" stops the
				// loop if newCap is zero or the addition overflows.
				for 0 < newCap && newCap < needCap {
					newCap += newCap / 2
				}
				if newCap <= 0 {
					newCap = needCap
				}
			}
		}
		// A new buffer is allocated and the previous bytes are NOT copied into
		// it; this relies on precondition 1 and on insert() filling the data
		// afterwards.
		dstDefCaus.data = make([]byte, len(dstDefCaus.data)+elemLen, newCap)
	}
	return
}
   474  
   475  // insert inserts `event` on the position specified by `rowIdx`.
   476  // NOTE: only used in test.
   477  // Note: Insert will cover the origin data, it should be called after
   478  // PreAlloc.
   479  func (c *Chunk) insert(rowIdx int, event Row) {
   480  	for i, srcDefCaus := range event.c.defCausumns {
   481  		if event.IsNull(i) {
   482  			continue
   483  		}
   484  		dstDefCaus := c.defCausumns[i]
   485  		var srcStart, srcEnd, destStart, destEnd int
   486  		if srcDefCaus.isFixed() {
   487  			srcElemLen, destElemLen := len(srcDefCaus.elemBuf), len(dstDefCaus.elemBuf)
   488  			srcStart, destStart = event.idx*srcElemLen, rowIdx*destElemLen
   489  			srcEnd, destEnd = srcStart+srcElemLen, destStart+destElemLen
   490  		} else {
   491  			srcStart, srcEnd = int(srcDefCaus.offsets[event.idx]), int(srcDefCaus.offsets[event.idx+1])
   492  			destStart, destEnd = int(dstDefCaus.offsets[rowIdx]), int(dstDefCaus.offsets[rowIdx+1])
   493  		}
   494  		copy(dstDefCaus.data[destStart:destEnd], srcDefCaus.data[srcStart:srcEnd])
   495  	}
   496  }
   497  
   498  // Append appends rows in [begin, end) in another Chunk to a Chunk.
   499  func (c *Chunk) Append(other *Chunk, begin, end int) {
   500  	for defCausID, src := range other.defCausumns {
   501  		dst := c.defCausumns[defCausID]
   502  		if src.isFixed() {
   503  			elemLen := len(src.elemBuf)
   504  			dst.data = append(dst.data, src.data[begin*elemLen:end*elemLen]...)
   505  		} else {
   506  			beginOffset, endOffset := src.offsets[begin], src.offsets[end]
   507  			dst.data = append(dst.data, src.data[beginOffset:endOffset]...)
   508  			for i := begin; i < end; i++ {
   509  				dst.offsets = append(dst.offsets, dst.offsets[len(dst.offsets)-1]+src.offsets[i+1]-src.offsets[i])
   510  			}
   511  		}
   512  		for i := begin; i < end; i++ {
   513  			c.appendSel(defCausID)
   514  			dst.appendNullBitmap(!src.IsNull(i))
   515  			dst.length++
   516  		}
   517  	}
   518  	c.numVirtualRows += end - begin
   519  }
   520  
   521  // TruncateTo truncates rows from tail to head in a Chunk to "numRows" rows.
   522  func (c *Chunk) TruncateTo(numRows int) {
   523  	c.Reconstruct()
   524  	for _, defCaus := range c.defCausumns {
   525  		if defCaus.isFixed() {
   526  			elemLen := len(defCaus.elemBuf)
   527  			defCaus.data = defCaus.data[:numRows*elemLen]
   528  		} else {
   529  			defCaus.data = defCaus.data[:defCaus.offsets[numRows]]
   530  			defCaus.offsets = defCaus.offsets[:numRows+1]
   531  		}
   532  		defCaus.length = numRows
   533  		bitmapLen := (defCaus.length + 7) / 8
   534  		defCaus.nullBitmap = defCaus.nullBitmap[:bitmapLen]
   535  		if defCaus.length%8 != 0 {
   536  			// When we append null, we simply increment the nullCount,
   537  			// so we need to clear the unused bits in the last bitmap byte.
   538  			lastByte := defCaus.nullBitmap[bitmapLen-1]
   539  			unusedBitsLen := 8 - uint(defCaus.length%8)
   540  			lastByte <<= unusedBitsLen
   541  			lastByte >>= unusedBitsLen
   542  			defCaus.nullBitmap[bitmapLen-1] = lastByte
   543  		}
   544  	}
   545  	c.numVirtualRows = numRows
   546  }
   547  
// AppendNull appends a null value to the DeferredCauset at defCausIdx.
// When defCausIdx is 0 and a selection vector is set, the new row is also
// recorded in the selection vector.
func (c *Chunk) AppendNull(defCausIdx int) {
	c.appendSel(defCausIdx)
	c.defCausumns[defCausIdx].AppendNull()
}
   553  
// AppendInt64 appends the int64 value i to the DeferredCauset at defCausIdx.
// When defCausIdx is 0 and a selection vector is set, the new row is also
// recorded in the selection vector.
func (c *Chunk) AppendInt64(defCausIdx int, i int64) {
	c.appendSel(defCausIdx)
	c.defCausumns[defCausIdx].AppendInt64(i)
}
   559  
// AppendUint64 appends the uint64 value u to the DeferredCauset at defCausIdx.
// When defCausIdx is 0 and a selection vector is set, the new row is also
// recorded in the selection vector.
func (c *Chunk) AppendUint64(defCausIdx int, u uint64) {
	c.appendSel(defCausIdx)
	c.defCausumns[defCausIdx].AppendUint64(u)
}
   565  
// AppendFloat32 appends the float32 value f to the DeferredCauset at defCausIdx.
// When defCausIdx is 0 and a selection vector is set, the new row is also
// recorded in the selection vector.
func (c *Chunk) AppendFloat32(defCausIdx int, f float32) {
	c.appendSel(defCausIdx)
	c.defCausumns[defCausIdx].AppendFloat32(f)
}
   571  
// AppendFloat64 appends the float64 value f to the DeferredCauset at defCausIdx.
// When defCausIdx is 0 and a selection vector is set, the new row is also
// recorded in the selection vector.
func (c *Chunk) AppendFloat64(defCausIdx int, f float64) {
	c.appendSel(defCausIdx)
	c.defCausumns[defCausIdx].AppendFloat64(f)
}
   577  
// AppendString appends the string value str to the DeferredCauset at defCausIdx.
// When defCausIdx is 0 and a selection vector is set, the new row is also
// recorded in the selection vector.
func (c *Chunk) AppendString(defCausIdx int, str string) {
	c.appendSel(defCausIdx)
	c.defCausumns[defCausIdx].AppendString(str)
}
   583  
// AppendBytes appends the bytes value b to the DeferredCauset at defCausIdx.
// When defCausIdx is 0 and a selection vector is set, the new row is also
// recorded in the selection vector.
func (c *Chunk) AppendBytes(defCausIdx int, b []byte) {
	c.appendSel(defCausIdx)
	c.defCausumns[defCausIdx].AppendBytes(b)
}
   589  
// AppendTime appends the Time value t to the DeferredCauset at defCausIdx.
// When defCausIdx is 0 and a selection vector is set, the new row is also
// recorded in the selection vector.
func (c *Chunk) AppendTime(defCausIdx int, t types.Time) {
	c.appendSel(defCausIdx)
	c.defCausumns[defCausIdx].AppendTime(t)
}
   595  
// AppendDuration appends the Duration value dur to the DeferredCauset at defCausIdx.
// When defCausIdx is 0 and a selection vector is set, the new row is also
// recorded in the selection vector.
func (c *Chunk) AppendDuration(defCausIdx int, dur types.Duration) {
	c.appendSel(defCausIdx)
	c.defCausumns[defCausIdx].AppendDuration(dur)
}
   601  
// AppendMyDecimal appends the MyDecimal value dec to the DeferredCauset at defCausIdx.
// When defCausIdx is 0 and a selection vector is set, the new row is also
// recorded in the selection vector.
func (c *Chunk) AppendMyDecimal(defCausIdx int, dec *types.MyDecimal) {
	c.appendSel(defCausIdx)
	c.defCausumns[defCausIdx].AppendMyDecimal(dec)
}
   607  
// AppendEnum appends an Enum value to the DeferredCauset at defCausIdx,
// passing the enum's Name and Value to appendNameValue.
// When defCausIdx is 0 and a selection vector is set, the new row is also
// recorded in the selection vector.
func (c *Chunk) AppendEnum(defCausIdx int, enum types.Enum) {
	c.appendSel(defCausIdx)
	c.defCausumns[defCausIdx].appendNameValue(enum.Name, enum.Value)
}
   613  
// AppendSet appends a Set value to the DeferredCauset at defCausIdx,
// passing the set's Name and Value to appendNameValue.
// When defCausIdx is 0 and a selection vector is set, the new row is also
// recorded in the selection vector.
func (c *Chunk) AppendSet(defCausIdx int, set types.Set) {
	c.appendSel(defCausIdx)
	c.defCausumns[defCausIdx].appendNameValue(set.Name, set.Value)
}
   619  
// AppendJSON appends the JSON value j to the DeferredCauset at defCausIdx.
// When defCausIdx is 0 and a selection vector is set, the new row is also
// recorded in the selection vector.
func (c *Chunk) AppendJSON(defCausIdx int, j json.BinaryJSON) {
	c.appendSel(defCausIdx)
	c.defCausumns[defCausIdx].AppendJSON(j)
}
   625  
   626  func (c *Chunk) appendSel(defCausIdx int) {
   627  	if defCausIdx == 0 && c.sel != nil { // use defCausumn 0 as standard
   628  		c.sel = append(c.sel, c.defCausumns[0].length)
   629  	}
   630  }
   631  
// AppendCauset appends a causet into the DeferredCauset at defCausIdx,
// dispatching on d.HoTT() to the matching typed Append method.
// String, bytes, binary literal, raw and bit causets are all appended as bytes.
// A causet whose HoTT is not listed below is ignored: nothing is appended.
func (c *Chunk) AppendCauset(defCausIdx int, d *types.Causet) {
	switch d.HoTT() {
	case types.HoTTNull:
		c.AppendNull(defCausIdx)
	case types.HoTTInt64:
		c.AppendInt64(defCausIdx, d.GetInt64())
	case types.HoTTUint64:
		c.AppendUint64(defCausIdx, d.GetUint64())
	case types.HoTTFloat32:
		c.AppendFloat32(defCausIdx, d.GetFloat32())
	case types.HoTTFloat64:
		c.AppendFloat64(defCausIdx, d.GetFloat64())
	case types.HoTTString, types.HoTTBytes, types.HoTTBinaryLiteral, types.HoTTRaw, types.HoTTMysqlBit:
		c.AppendBytes(defCausIdx, d.GetBytes())
	case types.HoTTMysqlDecimal:
		c.AppendMyDecimal(defCausIdx, d.GetMysqlDecimal())
	case types.HoTTMysqlDuration:
		c.AppendDuration(defCausIdx, d.GetMysqlDuration())
	case types.HoTTMysqlEnum:
		c.AppendEnum(defCausIdx, d.GetMysqlEnum())
	case types.HoTTMysqlSet:
		c.AppendSet(defCausIdx, d.GetMysqlSet())
	case types.HoTTMysqlTime:
		c.AppendTime(defCausIdx, d.GetMysqlTime())
	case types.HoTTMysqlJSON:
		c.AppendJSON(defCausIdx, d.GetMysqlJSON())
	}
}
   661  
// DeferredCauset returns the DeferredCauset stored at index defCausIdx.
// The returned pointer is the one held by the chunk, not a copy.
func (c *Chunk) DeferredCauset(defCausIdx int) *DeferredCauset {
	return c.defCausumns[defCausIdx]
}
   666  
   667  // SetDefCaus sets the defCausIdx DeferredCauset to defCaus and returns the old DeferredCauset.
   668  func (c *Chunk) SetDefCaus(defCausIdx int, defCaus *DeferredCauset) *DeferredCauset {
   669  	if defCaus == c.defCausumns[defCausIdx] {
   670  		return nil
   671  	}
   672  	old := c.defCausumns[defCausIdx]
   673  	c.defCausumns[defCausIdx] = defCaus
   674  	return old
   675  }
   676  
// Sel returns the selection vector of this Chunk, or nil if none is set.
// The returned slice is the one held by the chunk, not a copy.
func (c *Chunk) Sel() []int {
	return c.sel
}
   681  
// SetSel sets the selection vector of this Chunk to sel.
// The slice is stored as is, without copying; passing nil selects all rows.
func (c *Chunk) SetSel(sel []int) {
	c.sel = sel
}
   686  
   687  // Reconstruct removes all filtered rows in this Chunk.
   688  func (c *Chunk) Reconstruct() {
   689  	if c.sel == nil {
   690  		return
   691  	}
   692  	for _, defCaus := range c.defCausumns {
   693  		defCaus.reconstruct(c.sel)
   694  	}
   695  	c.numVirtualRows = len(c.sel)
   696  	c.sel = nil
   697  }