github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/colexec/deletion/deletion.go (about)

     1  // Copyright 2021 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package deletion
    16  
    17  import (
    18  	"bytes"
    19  	"sync/atomic"
    20  
    21  	"github.com/matrixorigin/matrixone/pkg/catalog"
    22  	"github.com/matrixorigin/matrixone/pkg/container/batch"
    23  	"github.com/matrixorigin/matrixone/pkg/container/nulls"
    24  	"github.com/matrixorigin/matrixone/pkg/container/types"
    25  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    26  	"github.com/matrixorigin/matrixone/pkg/sql/colexec"
    27  	"github.com/matrixorigin/matrixone/pkg/vm"
    28  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/options"
    29  	"github.com/matrixorigin/matrixone/pkg/vm/process"
    30  )
    31  
    32  //row id be divided into four types:
    33  // 1. RawBatchOffset : belong to txn's workspace
    34  // 2. CNBlockOffset  : belong to txn's workspace
    35  
    36  // 3. RawRowIdBatch  : belong to txn's snapshot data.
    37  // 4. FlushDeltaLoc   : belong to txn's snapshot data, which on S3 and pointed by delta location.
    38  const (
    39  	RawRowIdBatch = iota
    40  	// remember that, for one block,
    41  	// when it sends the info to mergedeletes,
    42  	// either it's Compaction or not.
    43  	Compaction
    44  	CNBlockOffset
    45  	RawBatchOffset
    46  	FlushDeltaLoc
    47  )
    48  
// argName is the operator name that Argument.String writes as the prefix of
// its description.
const argName = "deletion"
    50  
    51  func (arg *Argument) String(buf *bytes.Buffer) {
    52  	buf.WriteString(argName)
    53  	buf.WriteString(": delete rows")
    54  }
    55  
    56  func (arg *Argument) Prepare(_ *process.Process) error {
    57  	ap := arg
    58  	if ap.RemoteDelete {
    59  		ap.ctr = new(container)
    60  		ap.ctr.state = vm.Build
    61  		ap.ctr.blockId_type = make(map[types.Blockid]int8)
    62  		ap.ctr.blockId_bitmap = make(map[types.Blockid]*nulls.Nulls)
    63  		ap.ctr.pool = &BatchPool{pools: make([]*batch.Batch, 0, options.DefaultBlocksPerObject)}
    64  		ap.ctr.partitionId_blockId_rowIdBatch = make(map[int]map[types.Blockid]*batch.Batch)
    65  		ap.ctr.partitionId_blockId_deltaLoc = make(map[int]map[types.Blockid]*batch.Batch)
    66  	}
    67  	return nil
    68  }
    69  
    70  // the bool return value means whether it completed its work or not
    71  func (arg *Argument) Call(proc *process.Process) (vm.CallResult, error) {
    72  	if err, isCancel := vm.CancelCheck(proc); isCancel {
    73  		return vm.CancelResult, err
    74  	}
    75  
    76  	if arg.RemoteDelete {
    77  		return arg.remoteDelete(proc)
    78  	}
    79  	return arg.normalDelete(proc)
    80  }
    81  
    82  func (arg *Argument) remoteDelete(proc *process.Process) (vm.CallResult, error) {
    83  	var err error
    84  
    85  	anal := proc.GetAnalyze(arg.GetIdx(), arg.GetParallelIdx(), arg.GetParallelMajor())
    86  	anal.Start()
    87  	defer func() {
    88  		anal.Stop()
    89  	}()
    90  
    91  	if arg.ctr.state == vm.Build {
    92  		for {
    93  			result, err := vm.ChildrenCall(arg.GetChildren(0), proc, anal)
    94  
    95  			if err != nil {
    96  				return result, err
    97  			}
    98  			if result.Batch == nil {
    99  				arg.ctr.state = vm.Eval
   100  				break
   101  			}
   102  			if result.Batch.IsEmpty() {
   103  				continue
   104  			}
   105  
   106  			if err = arg.SplitBatch(proc, result.Batch); err != nil {
   107  				return result, err
   108  			}
   109  		}
   110  	}
   111  
   112  	result := vm.NewCallResult()
   113  	if arg.ctr.state == vm.Eval {
   114  		// ToDo: CNBlock Compaction
   115  		// blkId,delta_metaLoc,type
   116  		if arg.resBat != nil {
   117  			proc.PutBatch(arg.resBat)
   118  			arg.resBat = nil
   119  		}
   120  		arg.resBat = batch.NewWithSize(5)
   121  		arg.resBat.Attrs = []string{
   122  			catalog.BlockMeta_Delete_ID,
   123  			catalog.BlockMeta_DeltaLoc,
   124  			catalog.BlockMeta_Type,
   125  			catalog.BlockMeta_Partition,
   126  			catalog.BlockMeta_Deletes_Length,
   127  		}
   128  		arg.resBat.SetVector(0, proc.GetVector(types.T_text.ToType()))
   129  		arg.resBat.SetVector(1, proc.GetVector(types.T_text.ToType()))
   130  		arg.resBat.SetVector(2, proc.GetVector(types.T_int8.ToType()))
   131  		arg.resBat.SetVector(3, proc.GetVector(types.T_int32.ToType()))
   132  
   133  		for pidx, blockidRowidbatch := range arg.ctr.partitionId_blockId_rowIdBatch {
   134  			for blkid, bat := range blockidRowidbatch {
   135  				if err = vector.AppendBytes(arg.resBat.GetVector(0), blkid[:], false, proc.GetMPool()); err != nil {
   136  					return result, err
   137  				}
   138  				bat.SetRowCount(bat.GetVector(0).Length())
   139  				byts, err1 := bat.MarshalBinary()
   140  				if err1 != nil {
   141  					result.Status = vm.ExecStop
   142  					return result, err1
   143  				}
   144  				if err = vector.AppendBytes(arg.resBat.GetVector(1), byts, false, proc.GetMPool()); err != nil {
   145  					return result, err
   146  				}
   147  				if err = vector.AppendFixed(arg.resBat.GetVector(2), arg.ctr.blockId_type[blkid], false, proc.GetMPool()); err != nil {
   148  					return result, err
   149  				}
   150  				if err = vector.AppendFixed(arg.resBat.GetVector(3), int32(pidx), false, proc.GetMPool()); err != nil {
   151  					return result, err
   152  				}
   153  			}
   154  		}
   155  
   156  		for pidx, blockidDeltaloc := range arg.ctr.partitionId_blockId_deltaLoc {
   157  			for blkid, bat := range blockidDeltaloc {
   158  				if err = vector.AppendBytes(arg.resBat.GetVector(0), blkid[:], false, proc.GetMPool()); err != nil {
   159  					return result, err
   160  				}
   161  				//bat.Attrs = {catalog.BlockMeta_DeltaLoc}
   162  				bat.SetRowCount(bat.GetVector(0).Length())
   163  				byts, err1 := bat.MarshalBinary()
   164  				if err1 != nil {
   165  					result.Status = vm.ExecStop
   166  					return result, err1
   167  				}
   168  				if err = vector.AppendBytes(arg.resBat.GetVector(1), byts, false, proc.GetMPool()); err != nil {
   169  					return result, err
   170  				}
   171  				if err = vector.AppendFixed(arg.resBat.GetVector(2), int8(FlushDeltaLoc), false, proc.GetMPool()); err != nil {
   172  					return result, err
   173  				}
   174  				if err = vector.AppendFixed(arg.resBat.GetVector(3), int32(pidx), false, proc.GetMPool()); err != nil {
   175  					return result, err
   176  				}
   177  			}
   178  		}
   179  
   180  		arg.resBat.SetRowCount(arg.resBat.Vecs[0].Length())
   181  		arg.resBat.Vecs[4], err = vector.NewConstFixed(types.T_uint32.ToType(), arg.ctr.deleted_length, arg.resBat.RowCount(), proc.GetMPool())
   182  		if err != nil {
   183  			result.Status = vm.ExecStop
   184  			return result, err
   185  		}
   186  		result.Batch = arg.resBat
   187  		arg.ctr.state = vm.End
   188  		return result, nil
   189  	}
   190  
   191  	if arg.ctr.state == vm.End {
   192  		return result, nil
   193  	}
   194  
   195  	panic("bug")
   196  
   197  }
   198  
   199  func (arg *Argument) normalDelete(proc *process.Process) (vm.CallResult, error) {
   200  	result, err := arg.GetChildren(0).Call(proc)
   201  	if err != nil {
   202  		return result, err
   203  	}
   204  	if result.Batch == nil || result.Batch.IsEmpty() {
   205  		return result, nil
   206  	}
   207  
   208  	anal := proc.GetAnalyze(arg.GetIdx(), arg.GetParallelIdx(), arg.GetParallelMajor())
   209  	anal.Start()
   210  	defer anal.Stop()
   211  
   212  	bat := result.Batch
   213  
   214  	var affectedRows uint64
   215  	delCtx := arg.DeleteCtx
   216  
   217  	if len(delCtx.PartitionTableIDs) > 0 {
   218  		delBatches, err := colexec.GroupByPartitionForDelete(proc, bat, delCtx.RowIdIdx, delCtx.PartitionIndexInBatch,
   219  			len(delCtx.PartitionTableIDs), delCtx.PrimaryKeyIdx)
   220  		if err != nil {
   221  			return result, err
   222  		}
   223  
   224  		for i, delBatch := range delBatches {
   225  			tempRows := uint64(delBatch.RowCount())
   226  			if tempRows > 0 {
   227  				affectedRows += tempRows
   228  				err = delCtx.PartitionSources[i].Delete(proc.Ctx, delBatch, catalog.Row_ID)
   229  				if err != nil {
   230  					delBatch.Clean(proc.Mp())
   231  					return result, err
   232  				}
   233  				proc.PutBatch(delBatch)
   234  			}
   235  		}
   236  	} else {
   237  		delBatch, err := colexec.FilterRowIdForDel(proc, bat, delCtx.RowIdIdx,
   238  			delCtx.PrimaryKeyIdx)
   239  		if err != nil {
   240  			return result, err
   241  		}
   242  		affectedRows = uint64(delBatch.RowCount())
   243  		if affectedRows > 0 {
   244  			err = delCtx.Source.Delete(proc.Ctx, delBatch, catalog.Row_ID)
   245  			if err != nil {
   246  				delBatch.Clean(proc.GetMPool())
   247  				return result, err
   248  			}
   249  		}
   250  		proc.PutBatch(delBatch)
   251  	}
   252  	// result.Batch = batch.EmptyBatch
   253  
   254  	if delCtx.AddAffectedRows {
   255  		atomic.AddUint64(&arg.affectedRows, affectedRows)
   256  	}
   257  	return result, nil
   258  }