github.com/matrixorigin/matrixone@v1.2.0/pkg/vm/engine/tae/blockio/writer.go (about)

     1  // Copyright 2021 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package blockio
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"math"
    21  
    22  	"github.com/matrixorigin/matrixone/pkg/container/batch"
    23  	"github.com/matrixorigin/matrixone/pkg/container/types"
    24  	"github.com/matrixorigin/matrixone/pkg/fileservice"
    25  	"github.com/matrixorigin/matrixone/pkg/logutil"
    26  	"github.com/matrixorigin/matrixone/pkg/objectio"
    27  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/common"
    28  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/containers"
    29  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/index"
    30  )
    31  
    32  type BlockWriter struct {
    33  	writer         *objectio.ObjectWriter
    34  	objMetaBuilder *ObjectColumnMetasBuilder
    35  	isSetPK        bool
    36  	pk             uint16
    37  	sortKeyIdx     uint16
    38  	nameStr        string
    39  	name           objectio.ObjectName
    40  	objectStats    []objectio.ObjectStats
    41  }
    42  
    43  func NewBlockWriter(fs fileservice.FileService, name string) (*BlockWriter, error) {
    44  	writer, err := objectio.NewObjectWriterSpecial(objectio.WriterETL, name, fs)
    45  	if err != nil {
    46  		return nil, err
    47  	}
    48  	return &BlockWriter{
    49  		writer:     writer,
    50  		isSetPK:    false,
    51  		sortKeyIdx: math.MaxUint16,
    52  		nameStr:    name,
    53  	}, nil
    54  }
    55  
    56  // seqnums is the column's seqnums of the batch written by `WriteBatch`. `WriteBatchWithoutIndex` will ignore the seqnums
    57  func NewBlockWriterNew(fs fileservice.FileService, name objectio.ObjectName, schemaVer uint32, seqnums []uint16) (*BlockWriter, error) {
    58  	writer, err := objectio.NewObjectWriter(name, fs, schemaVer, seqnums)
    59  	if err != nil {
    60  		return nil, err
    61  	}
    62  	return &BlockWriter{
    63  		writer:     writer,
    64  		isSetPK:    false,
    65  		sortKeyIdx: math.MaxUint16,
    66  		nameStr:    name.String(),
    67  		name:       name,
    68  	}, nil
    69  }
    70  
    71  func (w *BlockWriter) SetPrimaryKey(idx uint16) {
    72  	w.isSetPK = true
    73  	w.pk = idx
    74  	w.sortKeyIdx = idx
    75  }
    76  
    77  func (w *BlockWriter) SetSortKey(idx uint16) {
    78  	w.sortKeyIdx = idx
    79  }
    80  
    81  func (w *BlockWriter) SetAppendable() {
    82  	w.writer.SetAppendable()
    83  }
    84  
    85  func (w *BlockWriter) GetObjectStats() []objectio.ObjectStats {
    86  	return w.objectStats
    87  }
    88  
    89  // WriteBatch write a batch whose schema is decribed by seqnum in NewBlockWriterNew
    90  func (w *BlockWriter) WriteBatch(batch *batch.Batch) (objectio.BlockObject, error) {
    91  	block, err := w.writer.Write(batch)
    92  	if err != nil {
    93  		return nil, err
    94  	}
    95  	if w.objMetaBuilder == nil {
    96  		w.objMetaBuilder = NewObjectColumnMetasBuilder(len(batch.Vecs))
    97  	}
    98  	seqnums := w.writer.GetSeqnums()
    99  	if w.sortKeyIdx != math.MaxUint16 {
   100  		w.writer.SetSortKeySeqnum(seqnums[w.sortKeyIdx])
   101  	}
   102  	for i, vec := range batch.Vecs {
   103  		isPK := false
   104  		if i == 0 {
   105  			w.objMetaBuilder.AddRowCnt(vec.Length())
   106  		}
   107  		if vec.GetType().Oid == types.T_Rowid || vec.GetType().Oid == types.T_TS {
   108  			continue
   109  		}
   110  		if w.isSetPK && w.pk == uint16(i) {
   111  			isPK = true
   112  		}
   113  		columnData := containers.ToTNVector(vec, common.DefaultAllocator)
   114  		// update null count and distinct value
   115  		w.objMetaBuilder.InspectVector(i, columnData, isPK)
   116  
   117  		// Build ZM
   118  		zm := index.NewZM(vec.GetType().Oid, vec.GetType().Scale)
   119  		if err = index.BatchUpdateZM(zm, columnData.GetDownstreamVector()); err != nil {
   120  			return nil, err
   121  		}
   122  		index.SetZMSum(zm, columnData.GetDownstreamVector())
   123  		// Update column meta zonemap
   124  		w.writer.UpdateBlockZM(objectio.SchemaData, int(block.GetID()), seqnums[i], zm)
   125  		// update object zonemap
   126  		w.objMetaBuilder.UpdateZm(i, zm)
   127  
   128  		if !w.isSetPK || w.pk != uint16(i) {
   129  			continue
   130  		}
   131  		w.objMetaBuilder.AddPKData(columnData)
   132  		bf, err := index.NewBinaryFuseFilter(columnData)
   133  		if err != nil {
   134  			return nil, err
   135  		}
   136  		buf, err := bf.Marshal()
   137  		if err != nil {
   138  			return nil, err
   139  		}
   140  
   141  		if err = w.writer.WriteBF(int(block.GetID()), seqnums[i], buf); err != nil {
   142  			return nil, err
   143  		}
   144  	}
   145  	return block, nil
   146  }
   147  
   148  func (w *BlockWriter) WriteTombstoneBatch(batch *batch.Batch) (objectio.BlockObject, error) {
   149  	block, err := w.writer.WriteTombstone(batch)
   150  	if err != nil {
   151  		return nil, err
   152  	}
   153  	for i, vec := range batch.Vecs {
   154  		columnData := containers.ToTNVector(vec, common.DefaultAllocator)
   155  		// Build ZM
   156  		zm := index.NewZM(vec.GetType().Oid, vec.GetType().Scale)
   157  		if err = index.BatchUpdateZM(zm, columnData.GetDownstreamVector()); err != nil {
   158  			return nil, err
   159  		}
   160  		index.SetZMSum(zm, columnData.GetDownstreamVector())
   161  		// Update column meta zonemap
   162  		w.writer.UpdateBlockZM(objectio.SchemaTombstone, 0, uint16(i), zm)
   163  	}
   164  	return block, nil
   165  }
   166  
   167  func (w *BlockWriter) WriteSubBatch(batch *batch.Batch, dataType objectio.DataMetaType) (objectio.BlockObject, int, error) {
   168  	return w.writer.WriteSubBlock(batch, dataType)
   169  }
   170  
   171  func (w *BlockWriter) Sync(ctx context.Context) ([]objectio.BlockObject, objectio.Extent, error) {
   172  	if w.objMetaBuilder != nil {
   173  		if w.isSetPK {
   174  			w.objMetaBuilder.SetPKNdv(w.pk, w.objMetaBuilder.GetTotalRow())
   175  		}
   176  		cnt, meta := w.objMetaBuilder.Build()
   177  		w.writer.WriteObjectMeta(ctx, cnt, meta)
   178  	}
   179  	blocks, err := w.writer.WriteEnd(ctx)
   180  	if len(blocks) == 0 {
   181  		logutil.Debug("[WriteEnd]", common.OperationField(w.nameStr),
   182  			common.OperandField("[Size=0]"), common.OperandField(w.writer.GetSeqnums()))
   183  		return blocks, objectio.Extent{}, err
   184  	}
   185  
   186  	w.objectStats = w.writer.GetObjectStats()
   187  
   188  	logutil.Debug("[WriteEnd]",
   189  		common.OperationField(w.String(blocks)),
   190  		common.OperandField(w.writer.GetSeqnums()),
   191  		common.OperandField(w.writer.GetMaxSeqnum()))
   192  	return blocks, blocks[0].BlockHeader().MetaLocation(), err
   193  }
   194  func (w *BlockWriter) Stats() objectio.ObjectStats {
   195  	return w.writer.GetDataStats()
   196  }
   197  func (w *BlockWriter) GetName() objectio.ObjectName {
   198  	return w.name
   199  }
   200  
   201  func (w *BlockWriter) String(
   202  	blocks []objectio.BlockObject) string {
   203  	size, err := GetObjectSizeWithBlocks(blocks)
   204  	if err != nil {
   205  		return fmt.Sprintf("name: %s, err: %s", w.nameStr, err.Error())
   206  	}
   207  	return fmt.Sprintf("name: %s, block count: %d, size: %d",
   208  		w.nameStr,
   209  		len(blocks),
   210  		size,
   211  	)
   212  }