github.com/matrixorigin/matrixone@v1.2.0/pkg/vm/engine/tae/blockio/writer.go (about) 1 // Copyright 2021 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package blockio 16 17 import ( 18 "context" 19 "fmt" 20 "math" 21 22 "github.com/matrixorigin/matrixone/pkg/container/batch" 23 "github.com/matrixorigin/matrixone/pkg/container/types" 24 "github.com/matrixorigin/matrixone/pkg/fileservice" 25 "github.com/matrixorigin/matrixone/pkg/logutil" 26 "github.com/matrixorigin/matrixone/pkg/objectio" 27 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/common" 28 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/containers" 29 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/index" 30 ) 31 32 type BlockWriter struct { 33 writer *objectio.ObjectWriter 34 objMetaBuilder *ObjectColumnMetasBuilder 35 isSetPK bool 36 pk uint16 37 sortKeyIdx uint16 38 nameStr string 39 name objectio.ObjectName 40 objectStats []objectio.ObjectStats 41 } 42 43 func NewBlockWriter(fs fileservice.FileService, name string) (*BlockWriter, error) { 44 writer, err := objectio.NewObjectWriterSpecial(objectio.WriterETL, name, fs) 45 if err != nil { 46 return nil, err 47 } 48 return &BlockWriter{ 49 writer: writer, 50 isSetPK: false, 51 sortKeyIdx: math.MaxUint16, 52 nameStr: name, 53 }, nil 54 } 55 56 // seqnums is the column's seqnums of the batch written by `WriteBatch`. `WriteBatchWithoutIndex` will ignore the seqnums 57 func NewBlockWriterNew(fs fileservice.FileService, name objectio.ObjectName, schemaVer uint32, seqnums []uint16) (*BlockWriter, error) { 58 writer, err := objectio.NewObjectWriter(name, fs, schemaVer, seqnums) 59 if err != nil { 60 return nil, err 61 } 62 return &BlockWriter{ 63 writer: writer, 64 isSetPK: false, 65 sortKeyIdx: math.MaxUint16, 66 nameStr: name.String(), 67 name: name, 68 }, nil 69 } 70 71 func (w *BlockWriter) SetPrimaryKey(idx uint16) { 72 w.isSetPK = true 73 w.pk = idx 74 w.sortKeyIdx = idx 75 } 76 77 func (w *BlockWriter) SetSortKey(idx uint16) { 78 w.sortKeyIdx = idx 79 } 80 81 func (w *BlockWriter) SetAppendable() { 82 w.writer.SetAppendable() 83 } 84 85 func (w *BlockWriter) GetObjectStats() []objectio.ObjectStats { 86 return w.objectStats 87 } 88 89 // WriteBatch write a batch whose schema is decribed by seqnum in NewBlockWriterNew 90 func (w *BlockWriter) WriteBatch(batch *batch.Batch) (objectio.BlockObject, error) { 91 block, err := w.writer.Write(batch) 92 if err != nil { 93 return nil, err 94 } 95 if w.objMetaBuilder == nil { 96 w.objMetaBuilder = NewObjectColumnMetasBuilder(len(batch.Vecs)) 97 } 98 seqnums := w.writer.GetSeqnums() 99 if w.sortKeyIdx != math.MaxUint16 { 100 w.writer.SetSortKeySeqnum(seqnums[w.sortKeyIdx]) 101 } 102 for i, vec := range batch.Vecs { 103 isPK := false 104 if i == 0 { 105 w.objMetaBuilder.AddRowCnt(vec.Length()) 106 } 107 if vec.GetType().Oid == types.T_Rowid || vec.GetType().Oid == types.T_TS { 108 continue 109 } 110 if w.isSetPK && w.pk == uint16(i) { 111 isPK = true 112 } 113 columnData := containers.ToTNVector(vec, common.DefaultAllocator) 114 // update null count and distinct value 115 w.objMetaBuilder.InspectVector(i, columnData, isPK) 116 117 // Build ZM 118 zm := index.NewZM(vec.GetType().Oid, vec.GetType().Scale) 119 if err = index.BatchUpdateZM(zm, columnData.GetDownstreamVector()); err != nil { 120 return nil, err 121 } 122 index.SetZMSum(zm, columnData.GetDownstreamVector()) 123 // Update column meta zonemap 124 w.writer.UpdateBlockZM(objectio.SchemaData, int(block.GetID()), seqnums[i], zm) 125 // update object zonemap 126 w.objMetaBuilder.UpdateZm(i, zm) 127 128 if !w.isSetPK || w.pk != uint16(i) { 129 continue 130 } 131 w.objMetaBuilder.AddPKData(columnData) 132 bf, err := index.NewBinaryFuseFilter(columnData) 133 if err != nil { 134 return nil, err 135 } 136 buf, err := bf.Marshal() 137 if err != nil { 138 return nil, err 139 } 140 141 if err = w.writer.WriteBF(int(block.GetID()), seqnums[i], buf); err != nil { 142 return nil, err 143 } 144 } 145 return block, nil 146 } 147 148 func (w *BlockWriter) WriteTombstoneBatch(batch *batch.Batch) (objectio.BlockObject, error) { 149 block, err := w.writer.WriteTombstone(batch) 150 if err != nil { 151 return nil, err 152 } 153 for i, vec := range batch.Vecs { 154 columnData := containers.ToTNVector(vec, common.DefaultAllocator) 155 // Build ZM 156 zm := index.NewZM(vec.GetType().Oid, vec.GetType().Scale) 157 if err = index.BatchUpdateZM(zm, columnData.GetDownstreamVector()); err != nil { 158 return nil, err 159 } 160 index.SetZMSum(zm, columnData.GetDownstreamVector()) 161 // Update column meta zonemap 162 w.writer.UpdateBlockZM(objectio.SchemaTombstone, 0, uint16(i), zm) 163 } 164 return block, nil 165 } 166 167 func (w *BlockWriter) WriteSubBatch(batch *batch.Batch, dataType objectio.DataMetaType) (objectio.BlockObject, int, error) { 168 return w.writer.WriteSubBlock(batch, dataType) 169 } 170 171 func (w *BlockWriter) Sync(ctx context.Context) ([]objectio.BlockObject, objectio.Extent, error) { 172 if w.objMetaBuilder != nil { 173 if w.isSetPK { 174 w.objMetaBuilder.SetPKNdv(w.pk, w.objMetaBuilder.GetTotalRow()) 175 } 176 cnt, meta := w.objMetaBuilder.Build() 177 w.writer.WriteObjectMeta(ctx, cnt, meta) 178 } 179 blocks, err := w.writer.WriteEnd(ctx) 180 if len(blocks) == 0 { 181 logutil.Debug("[WriteEnd]", common.OperationField(w.nameStr), 182 common.OperandField("[Size=0]"), common.OperandField(w.writer.GetSeqnums())) 183 return blocks, objectio.Extent{}, err 184 } 185 186 w.objectStats = w.writer.GetObjectStats() 187 188 logutil.Debug("[WriteEnd]", 189 common.OperationField(w.String(blocks)), 190 common.OperandField(w.writer.GetSeqnums()), 191 common.OperandField(w.writer.GetMaxSeqnum())) 192 return blocks, blocks[0].BlockHeader().MetaLocation(), err 193 } 194 func (w *BlockWriter) Stats() objectio.ObjectStats { 195 return w.writer.GetDataStats() 196 } 197 func (w *BlockWriter) GetName() objectio.ObjectName { 198 return w.name 199 } 200 201 func (w *BlockWriter) String( 202 blocks []objectio.BlockObject) string { 203 size, err := GetObjectSizeWithBlocks(blocks) 204 if err != nil { 205 return fmt.Sprintf("name: %s, err: %s", w.nameStr, err.Error()) 206 } 207 return fmt.Sprintf("name: %s, block count: %d, size: %d", 208 w.nameStr, 209 len(blocks), 210 size, 211 ) 212 }