github.com/matrixorigin/matrixone@v1.2.0/pkg/vm/engine/tae/blockio/write_index.go (about) 1 // Copyright 2021 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package blockio 16 17 import ( 18 hll "github.com/axiomhq/hyperloglog" 19 "github.com/cespare/xxhash/v2" 20 "github.com/matrixorigin/matrixone/pkg/objectio" 21 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/containers" 22 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/index" 23 ) 24 25 type ObjectColumnMetasBuilder struct { 26 totalRow uint32 27 metas []objectio.ColumnMeta 28 sks []*hll.Sketch 29 zms []index.ZM 30 pkData []containers.Vector 31 } 32 33 func NewObjectColumnMetasBuilder(colIdx int) *ObjectColumnMetasBuilder { 34 metas := make([]objectio.ColumnMeta, colIdx) 35 for i := range metas { 36 metas[i] = objectio.BuildObjectColumnMeta() 37 } 38 return &ObjectColumnMetasBuilder{ 39 metas: metas, 40 sks: make([]*hll.Sketch, colIdx), 41 zms: make([]index.ZM, colIdx), 42 pkData: make([]containers.Vector, 0), 43 } 44 } 45 46 func (b *ObjectColumnMetasBuilder) AddRowCnt(rows int) { 47 b.totalRow += uint32(rows) 48 } 49 50 func (b *ObjectColumnMetasBuilder) AddPKData(data containers.Vector) { 51 b.pkData = append(b.pkData, data) 52 } 53 54 func (b *ObjectColumnMetasBuilder) InspectVector(idx int, vec containers.Vector, isPK bool) { 55 if vec.HasNull() { 56 cnt := b.metas[idx].NullCnt() 57 cnt += uint32(vec.NullCount()) 58 b.metas[idx].SetNullCnt(cnt) 59 } 60 61 if b.zms[idx] == nil { 62 b.zms[idx] = index.NewZM(vec.GetType().Oid, vec.GetType().Scale) 63 } 64 if isPK { 65 return 66 } 67 if b.sks[idx] == nil { 68 b.sks[idx] = hll.New() 69 } 70 if vec.GetDownstreamVector().IsConstNull() { 71 return 72 } 73 containers.ForeachWindowBytes(vec.GetDownstreamVector(), 0, vec.Length(), func(v []byte, isNull bool, row int) (err error) { 74 if isNull { 75 return 76 } 77 b.sks[idx].InsertHash(xxhash.Sum64(v)) 78 return 79 }, nil) 80 } 81 82 func (b *ObjectColumnMetasBuilder) UpdateZm(idx int, zm index.ZM) { 83 // When UpdateZm is called, it is all in memroy, GetMin and GetMax has no loss 84 // min and max can be nil if the input vector is null vector 85 if !zm.IsInited() { 86 return 87 } 88 index.UpdateZM(b.zms[idx], zm.GetMinBuf()) 89 index.UpdateZM(b.zms[idx], zm.GetMaxBuf()) 90 if zm.IsString() && zm.MaxTruncated() { 91 b.zms[idx].SetMaxTruncated() 92 } 93 b.zms[idx].SetSum(zm.GetSumBuf()) 94 } 95 96 func (b *ObjectColumnMetasBuilder) GetPKData() []containers.Vector { 97 return b.pkData 98 } 99 100 func (b *ObjectColumnMetasBuilder) SetPKNdv(idx uint16, ndv uint32) { 101 b.metas[idx].SetNdv(ndv) 102 } 103 104 func (b *ObjectColumnMetasBuilder) GetTotalRow() uint32 { 105 return b.totalRow 106 } 107 108 func (b *ObjectColumnMetasBuilder) Build() (uint32, []objectio.ColumnMeta) { 109 for i := range b.metas { 110 if b.sks[i] != nil { // rowid or types.TS 111 b.metas[i].SetNdv(uint32(b.sks[i].Estimate())) 112 } 113 if b.zms[i] != nil { 114 zmbuf, _ := b.zms[i].Marshal() 115 b.metas[i].SetZoneMap(zmbuf) 116 } 117 } 118 ret := b.metas 119 b.metas = nil 120 b.sks = nil 121 b.zms = nil 122 return b.totalRow, ret 123 }