github.com/matrixorigin/matrixone@v1.2.0/pkg/vm/engine/tae/blockio/write_index.go (about)

     1  // Copyright 2021 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package blockio
    16  
    17  import (
    18  	hll "github.com/axiomhq/hyperloglog"
    19  	"github.com/cespare/xxhash/v2"
    20  	"github.com/matrixorigin/matrixone/pkg/objectio"
    21  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/containers"
    22  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/index"
    23  )
    24  
// ObjectColumnMetasBuilder accumulates object-level column metadata: a running
// row count plus, per column index, a column meta, an optional HyperLogLog
// sketch and an optional zone map. Build folds the sketches and zone maps into
// the column metas and hands them out.
type ObjectColumnMetasBuilder struct {
	totalRow uint32                // row count accumulated through AddRowCnt
	metas    []objectio.ColumnMeta // one meta per column; set to nil by Build
	sks      []*hll.Sketch         // per-column sketches, created lazily in InspectVector for non-PK columns
	zms      []index.ZM            // per-column zone maps, created lazily in InspectVector and merged in UpdateZm
	pkData   []containers.Vector   // vectors collected through AddPKData
}
    32  
    33  func NewObjectColumnMetasBuilder(colIdx int) *ObjectColumnMetasBuilder {
    34  	metas := make([]objectio.ColumnMeta, colIdx)
    35  	for i := range metas {
    36  		metas[i] = objectio.BuildObjectColumnMeta()
    37  	}
    38  	return &ObjectColumnMetasBuilder{
    39  		metas:  metas,
    40  		sks:    make([]*hll.Sketch, colIdx),
    41  		zms:    make([]index.ZM, colIdx),
    42  		pkData: make([]containers.Vector, 0),
    43  	}
    44  }
    45  
    46  func (b *ObjectColumnMetasBuilder) AddRowCnt(rows int) {
    47  	b.totalRow += uint32(rows)
    48  }
    49  
    50  func (b *ObjectColumnMetasBuilder) AddPKData(data containers.Vector) {
    51  	b.pkData = append(b.pkData, data)
    52  }
    53  
    54  func (b *ObjectColumnMetasBuilder) InspectVector(idx int, vec containers.Vector, isPK bool) {
    55  	if vec.HasNull() {
    56  		cnt := b.metas[idx].NullCnt()
    57  		cnt += uint32(vec.NullCount())
    58  		b.metas[idx].SetNullCnt(cnt)
    59  	}
    60  
    61  	if b.zms[idx] == nil {
    62  		b.zms[idx] = index.NewZM(vec.GetType().Oid, vec.GetType().Scale)
    63  	}
    64  	if isPK {
    65  		return
    66  	}
    67  	if b.sks[idx] == nil {
    68  		b.sks[idx] = hll.New()
    69  	}
    70  	if vec.GetDownstreamVector().IsConstNull() {
    71  		return
    72  	}
    73  	containers.ForeachWindowBytes(vec.GetDownstreamVector(), 0, vec.Length(), func(v []byte, isNull bool, row int) (err error) {
    74  		if isNull {
    75  			return
    76  		}
    77  		b.sks[idx].InsertHash(xxhash.Sum64(v))
    78  		return
    79  	}, nil)
    80  }
    81  
    82  func (b *ObjectColumnMetasBuilder) UpdateZm(idx int, zm index.ZM) {
    83  	// When UpdateZm is called, it is all in memroy, GetMin and GetMax has no loss
    84  	// min and max can be nil if the input vector is null vector
    85  	if !zm.IsInited() {
    86  		return
    87  	}
    88  	index.UpdateZM(b.zms[idx], zm.GetMinBuf())
    89  	index.UpdateZM(b.zms[idx], zm.GetMaxBuf())
    90  	if zm.IsString() && zm.MaxTruncated() {
    91  		b.zms[idx].SetMaxTruncated()
    92  	}
    93  	b.zms[idx].SetSum(zm.GetSumBuf())
    94  }
    95  
    96  func (b *ObjectColumnMetasBuilder) GetPKData() []containers.Vector {
    97  	return b.pkData
    98  }
    99  
   100  func (b *ObjectColumnMetasBuilder) SetPKNdv(idx uint16, ndv uint32) {
   101  	b.metas[idx].SetNdv(ndv)
   102  }
   103  
   104  func (b *ObjectColumnMetasBuilder) GetTotalRow() uint32 {
   105  	return b.totalRow
   106  }
   107  
   108  func (b *ObjectColumnMetasBuilder) Build() (uint32, []objectio.ColumnMeta) {
   109  	for i := range b.metas {
   110  		if b.sks[i] != nil { // rowid or types.TS
   111  			b.metas[i].SetNdv(uint32(b.sks[i].Estimate()))
   112  		}
   113  		if b.zms[i] != nil {
   114  			zmbuf, _ := b.zms[i].Marshal()
   115  			b.metas[i].SetZoneMap(zmbuf)
   116  		}
   117  	}
   118  	ret := b.metas
   119  	b.metas = nil
   120  	b.sks = nil
   121  	b.zms = nil
   122  	return b.totalRow, ret
   123  }