github.com/matrixorigin/matrixone@v0.7.0/pkg/sql/colexec/agg/approxcd.go (about)

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package agg
    16  
    17  import (
    18  	hll "github.com/axiomhq/hyperloglog"
    19  	"github.com/matrixorigin/matrixone/pkg/container/types"
    20  )
    21  
    22  type ApproxCountDistic[T any] struct {
    23  	Sk []*hll.Sketch
    24  }
    25  
    26  func ApproxCountReturnType(_ []types.Type) types.Type {
    27  	return types.New(types.T_uint64, 0, 0, 0)
    28  }
    29  
    30  func NewApproxc[T any]() *ApproxCountDistic[T] {
    31  	return &ApproxCountDistic[T]{}
    32  }
    33  
    34  func (a *ApproxCountDistic[T]) Grows(n int) {
    35  	if len(a.Sk) == 0 {
    36  		a.Sk = make([]*hll.Sketch, 0)
    37  	}
    38  
    39  	for i := 0; i < n; i++ {
    40  		a.Sk = append(a.Sk, hll.New())
    41  	}
    42  }
    43  
    44  func (a *ApproxCountDistic[T]) Eval(vs []uint64) []uint64 {
    45  	for i := range vs {
    46  		vs[i] = a.Sk[i].Estimate()
    47  	}
    48  
    49  	return vs
    50  }
    51  
    52  func (a *ApproxCountDistic[T]) Fill(n int64, v1 T, v2 uint64, _ int64, isEmpty bool, isNull bool) (uint64, bool) {
    53  	if !isNull {
    54  		data := getTheBytes(v1)
    55  		a.Sk[n].Insert(data)
    56  		isEmpty = false
    57  	}
    58  	return v2, isEmpty
    59  }
    60  
    61  func (a *ApproxCountDistic[T]) Merge(xIndex int64, yIndex int64, x uint64, _ uint64, xEmpty bool, yEmpty bool, yApxc any) (uint64, bool) {
    62  	ret := true
    63  	if !yEmpty {
    64  		ya := yApxc.(*ApproxCountDistic[T])
    65  		if !xEmpty {
    66  			if err := a.Sk[xIndex].Merge(ya.Sk[yIndex]); err != nil {
    67  				panic(err)
    68  			}
    69  		} else {
    70  			a.Sk[xIndex] = ya.Sk[yIndex].Clone()
    71  		}
    72  		ret = false
    73  	}
    74  	return x, ret
    75  }
    76  
    77  func getTheBytes(value any) []byte {
    78  	var data []byte
    79  	switch v := value.(type) {
    80  	case uint8:
    81  		data = append(data, types.EncodeFixed(v)...)
    82  	case uint16:
    83  		data = append(data, types.EncodeFixed(v)...)
    84  	case uint32:
    85  		data = append(data, types.EncodeFixed(v)...)
    86  	case uint64:
    87  		data = append(data, types.EncodeFixed(v)...)
    88  	case int8:
    89  		data = append(data, types.EncodeFixed(v)...)
    90  	case int16:
    91  		data = append(data, types.EncodeFixed(v)...)
    92  	case int32:
    93  		data = append(data, types.EncodeFixed(v)...)
    94  	case int64:
    95  		data = append(data, types.EncodeFixed(v)...)
    96  	case float32:
    97  		data = append(data, types.EncodeFixed(v)...)
    98  	case float64:
    99  		data = append(data, types.EncodeFixed(v)...)
   100  	case []byte:
   101  		data = append(data, v...)
   102  	case types.Decimal64:
   103  		data = append(data, types.EncodeFixed(v)...)
   104  	case types.Decimal128:
   105  		data = append(data, types.EncodeFixed(v)...)
   106  	default:
   107  		panic("not support for type")
   108  	}
   109  	return data
   110  }
   111  
   112  func (a *ApproxCountDistic[T]) MarshalBinary() ([]byte, error) {
   113  	return types.Encode(&a.Sk)
   114  }
   115  
   116  func (a *ApproxCountDistic[T]) UnmarshalBinary(data []byte) error {
   117  	// avoid resulting errors caused by morpc overusing memory
   118  	copyData := make([]byte, len(data))
   119  	copy(copyData, data)
   120  	return types.Decode(copyData, &a.Sk)
   121  }