github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/colexec/aggexec/approx_count.go (about)

     1  // Copyright 2024 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package aggexec
    16  
    17  import (
    18  	hll "github.com/axiomhq/hyperloglog"
    19  	"github.com/matrixorigin/matrixone/pkg/container/types"
    20  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    21  )
    22  
// approxCountFixedExec is the approx_count() executor for fixed-size argument
// types. approx_count() returns the approximate number of count(distinct)
// values in a group.
//
// Each group owns one HyperLogLog sketch; argument values are inserted into
// the sketch in their fixed-size byte encoding and Flush writes each sketch's
// estimate into the uint64 result.
type approxCountFixedExec[T types.FixedSizeTExceptStrType] struct {
	// Aggregate description (id, argument type, return type, ...).
	singleAggInfo
	// NOTE(review): presumably the source of partialGroup/partialResult read by
	// Flush — confirm against its declaration.
	singleAggExecExtraInformation
	// arg is re-pointed at the current argument vector by the bulk fill paths.
	arg sFixedArg[T]
	// ret holds the per-group uint64 results handed out by Flush.
	ret aggFuncResult[uint64]

	// groups[i] is the sketch of group i; entries are created by GroupGrow.
	groups []*hll.Sketch
}
    32  
// approxCountVarExec is the approx_count() executor for variable-length
// argument types. It mirrors approxCountFixedExec, except that values are
// inserted into the per-group HyperLogLog sketches as raw byte slices.
type approxCountVarExec struct {
	// Aggregate description (id, argument type, return type, ...).
	singleAggInfo
	// NOTE(review): presumably the source of partialGroup/partialResult read by
	// Flush — confirm against its declaration.
	singleAggExecExtraInformation
	// arg is re-pointed at the current argument vector by the bulk fill paths.
	arg sBytesArg
	// ret holds the per-group uint64 results handed out by Flush.
	ret aggFuncResult[uint64]

	// groups[i] is the sketch of group i; entries are created by GroupGrow.
	groups []*hll.Sketch
}
    41  
    42  func newApproxCountFixedExec[T types.FixedSizeTExceptStrType](mg AggMemoryManager, info singleAggInfo) AggFuncExec {
    43  	return &approxCountFixedExec[T]{
    44  		singleAggInfo: info,
    45  		ret:           initFixedAggFuncResult[uint64](mg, info.retType, false),
    46  	}
    47  }
    48  
    49  func makeApproxCount(mg AggMemoryManager, id int64, arg types.Type) AggFuncExec {
    50  	info := singleAggInfo{
    51  		aggID:     id,
    52  		distinct:  false,
    53  		argType:   arg,
    54  		retType:   types.T_uint64.ToType(),
    55  		emptyNull: false,
    56  	}
    57  
    58  	if info.argType.IsVarlen() {
    59  		return &approxCountVarExec{
    60  			singleAggInfo: info,
    61  			ret:           initFixedAggFuncResult[uint64](mg, info.retType, false),
    62  		}
    63  	}
    64  
    65  	switch info.argType.Oid {
    66  	case types.T_bool:
    67  		return newApproxCountFixedExec[bool](mg, info)
    68  	case types.T_bit, types.T_uint64:
    69  		return newApproxCountFixedExec[uint64](mg, info)
    70  	case types.T_int8:
    71  		return newApproxCountFixedExec[int8](mg, info)
    72  	case types.T_int16:
    73  		return newApproxCountFixedExec[int16](mg, info)
    74  	case types.T_int32:
    75  		return newApproxCountFixedExec[int32](mg, info)
    76  	case types.T_int64:
    77  		return newApproxCountFixedExec[int64](mg, info)
    78  	case types.T_uint8:
    79  		return newApproxCountFixedExec[uint8](mg, info)
    80  	case types.T_uint16:
    81  		return newApproxCountFixedExec[uint16](mg, info)
    82  	case types.T_uint32:
    83  		return newApproxCountFixedExec[uint32](mg, info)
    84  	case types.T_float32:
    85  		return newApproxCountFixedExec[float32](mg, info)
    86  	case types.T_float64:
    87  		return newApproxCountFixedExec[float64](mg, info)
    88  	case types.T_decimal64:
    89  		return newApproxCountFixedExec[types.Decimal64](mg, info)
    90  	case types.T_decimal128:
    91  		return newApproxCountFixedExec[types.Decimal128](mg, info)
    92  	case types.T_date:
    93  		return newApproxCountFixedExec[types.Date](mg, info)
    94  	case types.T_datetime:
    95  		return newApproxCountFixedExec[types.Datetime](mg, info)
    96  	case types.T_timestamp:
    97  		return newApproxCountFixedExec[types.Timestamp](mg, info)
    98  	case types.T_time:
    99  		return newApproxCountFixedExec[types.Time](mg, info)
   100  	case types.T_enum:
   101  		return newApproxCountFixedExec[types.Enum](mg, info)
   102  	case types.T_uuid:
   103  		return newApproxCountFixedExec[types.Uuid](mg, info)
   104  	default:
   105  		panic("unsupported type for approx_count()")
   106  	}
   107  }
   108  
   109  func (exec *approxCountFixedExec[T]) GroupGrow(more int) error {
   110  	oldLen, newLen := len(exec.groups), len(exec.groups)+more
   111  	exec.groups = append(exec.groups, make([]*hll.Sketch, more)...)
   112  	for i := oldLen; i < newLen; i++ {
   113  		exec.groups[i] = hll.New()
   114  	}
   115  	return exec.ret.grows(more)
   116  }
   117  
   118  func (exec *approxCountFixedExec[T]) PreAllocateGroups(more int) error {
   119  	return exec.ret.preAllocate(more)
   120  }
   121  
   122  func (exec *approxCountFixedExec[T]) Fill(groupIndex int, row int, vectors []*vector.Vector) error {
   123  	if vectors[0].IsNull(uint64(row)) {
   124  		return nil
   125  	}
   126  	if vectors[0].IsConst() {
   127  		row = 0
   128  	}
   129  	v := vector.MustFixedCol[T](vectors[0])[row]
   130  	exec.groups[groupIndex].Insert(types.EncodeFixed[T](v))
   131  	return nil
   132  }
   133  
   134  func (exec *approxCountFixedExec[T]) BulkFill(groupIndex int, vectors []*vector.Vector) error {
   135  	if vectors[0].IsConstNull() {
   136  		return nil
   137  	}
   138  	if vectors[0].IsConst() {
   139  		v := vector.MustFixedCol[T](vectors[0])[0]
   140  		exec.groups[groupIndex].Insert(types.EncodeFixed[T](v))
   141  		return nil
   142  	}
   143  	exec.arg.prepare(vectors[0])
   144  	if exec.arg.w.WithAnyNullValue() {
   145  		for i, j := uint64(0), uint64(vectors[0].Length()); i < j; i++ {
   146  			if v, null := exec.arg.w.GetValue(i); !null {
   147  				exec.groups[groupIndex].Insert(types.EncodeFixed[T](v))
   148  			}
   149  		}
   150  		return nil
   151  	}
   152  
   153  	for i, j := uint64(0), uint64(vectors[0].Length()); i < j; i++ {
   154  		v, _ := exec.arg.w.GetValue(i)
   155  		exec.groups[groupIndex].Insert(types.EncodeFixed[T](v))
   156  	}
   157  	return nil
   158  }
   159  
   160  func (exec *approxCountFixedExec[T]) BatchFill(offset int, groups []uint64, vectors []*vector.Vector) error {
   161  	if vectors[0].IsConstNull() {
   162  		return nil
   163  	}
   164  	if vectors[0].IsConst() {
   165  		v := vector.MustFixedCol[T](vectors[0])[0]
   166  		for _, group := range groups {
   167  			if group != GroupNotMatched {
   168  				exec.groups[group-1].Insert(types.EncodeFixed[T](v))
   169  			}
   170  		}
   171  		return nil
   172  	}
   173  
   174  	exec.arg.prepare(vectors[0])
   175  	u64Offset := uint64(offset)
   176  	if exec.arg.w.WithAnyNullValue() {
   177  		for i, j := uint64(0), uint64(len(groups)); i < j; i++ {
   178  			if groups[i] != GroupNotMatched {
   179  				v, null := exec.arg.w.GetValue(i + u64Offset)
   180  				if !null {
   181  					exec.groups[groups[i]-1].Insert(types.EncodeFixed[T](v))
   182  				}
   183  			}
   184  		}
   185  		return nil
   186  	}
   187  
   188  	for i, j := uint64(0), uint64(len(groups)); i < j; i++ {
   189  		if groups[i] != GroupNotMatched {
   190  			v, _ := exec.arg.w.GetValue(i + u64Offset)
   191  			exec.groups[groups[i]-1].Insert(types.EncodeFixed[T](v))
   192  		}
   193  	}
   194  	return nil
   195  }
   196  
   197  func (exec *approxCountFixedExec[T]) Merge(next AggFuncExec, groupIdx1, groupIdx2 int) error {
   198  	nextExec := next.(*approxCountFixedExec[T])
   199  	return exec.groups[groupIdx1].Merge(nextExec.groups[groupIdx2])
   200  }
   201  
   202  func (exec *approxCountFixedExec[T]) BatchMerge(next AggFuncExec, offset int, groups []uint64) error {
   203  	other := next.(*approxCountFixedExec[T])
   204  
   205  	for i := range groups {
   206  		if groups[i] == GroupNotMatched {
   207  			continue
   208  		}
   209  		g1, g2 := int(groups[i])-1, i+offset
   210  		if err := exec.groups[g1].Merge(other.groups[g2]); err != nil {
   211  			return err
   212  		}
   213  	}
   214  	return nil
   215  }
   216  
   217  func (exec *approxCountFixedExec[T]) Flush() (*vector.Vector, error) {
   218  	setter := exec.ret.aggSet
   219  	for i, group := range exec.groups {
   220  		exec.ret.groupToSet = i
   221  		setter(group.Estimate())
   222  	}
   223  
   224  	if exec.partialResult != nil {
   225  		getter := exec.ret.aggGet
   226  		exec.ret.groupToSet = exec.partialGroup
   227  		setter(getter() + exec.partialResult.(uint64))
   228  	}
   229  	return exec.ret.flush(), nil
   230  }
   231  
   232  func (exec *approxCountFixedExec[T]) Free() {
   233  	exec.ret.free()
   234  	exec.groups = nil
   235  }
   236  
   237  func (exec *approxCountVarExec) GroupGrow(more int) error {
   238  	oldLen, newLen := len(exec.groups), len(exec.groups)+more
   239  	if cap(exec.groups) >= newLen {
   240  		exec.groups = exec.groups[:newLen]
   241  	} else {
   242  		exec.groups = append(exec.groups, make([]*hll.Sketch, more)...)
   243  	}
   244  
   245  	for i := oldLen; i < newLen; i++ {
   246  		exec.groups[i] = hll.New()
   247  	}
   248  	return exec.ret.grows(more)
   249  }
   250  
   251  func (exec *approxCountVarExec) PreAllocateGroups(more int) error {
   252  	if len(exec.groups) == 0 {
   253  		exec.groups = make([]*hll.Sketch, 0, more)
   254  	} else {
   255  		oldLength := len(exec.groups)
   256  		exec.groups = append(exec.groups, make([]*hll.Sketch, more)...)
   257  		exec.groups = exec.groups[:oldLength]
   258  	}
   259  
   260  	return exec.ret.preAllocate(more)
   261  }
   262  
   263  func (exec *approxCountVarExec) Fill(groupIndex int, row int, vectors []*vector.Vector) error {
   264  	if vectors[0].IsNull(uint64(row)) {
   265  		return nil
   266  	}
   267  	if vectors[0].IsConst() {
   268  		row = 0
   269  	}
   270  	v := vector.MustBytesCol(vectors[0])[row]
   271  	exec.groups[groupIndex].Insert(v)
   272  	return nil
   273  }
   274  
   275  func (exec *approxCountVarExec) BulkFill(groupIndex int, vectors []*vector.Vector) error {
   276  	if vectors[0].IsConstNull() {
   277  		return nil
   278  	}
   279  	if vectors[0].IsConst() {
   280  		v := vector.MustBytesCol(vectors[0])[0]
   281  		exec.groups[groupIndex].Insert(v)
   282  		return nil
   283  	}
   284  	exec.arg.prepare(vectors[0])
   285  	if exec.arg.w.WithAnyNullValue() {
   286  		for i, j := uint64(0), uint64(vectors[0].Length()); i < j; i++ {
   287  			if v, null := exec.arg.w.GetStrValue(i); !null {
   288  				exec.groups[groupIndex].Insert(v)
   289  			}
   290  		}
   291  		return nil
   292  	}
   293  
   294  	for i, j := uint64(0), uint64(vectors[0].Length()); i < j; i++ {
   295  		v, _ := exec.arg.w.GetStrValue(i)
   296  		exec.groups[groupIndex].Insert(v)
   297  	}
   298  	return nil
   299  }
   300  
   301  func (exec *approxCountVarExec) BatchFill(offset int, groups []uint64, vectors []*vector.Vector) error {
   302  	if vectors[0].IsConstNull() {
   303  		return nil
   304  	}
   305  	if vectors[0].IsConst() {
   306  		v := vector.MustBytesCol(vectors[0])[0]
   307  		for _, group := range groups {
   308  			if group != GroupNotMatched {
   309  				exec.groups[group-1].Insert(v)
   310  			}
   311  		}
   312  		return nil
   313  	}
   314  
   315  	exec.arg.prepare(vectors[0])
   316  	u64Offset := uint64(offset)
   317  	if exec.arg.w.WithAnyNullValue() {
   318  		for i, j := uint64(0), uint64(len(groups)); i < j; i++ {
   319  			if groups[i] != GroupNotMatched {
   320  				v, null := exec.arg.w.GetStrValue(i + u64Offset)
   321  				if !null {
   322  					exec.groups[groups[i]-1].Insert(v)
   323  				}
   324  			}
   325  		}
   326  		return nil
   327  	}
   328  
   329  	for i, j := uint64(0), uint64(len(groups)); i < j; i++ {
   330  		if groups[i] != GroupNotMatched {
   331  			v, _ := exec.arg.w.GetStrValue(i + u64Offset)
   332  			exec.groups[groups[i]-1].Insert(v)
   333  		}
   334  	}
   335  	return nil
   336  }
   337  
   338  func (exec *approxCountVarExec) Merge(next AggFuncExec, groupIdx1, groupIdx2 int) error {
   339  	nextExec := next.(*approxCountVarExec)
   340  	return exec.groups[groupIdx1].Merge(nextExec.groups[groupIdx2])
   341  }
   342  
   343  func (exec *approxCountVarExec) BatchMerge(next AggFuncExec, offset int, groups []uint64) error {
   344  	other := next.(*approxCountVarExec)
   345  
   346  	for i := range groups {
   347  		if groups[i] == GroupNotMatched {
   348  			continue
   349  		}
   350  		g1, g2 := int(groups[i])-1, i+offset
   351  		if err := exec.groups[g1].Merge(other.groups[g2]); err != nil {
   352  			return err
   353  		}
   354  	}
   355  	return nil
   356  }
   357  
   358  func (exec *approxCountVarExec) Flush() (*vector.Vector, error) {
   359  	setter := exec.ret.aggSet
   360  	for i, group := range exec.groups {
   361  		exec.ret.groupToSet = i
   362  		setter(group.Estimate())
   363  	}
   364  
   365  	if exec.partialResult != nil {
   366  		getter := exec.ret.aggGet
   367  		exec.ret.groupToSet = exec.partialGroup
   368  		setter(getter() + exec.partialResult.(uint64))
   369  	}
   370  	return exec.ret.flush(), nil
   371  }
   372  
   373  func (exec *approxCountVarExec) Free() {
   374  	exec.ret.free()
   375  	exec.groups = nil
   376  }