github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/colexec/aggexec/count.go (about)

     1  // Copyright 2024 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package aggexec
    16  
    17  import (
    18  	"github.com/matrixorigin/matrixone/pkg/container/types"
    19  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    20  )
    21  
    22  var (
    23  	CountReturnType = func(_ []types.Type) types.Type {
    24  		return types.T_int64.ToType()
    25  	}
    26  )
    27  
    28  // count is a special agg because it can ignore what the value is but only if it was a null.
    29  type countColumnExec struct {
    30  	singleAggInfo
    31  	singleAggExecExtraInformation
    32  	distinctHash
    33  
    34  	ret aggFuncResult[int64]
    35  }
    36  
    37  func newCountColumnExecExec(mg AggMemoryManager, info singleAggInfo) AggFuncExec {
    38  	exec := &countColumnExec{
    39  		singleAggInfo: info,
    40  		ret:           initFixedAggFuncResult[int64](mg, info.retType, false),
    41  	}
    42  	if info.distinct {
    43  		exec.distinctHash = newDistinctHash(mg.Mp(), false)
    44  	}
    45  	return exec
    46  }
    47  
    48  func (exec *countColumnExec) GroupGrow(more int) error {
    49  	if exec.IsDistinct() {
    50  		if err := exec.distinctHash.grows(more); err != nil {
    51  			return err
    52  		}
    53  	}
    54  	return exec.ret.grows(more)
    55  }
    56  
    57  func (exec *countColumnExec) PreAllocateGroups(more int) error {
    58  	return exec.ret.preAllocate(more)
    59  }
    60  
    61  func (exec *countColumnExec) Fill(groupIndex int, row int, vectors []*vector.Vector) error {
    62  	if vectors[0].IsNull(uint64(row)) {
    63  		return nil
    64  	}
    65  
    66  	if exec.IsDistinct() {
    67  		if need, err := exec.distinctHash.fill(groupIndex, vectors, row); err != nil || !need {
    68  			return err
    69  		}
    70  	}
    71  
    72  	exec.ret.groupToSet = groupIndex
    73  	exec.ret.aggSet(exec.ret.aggGet() + 1)
    74  	return nil
    75  }
    76  
    77  func (exec *countColumnExec) BulkFill(groupIndex int, vectors []*vector.Vector) error {
    78  	if vectors[0].IsConstNull() {
    79  		return nil
    80  	}
    81  	exec.ret.groupToSet = groupIndex
    82  
    83  	old := exec.ret.aggGet()
    84  	if exec.IsDistinct() {
    85  		if vectors[0].IsConst() {
    86  			if need, err := exec.distinctHash.fill(groupIndex, vectors, 0); err != nil || !need {
    87  				return err
    88  			}
    89  			old++
    90  
    91  		} else {
    92  			needs, err := exec.distinctHash.bulkFill(groupIndex, vectors)
    93  			if err != nil {
    94  				return err
    95  			}
    96  			nsp := vectors[0].GetNulls()
    97  			for i, j := uint64(0), uint64(len(needs)); i < j; i++ {
    98  				if needs[i] && !nsp.Contains(i) {
    99  					old++
   100  				}
   101  			}
   102  		}
   103  
   104  	} else {
   105  		old += int64(vectors[0].Length() - vectors[0].GetNulls().Count())
   106  	}
   107  	exec.ret.aggSet(old)
   108  	return nil
   109  }
   110  
   111  func (exec *countColumnExec) BatchFill(offset int, groups []uint64, vectors []*vector.Vector) error {
   112  	if vectors[0].IsConstNull() {
   113  		return nil
   114  	}
   115  
   116  	vs := exec.ret.values
   117  	if vectors[0].IsConst() || vectors[0].GetNulls().IsEmpty() {
   118  		if exec.IsDistinct() {
   119  			needs, err := exec.distinctHash.batchFill(vectors, offset, groups)
   120  			if err != nil {
   121  				return err
   122  			}
   123  			for i, group := range groups {
   124  				if needs[i] && group != GroupNotMatched {
   125  					vs[group-1]++
   126  				}
   127  			}
   128  			return nil
   129  		}
   130  
   131  		for _, group := range groups {
   132  			if group != GroupNotMatched {
   133  				vs[group-1]++
   134  			}
   135  		}
   136  		return nil
   137  	}
   138  
   139  	if exec.IsDistinct() {
   140  		needs, err := exec.distinctHash.batchFill(vectors, offset, groups)
   141  		if err != nil {
   142  			return err
   143  		}
   144  
   145  		if vectors[0].HasNull() {
   146  			nsp := vectors[0].GetNulls()
   147  			u64Offset := uint64(offset)
   148  			for i, j := uint64(0), uint64(len(groups)); i < j; i++ {
   149  				if needs[i] && !nsp.Contains(i+u64Offset) && groups[i] != GroupNotMatched {
   150  					vs[groups[i]-1]++
   151  				}
   152  			}
   153  
   154  		} else {
   155  			for i, group := range groups {
   156  				if needs[i] && group != GroupNotMatched {
   157  					vs[group-1]++
   158  				}
   159  			}
   160  			return nil
   161  		}
   162  		return nil
   163  	}
   164  
   165  	if vectors[0].HasNull() {
   166  		nsp := vectors[0].GetNulls()
   167  		u64Offset := uint64(offset)
   168  		for i, j := uint64(0), uint64(len(groups)); i < j; i++ {
   169  			if groups[i] != GroupNotMatched {
   170  				if !nsp.Contains(i + u64Offset) {
   171  					vs[groups[i]-1]++
   172  				}
   173  			}
   174  		}
   175  
   176  	} else {
   177  		for _, group := range groups {
   178  			if group != GroupNotMatched {
   179  				vs[group-1]++
   180  			}
   181  		}
   182  	}
   183  	return nil
   184  }
   185  
   186  func (exec *countColumnExec) Merge(next AggFuncExec, groupIdx1, groupIdx2 int) error {
   187  	other := next.(*countColumnExec)
   188  
   189  	exec.ret.groupToSet = groupIdx1
   190  	other.ret.groupToSet = groupIdx2
   191  	exec.ret.aggSet(exec.ret.aggGet() + other.ret.aggGet())
   192  	return exec.distinctHash.merge(&other.distinctHash)
   193  }
   194  
   195  func (exec *countColumnExec) BatchMerge(next AggFuncExec, offset int, groups []uint64) error {
   196  	other := next.(*countColumnExec)
   197  	vs1 := exec.ret.values
   198  	vs2 := other.ret.values
   199  
   200  	for i := range groups {
   201  		if groups[i] == GroupNotMatched {
   202  			continue
   203  		}
   204  		g1, g2 := int(groups[i])-1, i+offset
   205  		exec.ret.mergeEmpty(other.ret.basicResult, g1, g2)
   206  		vs1[g1] += vs2[g2]
   207  	}
   208  	return exec.distinctHash.merge(&other.distinctHash)
   209  }
   210  
   211  func (exec *countColumnExec) Flush() (*vector.Vector, error) {
   212  	if exec.partialResult != nil {
   213  		exec.ret.values[exec.ret.groupToSet] += exec.partialResult.(int64)
   214  	}
   215  	return exec.ret.flush(), nil
   216  }
   217  
   218  func (exec *countColumnExec) Free() {
   219  	exec.ret.free()
   220  	exec.distinctHash.free()
   221  }
   222  
   223  type countStarExec struct {
   224  	singleAggInfo
   225  	singleAggExecExtraInformation
   226  	ret aggFuncResult[int64]
   227  }
   228  
   229  func newCountStarExec(mg AggMemoryManager, info singleAggInfo) AggFuncExec {
   230  	// todo: should we check if `distinct` here ?
   231  	return &countStarExec{
   232  		singleAggInfo: info,
   233  		ret:           initFixedAggFuncResult[int64](mg, info.retType, false),
   234  	}
   235  }
   236  
   237  func (exec *countStarExec) GroupGrow(more int) error {
   238  	return exec.ret.grows(more)
   239  }
   240  
   241  func (exec *countStarExec) PreAllocateGroups(more int) error {
   242  	return exec.ret.preAllocate(more)
   243  }
   244  
   245  func (exec *countStarExec) Fill(groupIndex int, row int, vectors []*vector.Vector) error {
   246  	exec.ret.groupToSet = groupIndex
   247  	exec.ret.aggSet(exec.ret.aggGet() + 1)
   248  	return nil
   249  }
   250  
   251  func (exec *countStarExec) BulkFill(groupIndex int, vectors []*vector.Vector) error {
   252  	exec.ret.groupToSet = groupIndex
   253  	exec.ret.aggSet(exec.ret.aggGet() + int64(vectors[0].Length()))
   254  	return nil
   255  }
   256  
   257  func (exec *countStarExec) BatchFill(offset int, groups []uint64, vectors []*vector.Vector) error {
   258  	vs := exec.ret.values
   259  	for _, group := range groups {
   260  		if group != GroupNotMatched {
   261  			vs[group-1]++
   262  		}
   263  	}
   264  	return nil
   265  }
   266  
   267  func (exec *countStarExec) Merge(next AggFuncExec, groupIdx1, groupIdx2 int) error {
   268  	exec.ret.groupToSet = groupIdx1
   269  	exec.ret.aggSet(exec.ret.aggGet() + next.(*countStarExec).ret.aggGet())
   270  	return nil
   271  }
   272  
   273  func (exec *countStarExec) BatchMerge(next AggFuncExec, offset int, groups []uint64) error {
   274  	other := next.(*countStarExec)
   275  	vs1 := exec.ret.values
   276  	vs2 := other.ret.values
   277  
   278  	for i := range groups {
   279  		if groups[i] == GroupNotMatched {
   280  			continue
   281  		}
   282  		g1, g2 := int(groups[i])-1, i+offset
   283  		exec.ret.mergeEmpty(other.ret.basicResult, g1, g2)
   284  		vs1[g1] += vs2[g2]
   285  	}
   286  	return nil
   287  }
   288  
   289  func (exec *countStarExec) Flush() (*vector.Vector, error) {
   290  	if exec.partialResult != nil {
   291  		exec.ret.values[exec.ret.groupToSet] += exec.partialResult.(int64)
   292  	}
   293  	return exec.ret.flush(), nil
   294  }
   295  
   296  func (exec *countStarExec) Free() {
   297  	exec.ret.free()
   298  }