github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/colexec/aggexec/multi.go (about)

     1  // Copyright 2024 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package aggexec
    16  
    17  import (
    18  	"fmt"
    19  	"github.com/matrixorigin/matrixone/pkg/container/types"
    20  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    21  )
    22  
    23  type multiAggInfo struct {
    24  	aggID    int64
    25  	distinct bool
    26  	argTypes []types.Type
    27  	retType  types.Type
    28  
    29  	// emptyNull indicates that whether we should return null for a group without any input value.
    30  	emptyNull bool
    31  }
    32  
    33  func (info multiAggInfo) String() string {
    34  	args := "[" + info.argTypes[0].String()
    35  	for i := 1; i < len(info.argTypes); i++ {
    36  		args += ", " + info.argTypes[i].String()
    37  	}
    38  	args += "]"
    39  	return fmt.Sprintf("{aggID: %d, argTypes: %s, retType: %s}", info.aggID, args, info.retType.String())
    40  }
    41  
    42  func (info multiAggInfo) AggID() int64 {
    43  	return info.aggID
    44  }
    45  
    46  func (info multiAggInfo) IsDistinct() bool {
    47  	return info.distinct
    48  }
    49  
    50  func (info multiAggInfo) TypesInfo() ([]types.Type, types.Type) {
    51  	return info.argTypes, info.retType
    52  }
    53  
    54  func (info multiAggInfo) getEncoded() *EncodedBasicInfo {
    55  	return &EncodedBasicInfo{
    56  		Id:         info.aggID,
    57  		IsDistinct: info.distinct,
    58  		Args:       info.argTypes,
    59  		Ret:        info.retType,
    60  	}
    61  }
    62  
    63  // multiAggFuncExec1 and multiAggFuncExec2 are the executors of multi columns agg.
    64  // 1's return type is a fixed length type.
    65  // 2's return type is bytes.
    66  type multiAggFuncExec1[T types.FixedSizeTExceptStrType] struct {
    67  	multiAggInfo
    68  
    69  	args   []mArg1[T]
    70  	ret    aggFuncResult[T]
    71  	groups []MultiAggRetFixed[T]
    72  
    73  	initGroup MultiAggInit1[T]
    74  	// todo: it's an optimization to move rowValid into eval.
    75  	rowValid rowValidForMultiAgg1[T]
    76  	merge    MultiAggMerge1[T]
    77  	eval     MultiAggEval1[T]
    78  	flush    MultiAggFlush1[T]
    79  
    80  	// method to new the private structure for group growing.
    81  	gGroup func() MultiAggRetFixed[T]
    82  }
    83  type multiAggFuncExec2 struct {
    84  	multiAggInfo
    85  
    86  	args   []mArg2
    87  	ret    aggFuncBytesResult
    88  	groups []MultiAggRetVar
    89  
    90  	initGroup MultiAggInit2
    91  	rowValid  rowValidForMultiAgg2
    92  	merge     MultiAggMerge2
    93  	eval      MultiAggEval2
    94  	flush     MultiAggFlush2
    95  
    96  	// method to new the private structure for group growing.
    97  	gGroup func() MultiAggRetVar
    98  }
    99  
   100  func (exec *multiAggFuncExec1[T]) init(
   101  	mg AggMemoryManager,
   102  	info multiAggInfo,
   103  	impl multiColumnAggImplementation) {
   104  
   105  	exec.multiAggInfo = info
   106  	exec.args = make([]mArg1[T], len(info.argTypes))
   107  	exec.ret = initFixedAggFuncResult[T](mg, info.retType, info.emptyNull)
   108  	exec.groups = make([]MultiAggRetFixed[T], 0, 1)
   109  	exec.gGroup = impl.generator.(func() MultiAggRetFixed[T])
   110  	exec.args = make([]mArg1[T], len(info.argTypes))
   111  
   112  	fillNullWhich := impl.fillNullWhich.([]MultiAggFillNull1[T])
   113  	for i := range exec.args {
   114  		exec.args[i] = newArgumentOfMultiAgg1[T](info.argTypes[i])
   115  
   116  		exec.args[i].cacheFill(impl.fillWhich[i], fillNullWhich[i])
   117  	}
   118  	exec.rowValid = impl.rowValid.(rowValidForMultiAgg1[T])
   119  	exec.merge = impl.merge.(MultiAggMerge1[T])
   120  	exec.eval = impl.eval.(MultiAggEval1[T])
   121  	if impl.flush != nil {
   122  		exec.flush = impl.flush.(MultiAggFlush1[T])
   123  	}
   124  	if impl.init != nil {
   125  		exec.initGroup = impl.init.(MultiAggInit1[T])
   126  	}
   127  }
   128  
   129  func (exec *multiAggFuncExec1[T]) GroupGrow(more int) error {
   130  	if err := exec.ret.grows(more); err != nil {
   131  		return err
   132  	}
   133  	setter := exec.ret.aggSet
   134  	moreGroup := make([]MultiAggRetFixed[T], more)
   135  	for i := 0; i < more; i++ {
   136  		moreGroup[i] = exec.gGroup()
   137  	}
   138  
   139  	if exec.initGroup != nil {
   140  		for i := 0; i < more; i++ {
   141  			exec.ret.groupToSet = i + len(exec.groups)
   142  			exec.initGroup(moreGroup[i], setter, exec.argTypes, exec.retType)
   143  		}
   144  	}
   145  
   146  	exec.groups = append(exec.groups, moreGroup...)
   147  	return nil
   148  }
   149  
   150  func (exec *multiAggFuncExec1[T]) PreAllocateGroups(more int) error {
   151  	return exec.ret.preAllocate(more)
   152  }
   153  
   154  func (exec *multiAggFuncExec1[T]) Fill(groupIndex int, row int, vectors []*vector.Vector) error {
   155  	var err error
   156  	for i, arg := range exec.args {
   157  		arg.prepare(vectors[i])
   158  		if err = arg.doRowFill(exec.groups[groupIndex], uint64(row)); err != nil {
   159  			return err
   160  		}
   161  	}
   162  	exec.ret.groupToSet = groupIndex
   163  	if exec.rowValid(exec.groups[groupIndex]) {
   164  		exec.ret.setGroupNotEmpty(groupIndex)
   165  		if err = exec.eval(exec.groups[groupIndex], exec.ret.aggGet, exec.ret.aggSet); err != nil {
   166  			return err
   167  		}
   168  	}
   169  
   170  	return nil
   171  }
   172  
   173  func (exec *multiAggFuncExec1[T]) BulkFill(groupIndex int, vectors []*vector.Vector) error {
   174  	var err error
   175  	for i, arg := range exec.args {
   176  		arg.prepare(vectors[i])
   177  	}
   178  
   179  	setter := exec.ret.aggSet
   180  	getter := exec.ret.aggGet
   181  	exec.ret.groupToSet = groupIndex
   182  	for i, j := uint64(0), uint64(vectors[0].Length()); i < j; i++ {
   183  		for _, arg := range exec.args {
   184  			if err = arg.doRowFill(exec.groups[groupIndex], i); err != nil {
   185  				return err
   186  			}
   187  		}
   188  		if exec.rowValid(exec.groups[groupIndex]) {
   189  			exec.ret.setGroupNotEmpty(groupIndex)
   190  			if err = exec.eval(exec.groups[groupIndex], getter, setter); err != nil {
   191  				return err
   192  			}
   193  		}
   194  	}
   195  
   196  	return nil
   197  }
   198  
   199  func (exec *multiAggFuncExec1[T]) BatchFill(offset int, groups []uint64, vectors []*vector.Vector) error {
   200  	var err error
   201  	setter := exec.ret.aggSet
   202  	getter := exec.ret.aggGet
   203  	for i, arg := range exec.args {
   204  		arg.prepare(vectors[i])
   205  	}
   206  
   207  	for idx, i, j := 0, uint64(offset), uint64(offset+len(groups)); i < j; i++ {
   208  		if groups[idx] != GroupNotMatched {
   209  			groupIdx := int(groups[idx] - 1)
   210  			for _, arg := range exec.args {
   211  				if err = arg.doRowFill(exec.groups[groupIdx], i); err != nil {
   212  					return err
   213  				}
   214  			}
   215  			exec.ret.groupToSet = groupIdx
   216  			if exec.rowValid(exec.groups[groupIdx]) {
   217  				exec.ret.setGroupNotEmpty(groupIdx)
   218  				if err = exec.eval(exec.groups[groupIdx], getter, setter); err != nil {
   219  					return err
   220  				}
   221  			}
   222  
   223  		}
   224  		idx++
   225  	}
   226  
   227  	return nil
   228  }
   229  
   230  func (exec *multiAggFuncExec1[T]) SetExtraInformation(partialResult any, groupIndex int) error {
   231  	panic("unimplemented SetPreparedResult for multiAggFuncExec1")
   232  }
   233  
   234  func (exec *multiAggFuncExec1[T]) Merge(next AggFuncExec, groupIdx1, groupIdx2 int) error {
   235  	other := next.(*multiAggFuncExec1[T])
   236  	exec.ret.groupToSet = groupIdx1
   237  	other.ret.groupToSet = groupIdx2
   238  
   239  	exec.ret.mergeEmpty(other.ret.basicResult, groupIdx1, groupIdx2)
   240  	return exec.merge(
   241  		exec.groups[groupIdx1],
   242  		other.groups[groupIdx2],
   243  		exec.ret.aggGet, other.ret.aggGet,
   244  		exec.ret.aggSet)
   245  }
   246  
   247  func (exec *multiAggFuncExec1[T]) BatchMerge(next AggFuncExec, offset int, groups []uint64) error {
   248  	other := next.(*multiAggFuncExec1[T])
   249  	setter := exec.ret.aggSet
   250  	getter1, getter2 := exec.ret.aggGet, other.ret.aggGet
   251  
   252  	for i := range groups {
   253  		if groups[i] == GroupNotMatched {
   254  			continue
   255  		}
   256  		groupIdx1, groupIdx2 := int(groups[i]-1), i+offset
   257  		exec.ret.groupToSet = groupIdx1
   258  		other.ret.groupToSet = groupIdx2
   259  
   260  		exec.ret.mergeEmpty(other.ret.basicResult, groupIdx1, groupIdx2)
   261  		if err := exec.merge(
   262  			exec.groups[groupIdx1],
   263  			other.groups[groupIdx2],
   264  			getter1, getter2,
   265  			setter); err != nil {
   266  			return err
   267  		}
   268  	}
   269  	return nil
   270  }
   271  
   272  func (exec *multiAggFuncExec1[T]) Flush() (*vector.Vector, error) {
   273  	setter := exec.ret.aggSet
   274  	getter := exec.ret.aggGet
   275  
   276  	if exec.flush == nil {
   277  		return exec.ret.flush(), nil
   278  	}
   279  
   280  	if exec.ret.emptyBeNull {
   281  		for i, group := range exec.groups {
   282  			if exec.ret.groupIsEmpty(i) {
   283  				continue
   284  			}
   285  			exec.ret.groupToSet = i
   286  			if err := exec.flush(group, getter, setter); err != nil {
   287  				return nil, err
   288  			}
   289  		}
   290  	} else {
   291  		for i, group := range exec.groups {
   292  			exec.ret.groupToSet = i
   293  			if err := exec.flush(group, getter, setter); err != nil {
   294  				return nil, err
   295  			}
   296  		}
   297  	}
   298  	return exec.ret.flush(), nil
   299  }
   300  
   301  func (exec *multiAggFuncExec1[T]) Free() {
   302  	exec.ret.free()
   303  }
   304  
   305  func (exec *multiAggFuncExec2) init(
   306  	mg AggMemoryManager,
   307  	info multiAggInfo,
   308  	impl multiColumnAggImplementation) {
   309  
   310  	exec.multiAggInfo = info
   311  	exec.args = make([]mArg2, len(info.argTypes))
   312  	exec.ret = initBytesAggFuncResult(mg, info.retType, info.emptyNull)
   313  	exec.groups = make([]MultiAggRetVar, 0, 1)
   314  	exec.gGroup = impl.generator.(func() MultiAggRetVar)
   315  	exec.args = make([]mArg2, len(info.argTypes))
   316  
   317  	fillNullWhich := impl.fillNullWhich.([]MultiAggFillNull2)
   318  	for i := range exec.args {
   319  		exec.args[i] = newArgumentOfMultiAgg2(info.argTypes[i])
   320  
   321  		exec.args[i].cacheFill(impl.fillWhich[i], fillNullWhich[i])
   322  	}
   323  	exec.rowValid = impl.rowValid.(rowValidForMultiAgg2)
   324  	exec.merge = impl.merge.(MultiAggMerge2)
   325  	exec.eval = impl.eval.(MultiAggEval2)
   326  	if impl.flush != nil {
   327  		exec.flush = impl.flush.(MultiAggFlush2)
   328  	}
   329  	if impl.init != nil {
   330  		exec.initGroup = impl.init.(MultiAggInit2)
   331  	}
   332  }
   333  
   334  func (exec *multiAggFuncExec2) GroupGrow(more int) error {
   335  	if err := exec.ret.grows(more); err != nil {
   336  		return err
   337  	}
   338  	setter := exec.ret.aggSet
   339  	moreGroup := make([]MultiAggRetVar, more)
   340  	for i := 0; i < more; i++ {
   341  		moreGroup[i] = exec.gGroup()
   342  	}
   343  
   344  	if exec.initGroup != nil {
   345  		for i := 0; i < more; i++ {
   346  			exec.ret.groupToSet = i + len(exec.groups)
   347  			exec.initGroup(moreGroup[i], setter, exec.argTypes, exec.retType)
   348  		}
   349  	}
   350  
   351  	exec.groups = append(exec.groups, moreGroup...)
   352  	return nil
   353  }
   354  
   355  func (exec *multiAggFuncExec2) PreAllocateGroups(more int) error {
   356  	return exec.ret.preAllocate(more)
   357  }
   358  
   359  func (exec *multiAggFuncExec2) Fill(groupIndex int, row int, vectors []*vector.Vector) error {
   360  	var err error
   361  	for i, arg := range exec.args {
   362  		arg.prepare(vectors[i])
   363  		if err = arg.doRowFill(exec.groups[groupIndex], uint64(row)); err != nil {
   364  			return err
   365  		}
   366  	}
   367  	exec.ret.groupToSet = groupIndex
   368  	if exec.rowValid(exec.groups[groupIndex]) {
   369  		exec.ret.setGroupNotEmpty(groupIndex)
   370  		return exec.eval(exec.groups[groupIndex], exec.ret.aggGet, exec.ret.aggSet)
   371  	}
   372  
   373  	return nil
   374  }
   375  
   376  func (exec *multiAggFuncExec2) BulkFill(groupIndex int, vectors []*vector.Vector) error {
   377  	var err error
   378  	for i, arg := range exec.args {
   379  		arg.prepare(vectors[i])
   380  	}
   381  
   382  	setter := exec.ret.aggSet
   383  	getter := exec.ret.aggGet
   384  	exec.ret.groupToSet = groupIndex
   385  
   386  	// todo: can do optimization here once all the vectors were constant.
   387  
   388  	for i, j := uint64(0), uint64(vectors[0].Length()); i < j; i++ {
   389  		for _, arg := range exec.args {
   390  			if err = arg.doRowFill(exec.groups[groupIndex], i); err != nil {
   391  				return err
   392  			}
   393  		}
   394  		if exec.rowValid(exec.groups[groupIndex]) {
   395  			exec.ret.setGroupNotEmpty(groupIndex)
   396  			if err = exec.eval(exec.groups[groupIndex], getter, setter); err != nil {
   397  				return err
   398  			}
   399  		}
   400  	}
   401  
   402  	return nil
   403  }
   404  
   405  func (exec *multiAggFuncExec2) BatchFill(offset int, groups []uint64, vectors []*vector.Vector) error {
   406  	var err error
   407  	setter := exec.ret.aggSet
   408  	getter := exec.ret.aggGet
   409  	for i, arg := range exec.args {
   410  		arg.prepare(vectors[i])
   411  	}
   412  
   413  	for idx, i, j := 0, uint64(offset), uint64(offset+len(groups)); i < j; i++ {
   414  		if groups[idx] != GroupNotMatched {
   415  			groupIdx := int(groups[idx] - 1)
   416  			for _, arg := range exec.args {
   417  				if err = arg.doRowFill(exec.groups[groupIdx], i); err != nil {
   418  					return err
   419  				}
   420  			}
   421  			exec.ret.groupToSet = groupIdx
   422  			if exec.rowValid(exec.groups[groupIdx]) {
   423  				exec.ret.setGroupNotEmpty(groupIdx)
   424  				if err = exec.eval(exec.groups[groupIdx], getter, setter); err != nil {
   425  					return err
   426  				}
   427  			}
   428  
   429  		}
   430  		idx++
   431  	}
   432  
   433  	return nil
   434  }
   435  
   436  func (exec *multiAggFuncExec2) SetExtraInformation(partialResult any, groupIndex int) error {
   437  	panic("unimplemented SetPreparedResult for multiAggFuncExec2")
   438  }
   439  
   440  func (exec *multiAggFuncExec2) Merge(next AggFuncExec, groupIdx1, groupIdx2 int) error {
   441  	other := next.(*multiAggFuncExec2)
   442  	exec.ret.groupToSet = groupIdx1
   443  	other.ret.groupToSet = groupIdx2
   444  
   445  	exec.ret.mergeEmpty(other.ret.basicResult, groupIdx1, groupIdx2)
   446  	return exec.merge(
   447  		exec.groups[groupIdx1],
   448  		other.groups[groupIdx2],
   449  		exec.ret.aggGet, other.ret.aggGet,
   450  		exec.ret.aggSet)
   451  }
   452  
   453  func (exec *multiAggFuncExec2) BatchMerge(next AggFuncExec, offset int, groups []uint64) error {
   454  	other := next.(*multiAggFuncExec2)
   455  	setter := exec.ret.aggSet
   456  	getter1, getter2 := exec.ret.aggGet, other.ret.aggGet
   457  
   458  	for i := range groups {
   459  		if groups[i] == GroupNotMatched {
   460  			continue
   461  		}
   462  		groupIdx1, groupIdx2 := int(groups[i]-1), i+offset
   463  		exec.ret.groupToSet = groupIdx1
   464  		other.ret.groupToSet = groupIdx2
   465  
   466  		exec.ret.mergeEmpty(other.ret.basicResult, groupIdx1, groupIdx2)
   467  		if err := exec.merge(
   468  			exec.groups[groupIdx1],
   469  			other.groups[groupIdx2],
   470  			getter1, getter2,
   471  			setter); err != nil {
   472  			return err
   473  		}
   474  	}
   475  	return nil
   476  }
   477  
   478  func (exec *multiAggFuncExec2) Flush() (*vector.Vector, error) {
   479  	var err error
   480  	setter := exec.ret.aggSet
   481  	getter := exec.ret.aggGet
   482  
   483  	if exec.flush == nil {
   484  		return exec.ret.flush(), nil
   485  	}
   486  
   487  	if exec.ret.emptyBeNull {
   488  		for i, group := range exec.groups {
   489  			if exec.ret.groupIsEmpty(i) {
   490  				continue
   491  			}
   492  			exec.ret.groupToSet = i
   493  			if err = exec.flush(group, getter, setter); err != nil {
   494  				return nil, err
   495  			}
   496  		}
   497  	} else {
   498  		for i, group := range exec.groups {
   499  			exec.ret.groupToSet = i
   500  			if err = exec.flush(group, getter, setter); err != nil {
   501  				return nil, err
   502  			}
   503  		}
   504  	}
   505  	return exec.ret.flush(), nil
   506  }
   507  
   508  func (exec *multiAggFuncExec2) Free() {
   509  	exec.ret.free()
   510  }