github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/colexec/aggexec/median.go (about)

     1  // Copyright 2024 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package aggexec
    16  
    17  import (
    18  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    19  	"github.com/matrixorigin/matrixone/pkg/container/types"
    20  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    21  	"sort"
    22  )
    23  
    24  var MedianSupportedType = []types.T{
    25  	types.T_bit, types.T_int8, types.T_int16, types.T_int32, types.T_int64,
    26  	types.T_uint8, types.T_uint16, types.T_uint32, types.T_uint64,
    27  	types.T_float32, types.T_float64, types.T_decimal64, types.T_decimal128,
    28  }
    29  
    30  func MedianReturnType(args []types.Type) types.Type {
    31  	if args[0].IsDecimal() {
    32  		return types.New(types.T_decimal128, 38, args[0].Scale+1)
    33  	}
    34  	return types.T_float64.ToType()
    35  }
    36  
    37  type numeric interface {
    38  	types.Ints | types.UInts | types.Floats
    39  }
    40  
    41  type medianColumnExecSelf[T numeric | types.Decimal64 | types.Decimal128, R float64 | types.Decimal128] struct {
    42  	singleAggInfo
    43  	singleAggExecExtraInformation
    44  	distinctHash
    45  	arg sFixedArg[T]
    46  	ret aggFuncResult[R]
    47  
    48  	// groups stores the values of the column for each group.
    49  	// todo: it has a problem that same as the `clusterCentersExec.groupData` in `cluster_centers.go`
    50  	groups []*vector.Vector
    51  }
    52  
    53  func newMedianColumnExecSelf[T numeric | types.Decimal64 | types.Decimal128, R float64 | types.Decimal128](mg AggMemoryManager, info singleAggInfo) medianColumnExecSelf[T, R] {
    54  	s := medianColumnExecSelf[T, R]{
    55  		singleAggInfo: info,
    56  		ret:           initFixedAggFuncResult[R](mg, info.retType, info.emptyNull),
    57  	}
    58  	if info.IsDistinct() {
    59  		s.distinctHash = newDistinctHash(mg.Mp(), false)
    60  	}
    61  	return s
    62  }
    63  
    64  func (exec *medianColumnExecSelf[T, R]) GroupGrow(more int) error {
    65  	if exec.IsDistinct() {
    66  		if err := exec.distinctHash.grows(more); err != nil {
    67  			return err
    68  		}
    69  	}
    70  
    71  	oldLength := len(exec.groups)
    72  	if cap(exec.groups) >= oldLength+more {
    73  		exec.groups = exec.groups[:oldLength+more]
    74  	} else {
    75  		exec.groups = append(exec.groups, make([]*vector.Vector, more)...)
    76  	}
    77  
    78  	for i, j := oldLength, len(exec.groups); i < j; i++ {
    79  		exec.groups[i] = exec.ret.mg.GetVector(exec.singleAggInfo.argType)
    80  	}
    81  	return exec.ret.grows(more)
    82  }
    83  
    84  func (exec *medianColumnExecSelf[T, R]) PreAllocateGroups(more int) error {
    85  	if len(exec.groups) == 0 {
    86  		exec.groups = make([]*vector.Vector, 0, more)
    87  	} else {
    88  		oldLength := len(exec.groups)
    89  		exec.groups = append(exec.groups, make([]*vector.Vector, more)...)
    90  		exec.groups = exec.groups[:oldLength]
    91  	}
    92  
    93  	return exec.ret.preAllocate(more)
    94  }
    95  
    96  func (exec *medianColumnExecSelf[T, R]) Fill(groupIndex int, row int, vectors []*vector.Vector) error {
    97  	if vectors[0].IsNull(uint64(row)) {
    98  		return nil
    99  	}
   100  	if vectors[0].IsConst() {
   101  		row = 0
   102  	}
   103  	if exec.IsDistinct() {
   104  		if need, err := exec.distinctHash.fill(groupIndex, vectors, row); err != nil || !need {
   105  			return err
   106  		}
   107  	}
   108  
   109  	exec.ret.setGroupNotEmpty(groupIndex)
   110  	value := vector.MustFixedCol[T](vectors[0])[row]
   111  
   112  	return vectorAppendWildly(exec.groups[groupIndex], exec.ret.mp, value)
   113  }
   114  
   115  func (exec *medianColumnExecSelf[T, R]) BulkFill(groupIndex int, vectors []*vector.Vector) error {
   116  	if vectors[0].IsConstNull() {
   117  		return nil
   118  	}
   119  
   120  	if exec.IsDistinct() {
   121  		return exec.distinctBulkFill(groupIndex, vectors)
   122  	}
   123  
   124  	if vectors[0].IsConst() {
   125  		exec.ret.setGroupNotEmpty(groupIndex)
   126  		value := vector.MustFixedCol[T](vectors[0])[0]
   127  		return vector.AppendMultiFixed[T](exec.groups[0], value, false, vectors[0].Length(), exec.ret.mp)
   128  	}
   129  
   130  	exec.arg.prepare(vectors[0])
   131  	mustNotEmpty := false
   132  	for i, j := uint64(0), uint64(vectors[0].Length()); i < j; i++ {
   133  		v, null := exec.arg.w.GetValue(i)
   134  		if null {
   135  			continue
   136  		}
   137  		mustNotEmpty = true
   138  		if err := vectorAppendWildly(exec.groups[groupIndex], exec.ret.mp, v); err != nil {
   139  			return err
   140  		}
   141  	}
   142  	if mustNotEmpty {
   143  		exec.ret.setGroupNotEmpty(groupIndex)
   144  	}
   145  	return nil
   146  }
   147  
   148  func (exec *medianColumnExecSelf[T, R]) distinctBulkFill(groupIndex int, vectors []*vector.Vector) error {
   149  	if vectors[0].IsConst() {
   150  		if need, err := exec.distinctHash.fill(groupIndex, vectors, 0); err != nil || !need {
   151  			return err
   152  		}
   153  
   154  		exec.ret.setGroupNotEmpty(groupIndex)
   155  		value := vector.MustFixedCol[T](vectors[0])[0]
   156  		return vector.AppendMultiFixed[T](exec.groups[groupIndex], value, false, vectors[0].Length(), exec.ret.mp)
   157  	}
   158  
   159  	needs, err := exec.distinctHash.bulkFill(groupIndex, vectors)
   160  	if err != nil {
   161  		return err
   162  	}
   163  	exec.arg.prepare(vectors[0])
   164  	mustNotEmpty := false
   165  	for i, j := uint64(0), uint64(vectors[0].Length()); i < j; i++ {
   166  		if !needs[i] {
   167  			continue
   168  		}
   169  
   170  		v, null := exec.arg.w.GetValue(i)
   171  		if null {
   172  			continue
   173  		}
   174  		mustNotEmpty = true
   175  		if err = vectorAppendWildly(exec.groups[groupIndex], exec.ret.mp, v); err != nil {
   176  			return err
   177  		}
   178  	}
   179  	if mustNotEmpty {
   180  		exec.ret.setGroupNotEmpty(groupIndex)
   181  	}
   182  	return nil
   183  }
   184  
   185  func (exec *medianColumnExecSelf[T, R]) BatchFill(offset int, groups []uint64, vectors []*vector.Vector) error {
   186  	if vectors[0].IsConstNull() {
   187  		return nil
   188  	}
   189  
   190  	if exec.IsDistinct() {
   191  		return exec.distinctBatchFill(offset, groups, vectors)
   192  	}
   193  
   194  	if vectors[0].IsConst() {
   195  		value := vector.MustFixedCol[T](vectors[0])[0]
   196  		for i := 0; i < len(groups); i++ {
   197  			if groups[i] != GroupNotMatched {
   198  				groupIndex := groups[i] - 1
   199  				exec.ret.setGroupNotEmpty(int(groupIndex))
   200  				if err := vectorAppendWildly(
   201  					exec.groups[groupIndex],
   202  					exec.ret.mp, value); err != nil {
   203  					return err
   204  				}
   205  			}
   206  		}
   207  		return nil
   208  	}
   209  
   210  	exec.arg.prepare(vectors[0])
   211  	for i, j, idx := uint64(offset), uint64(offset+len(groups)), 0; i < j; i++ {
   212  		if groups[idx] != GroupNotMatched {
   213  			v, null := exec.arg.w.GetValue(i)
   214  			if !null {
   215  				groupIndex := groups[idx] - 1
   216  				exec.ret.setGroupNotEmpty(int(groupIndex))
   217  
   218  				if err := vectorAppendWildly(exec.groups[groupIndex], exec.ret.mp, v); err != nil {
   219  					return err
   220  				}
   221  			}
   222  		}
   223  		idx++
   224  	}
   225  	return nil
   226  }
   227  
   228  func (exec *medianColumnExecSelf[T, R]) distinctBatchFill(offset int, groups []uint64, vectors []*vector.Vector) error {
   229  	needs, err := exec.distinctHash.batchFill(vectors, offset, groups)
   230  	if err != nil {
   231  		return err
   232  	}
   233  
   234  	if vectors[0].IsConst() {
   235  		value := vector.MustFixedCol[T](vectors[0])[0]
   236  		for i := 0; i < len(groups); i++ {
   237  			if needs[i] && groups[i] != GroupNotMatched {
   238  				groupIndex := groups[i] - 1
   239  				exec.ret.setGroupNotEmpty(int(groupIndex))
   240  				if err = vectorAppendWildly(
   241  					exec.groups[groupIndex],
   242  					exec.ret.mp, value); err != nil {
   243  					return err
   244  				}
   245  			}
   246  		}
   247  		return nil
   248  	}
   249  
   250  	exec.arg.prepare(vectors[0])
   251  	for i, j, idx := uint64(offset), uint64(offset+len(groups)), 0; i < j; i++ {
   252  		if needs[idx] && groups[idx] != GroupNotMatched {
   253  			v, null := exec.arg.w.GetValue(i)
   254  			if !null {
   255  				groupIndex := groups[idx] - 1
   256  				exec.ret.setGroupNotEmpty(int(groupIndex))
   257  				if err = vectorAppendWildly(exec.groups[groupIndex], exec.ret.mp, v); err != nil {
   258  					return err
   259  				}
   260  			}
   261  		}
   262  		idx++
   263  	}
   264  	return nil
   265  }
   266  
   267  func (exec *medianColumnExecSelf[T, R]) Merge(other *medianColumnExecSelf[T, R], groupIdx1, groupIdx2 int) error {
   268  	if exec.IsDistinct() {
   269  		return exec.distinctHash.merge(&other.distinctHash)
   270  	}
   271  	if other.groups[groupIdx2].Length() == 0 {
   272  		return nil
   273  	}
   274  	vs := vector.MustFixedCol[T](other.groups[groupIdx2])
   275  	return vector.AppendFixedList[T](exec.groups[groupIdx1], vs, nil, exec.ret.mp)
   276  }
   277  
   278  func (exec *medianColumnExecSelf[T, R]) BatchMerge(next *medianColumnExecSelf[T, R], offset int, groups []uint64) error {
   279  	for i, group := range groups {
   280  		if group != GroupNotMatched {
   281  			if err := exec.Merge(next, int(group)-1, i+offset); err != nil {
   282  				return err
   283  			}
   284  		}
   285  	}
   286  	return nil
   287  }
   288  
   289  func (exec *medianColumnExecSelf[T, R]) Free() {
   290  	if exec.ret.mg == nil {
   291  		return
   292  	}
   293  	for _, v := range exec.groups {
   294  		if v == nil {
   295  			continue
   296  		}
   297  		if v.NeedDup() {
   298  			v.Free(exec.ret.mp)
   299  		} else {
   300  			exec.ret.mg.PutVector(v)
   301  		}
   302  	}
   303  	exec.ret.free()
   304  	exec.distinctHash.free()
   305  }
   306  
   307  type medianColumnNumericExec[T numeric] struct {
   308  	medianColumnExecSelf[T, float64]
   309  }
   310  
   311  func newMedianColumnNumericExec[T numeric](mg AggMemoryManager, info singleAggInfo) AggFuncExec {
   312  	return &medianColumnNumericExec[T]{
   313  		medianColumnExecSelf: newMedianColumnExecSelf[T, float64](mg, info),
   314  	}
   315  }
   316  
   317  type medianColumnDecimalExec[T types.Decimal64 | types.Decimal128] struct {
   318  	medianColumnExecSelf[T, types.Decimal128]
   319  }
   320  
   321  func newMedianColumnDecimalExec[T types.Decimal64 | types.Decimal128](mg AggMemoryManager, info singleAggInfo) AggFuncExec {
   322  	return &medianColumnDecimalExec[T]{
   323  		medianColumnExecSelf: newMedianColumnExecSelf[T, types.Decimal128](mg, info),
   324  	}
   325  }
   326  
   327  func newMedianExecutor(mg AggMemoryManager, info singleAggInfo) (AggFuncExec, error) {
   328  	if info.distinct {
   329  		return nil, moerr.NewNotSupportedNoCtx("median in distinct mode")
   330  	}
   331  
   332  	switch info.argType.Oid {
   333  	case types.T_bit:
   334  		return newMedianColumnNumericExec[uint64](mg, info), nil
   335  	case types.T_int8:
   336  		return newMedianColumnNumericExec[int8](mg, info), nil
   337  	case types.T_int16:
   338  		return newMedianColumnNumericExec[int16](mg, info), nil
   339  	case types.T_int32:
   340  		return newMedianColumnNumericExec[int32](mg, info), nil
   341  	case types.T_int64:
   342  		return newMedianColumnNumericExec[int64](mg, info), nil
   343  	case types.T_uint8:
   344  		return newMedianColumnNumericExec[uint8](mg, info), nil
   345  	case types.T_uint16:
   346  		return newMedianColumnNumericExec[uint16](mg, info), nil
   347  	case types.T_uint32:
   348  		return newMedianColumnNumericExec[uint32](mg, info), nil
   349  	case types.T_uint64:
   350  		return newMedianColumnNumericExec[uint64](mg, info), nil
   351  	case types.T_float32:
   352  		return newMedianColumnNumericExec[float32](mg, info), nil
   353  	case types.T_float64:
   354  		return newMedianColumnNumericExec[float64](mg, info), nil
   355  	case types.T_decimal64:
   356  		return newMedianColumnDecimalExec[types.Decimal64](mg, info), nil
   357  	case types.T_decimal128:
   358  		return newMedianColumnDecimalExec[types.Decimal128](mg, info), nil
   359  	}
   360  	return nil, moerr.NewInternalErrorNoCtx("unsupported type for median()")
   361  }
   362  
   363  func (exec *medianColumnNumericExec[T]) Merge(next AggFuncExec, groupIdx1 int, groupIdx2 int) error {
   364  	other := next.(*medianColumnNumericExec[T])
   365  	return exec.medianColumnExecSelf.Merge(&other.medianColumnExecSelf, groupIdx1, groupIdx2)
   366  }
   367  
   368  func (exec *medianColumnNumericExec[T]) BatchMerge(next AggFuncExec, offset int, groups []uint64) error {
   369  	other := next.(*medianColumnNumericExec[T])
   370  	return exec.medianColumnExecSelf.BatchMerge(&other.medianColumnExecSelf, offset, groups)
   371  }
   372  
   373  func (exec *medianColumnNumericExec[T]) Flush() (*vector.Vector, error) {
   374  	vs := exec.ret.values
   375  	for i := range exec.groups {
   376  		rows := exec.groups[i].Length()
   377  		if rows == 0 {
   378  			vs[i] = 0
   379  			continue
   380  		}
   381  
   382  		exec.ret.empty[i] = false
   383  		sort.Sort(generateSortableSlice(vector.MustFixedCol[T](exec.groups[i])))
   384  		srcs := vector.MustFixedCol[T](exec.groups[i])
   385  		if rows&1 == 1 {
   386  			vs[i] = float64(srcs[rows>>1])
   387  		} else {
   388  			vs[i] = float64(srcs[rows>>1-1]+srcs[rows>>1]) / 2
   389  		}
   390  	}
   391  	return exec.ret.flush(), nil
   392  }
   393  
   394  func (exec *medianColumnDecimalExec[T]) Merge(next AggFuncExec, groupIdx1 int, groupIdx2 int) error {
   395  	other := next.(*medianColumnDecimalExec[T])
   396  	return exec.medianColumnExecSelf.Merge(&other.medianColumnExecSelf, groupIdx1, groupIdx2)
   397  }
   398  
   399  func (exec *medianColumnDecimalExec[T]) BatchMerge(next AggFuncExec, offset int, groups []uint64) error {
   400  	other := next.(*medianColumnDecimalExec[T])
   401  	return exec.medianColumnExecSelf.BatchMerge(&other.medianColumnExecSelf, offset, groups)
   402  }
   403  
   404  func (exec *medianColumnDecimalExec[T]) Flush() (*vector.Vector, error) {
   405  	var err error
   406  	vs := exec.ret.values
   407  	argIsDecimal128 := exec.singleAggInfo.argType.Oid == types.T_decimal128
   408  
   409  	for i := range exec.groups {
   410  		rows := exec.groups[i].Length()
   411  		if rows == 0 {
   412  			continue
   413  		}
   414  
   415  		exec.ret.empty[i] = false
   416  		sort.Sort(generateSortableSlice2(vector.MustFixedCol[T](exec.groups[i])))
   417  		if argIsDecimal128 {
   418  			srcs := vector.MustFixedCol[types.Decimal128](exec.groups[i])
   419  			if rows&1 == 1 {
   420  				if vs[i], err = srcs[rows>>1].Scale(1); err != nil {
   421  					return nil, err
   422  				}
   423  			} else {
   424  				v1, v2 := srcs[rows>>1-1], srcs[rows>>1]
   425  				if vs[i], err = v1.Add128(v2); err != nil {
   426  					return nil, err
   427  				}
   428  				if vs[i].Sign() {
   429  					// scale(1) here because we set the result scale to be arg.Scale+1
   430  					if vs[i], err = vs[i].Minus().Scale(1); err != nil {
   431  						return nil, err
   432  					}
   433  					vs[i] = vs[i].Right(1).Minus()
   434  				} else {
   435  					if vs[i], err = vs[i].Scale(1); err != nil {
   436  						return nil, err
   437  					}
   438  					vs[i] = vs[i].Right(1)
   439  				}
   440  			}
   441  
   442  		} else {
   443  			srcs := vector.MustFixedCol[types.Decimal64](exec.groups[i])
   444  			if rows&1 == 1 {
   445  				if vs[i], err = FromD64ToD128(srcs[rows>>1]).Scale(1); err != nil {
   446  					return nil, err
   447  				}
   448  			} else {
   449  				v1, v2 := FromD64ToD128(srcs[rows>>1-1]), FromD64ToD128(srcs[rows>>1])
   450  				if vs[i], err = v1.Add128(v2); err != nil {
   451  					return nil, err
   452  				}
   453  				if vs[i].Sign() {
   454  					if vs[i], err = vs[i].Minus().Scale(1); err != nil {
   455  						return nil, err
   456  					}
   457  					vs[i] = vs[i].Right(1).Minus()
   458  				} else {
   459  					if vs[i], err = vs[i].Scale(1); err != nil {
   460  						return nil, err
   461  					}
   462  					vs[i] = vs[i].Right(1)
   463  				}
   464  			}
   465  		}
   466  	}
   467  	return exec.ret.flush(), nil
   468  }
   469  
   470  type numericSlice[T numeric] []T
   471  
   472  func (s numericSlice[T]) Len() int {
   473  	return len(s)
   474  }
   475  func (s numericSlice[T]) Less(i, j int) bool {
   476  	return s[i] < s[j]
   477  }
   478  func (s numericSlice[T]) Swap(i, j int) {
   479  	s[i], s[j] = s[j], s[i]
   480  }
   481  
   482  type decimal64Slice []types.Decimal64
   483  type decimal128Slice []types.Decimal128
   484  
   485  func (s decimal64Slice) Len() int { return len(s) }
   486  func (s decimal64Slice) Less(i, j int) bool {
   487  	return s[i].Compare(s[j]) < 0
   488  }
   489  func (s decimal64Slice) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
   490  
   491  func (s decimal128Slice) Len() int { return len(s) }
   492  func (s decimal128Slice) Less(i, j int) bool {
   493  	return s[i].Compare(s[j]) < 0
   494  }
   495  func (s decimal128Slice) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
   496  
   497  func generateSortableSlice[T numeric](vs []T) sort.Interface {
   498  	return numericSlice[T](vs)
   499  }
   500  
   501  func generateSortableSlice2[T types.Decimal64 | types.Decimal128](vs []T) sort.Interface {
   502  	temp := any(vs)
   503  	if d64, ok := temp.([]types.Decimal64); ok {
   504  		return decimal64Slice(d64)
   505  	}
   506  	if d128, ok := temp.([]types.Decimal128); ok {
   507  		return decimal128Slice(d128)
   508  	}
   509  	panic("unsupported type")
   510  }