github.com/matrixorigin/matrixone@v0.7.0/pkg/sql/colexec/agg/median.go (about)

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package agg
    16  
    17  import (
    18  	"sort"
    19  
    20  	"github.com/matrixorigin/matrixone/pkg/container/types"
    21  )
    22  
    23  type decimal64Slice []types.Decimal64
    24  type decimal128Slice []types.Decimal128
    25  
    26  func (s decimal64Slice) Len() int {
    27  	return len(s)
    28  }
    29  
    30  func (s decimal64Slice) Less(i, j int) bool {
    31  	return s[i].Lt(s[j])
    32  }
    33  
    34  func (s decimal64Slice) Swap(i, j int) {
    35  	s[i], s[j] = s[j], s[i]
    36  }
    37  
    38  func (s decimal128Slice) Len() int {
    39  	return len(s)
    40  }
    41  
    42  func (s decimal128Slice) Less(i, j int) bool {
    43  	return s[i].Lt(s[j])
    44  }
    45  
    46  func (s decimal128Slice) Swap(i, j int) {
    47  	s[i], s[j] = s[j], s[i]
    48  }
    49  
    50  type numericSlice[T Numeric] []T
    51  
    52  func (s numericSlice[T]) Len() int {
    53  	return len(s)
    54  }
    55  
    56  func (s numericSlice[T]) Less(i, j int) bool {
    57  	return s[i] < s[j]
    58  }
    59  
    60  func (s numericSlice[T]) Swap(i, j int) {
    61  	s[i], s[j] = s[j], s[i]
    62  }
    63  
    64  type Median[T Numeric] struct {
    65  	Vals []numericSlice[T]
    66  }
    67  
    68  type Decimal64Median struct {
    69  	Vals []decimal64Slice
    70  }
    71  type Decimal128Median struct {
    72  	Vals []decimal128Slice
    73  }
    74  
    75  func MedianReturnType(typs []types.Type) types.Type {
    76  	switch typs[0].Oid {
    77  	case types.T_decimal64:
    78  		return types.New(types.T_decimal128, 0, typs[0].Scale, typs[0].Precision)
    79  	case types.T_decimal128:
    80  		return types.New(types.T_decimal128, 0, typs[0].Scale, typs[0].Precision)
    81  	case types.T_float32, types.T_float64:
    82  		return types.New(types.T_float64, 0, 0, 0)
    83  	case types.T_int8, types.T_int16, types.T_int32, types.T_int64:
    84  		return types.New(types.T_float64, 0, 0, 0)
    85  	case types.T_uint8, types.T_uint16, types.T_uint32, types.T_uint64:
    86  		return types.New(types.T_float64, 0, 0, 0)
    87  	default:
    88  		return types.Type{}
    89  	}
    90  }
    91  
    92  func NewMedian[T Numeric]() *Median[T] {
    93  	return &Median[T]{}
    94  }
    95  
    96  func (m *Median[T]) Grows(cnt int) {
    97  	if len(m.Vals) == 0 {
    98  		m.Vals = make([]numericSlice[T], 0, cnt)
    99  	}
   100  	for i := 0; i < cnt; i++ {
   101  		m.Vals = append(m.Vals, make(numericSlice[T], 0))
   102  	}
   103  }
   104  
   105  func (m *Median[T]) Eval(vs []float64) []float64 {
   106  	for i := range vs {
   107  		cnt := len(m.Vals[i])
   108  		if cnt == 0 {
   109  			continue
   110  		}
   111  		if !sort.IsSorted(m.Vals[i]) {
   112  			sort.Sort(m.Vals[i])
   113  		}
   114  		if cnt&1 == 1 {
   115  			vs[i] = float64(m.Vals[i][cnt>>1])
   116  		} else {
   117  			vs[i] = float64(m.Vals[i][cnt>>1]+m.Vals[i][(cnt>>1)-1]) / 2
   118  		}
   119  	}
   120  	return vs
   121  }
   122  
   123  func (m *Median[T]) Fill(i int64, value T, _ float64, z int64, isEmpty bool, isNull bool) (float64, bool) {
   124  	if !isNull {
   125  		for j := int64(0); j < z; j++ {
   126  			m.Vals[i] = append(m.Vals[i], value)
   127  		}
   128  		return 0, false
   129  	}
   130  	return 0, isEmpty
   131  }
   132  
   133  func (m *Median[T]) Merge(xIndex int64, yIndex int64, _ float64, _ float64, xEmpty bool, yEmpty bool, yMedian any) (float64, bool) {
   134  	if !yEmpty {
   135  		yM := yMedian.(*Median[T])
   136  		if !sort.IsSorted(yM.Vals[yIndex]) {
   137  			sort.Sort(yM.Vals[yIndex])
   138  		}
   139  		if xEmpty {
   140  			m.Vals[xIndex] = append(m.Vals[xIndex], yM.Vals[yIndex]...)
   141  			return 0, false
   142  		}
   143  		newCnt := len(m.Vals[xIndex]) + len(yM.Vals[yIndex])
   144  		newData := make(numericSlice[T], newCnt)
   145  		if !sort.IsSorted(m.Vals[xIndex]) {
   146  			sort.Sort(m.Vals[xIndex])
   147  		}
   148  		merge(m.Vals[xIndex], yM.Vals[yIndex], newData, func(a, b T) bool { return a < b })
   149  		m.Vals[xIndex] = newData
   150  		return 0, false
   151  	}
   152  
   153  	return 0, xEmpty
   154  }
   155  
   156  func (m *Median[T]) MarshalBinary() ([]byte, error) {
   157  	return types.Encode(&m.Vals)
   158  }
   159  
   160  func (m *Median[T]) UnmarshalBinary(data []byte) error {
   161  	// avoid resulting errors caused by morpc overusing memory
   162  	copyData := make([]byte, len(data))
   163  	copy(copyData, data)
   164  	return types.Decode(copyData, &m.Vals)
   165  }
   166  
   167  func NewD64Median() *Decimal64Median {
   168  	return &Decimal64Median{}
   169  }
   170  
   171  func (m *Decimal64Median) Grows(cnt int) {
   172  	if len(m.Vals) == 0 {
   173  		m.Vals = make([]decimal64Slice, 0, cnt)
   174  	}
   175  	for i := 0; i < cnt; i++ {
   176  		m.Vals = append(m.Vals, make(decimal64Slice, 0))
   177  	}
   178  }
   179  
   180  func (m *Decimal64Median) Eval(vs []types.Decimal128) []types.Decimal128 {
   181  	for i := range vs {
   182  		cnt := len(m.Vals[i])
   183  		if cnt == 0 {
   184  			continue
   185  		}
   186  		if !sort.IsSorted(m.Vals[i]) {
   187  			sort.Sort(m.Vals[i])
   188  		}
   189  		if cnt&1 == 1 {
   190  			vs[i] = types.Decimal128_FromDecimal64(m.Vals[i][cnt>>1])
   191  		} else {
   192  			a := types.Decimal128_FromDecimal64(m.Vals[i][cnt>>1])
   193  			b := types.Decimal128_FromDecimal64(m.Vals[i][(cnt>>1)-1])
   194  			vs[i] = a.Add(b).DivInt64(2)
   195  		}
   196  	}
   197  	return vs
   198  }
   199  
   200  func (m *Decimal64Median) Fill(i int64, value types.Decimal64, ov types.Decimal128, z int64, isEmpty bool, isNull bool) (types.Decimal128, bool) {
   201  	if !isNull {
   202  		for j := int64(0); j < z; j++ {
   203  			m.Vals[i] = append(m.Vals[i], value)
   204  		}
   205  		return types.Decimal128_Zero, false
   206  	}
   207  	return types.Decimal128_Zero, isEmpty
   208  }
   209  
   210  func (m *Decimal64Median) Merge(xIndex int64, yIndex int64, _ types.Decimal128, _ types.Decimal128, xEmpty bool, yEmpty bool, yMedian any) (types.Decimal128, bool) {
   211  	if !yEmpty {
   212  		yM := yMedian.(*Decimal64Median)
   213  		if !sort.IsSorted(yM.Vals[yIndex]) {
   214  			sort.Sort(yM.Vals[yIndex])
   215  		}
   216  		if xEmpty {
   217  			m.Vals[xIndex] = append(m.Vals[xIndex], yM.Vals[yIndex]...)
   218  			return types.Decimal128_Zero, false
   219  		}
   220  		newCnt := len(m.Vals[xIndex]) + len(yM.Vals[yIndex])
   221  		newData := make(decimal64Slice, newCnt)
   222  		if !sort.IsSorted(m.Vals[xIndex]) {
   223  			sort.Sort(m.Vals[xIndex])
   224  		}
   225  		merge(m.Vals[xIndex], yM.Vals[yIndex], newData, func(a, b types.Decimal64) bool { return a.Lt(b) })
   226  		m.Vals[xIndex] = newData
   227  		return types.Decimal128_Zero, false
   228  	}
   229  
   230  	return types.Decimal128_Zero, xEmpty
   231  }
   232  
   233  func (m *Decimal64Median) MarshalBinary() ([]byte, error) {
   234  	return types.Encode(&m.Vals)
   235  }
   236  func (m *Decimal64Median) UnmarshalBinary(dt []byte) error {
   237  	// avoid resulting errors caused by morpc overusing memory
   238  	cdt := make([]byte, len(dt))
   239  	copy(cdt, dt)
   240  	return types.Decode(cdt, &m.Vals)
   241  }
   242  
   243  func NewD128Median() *Decimal128Median {
   244  	return &Decimal128Median{}
   245  }
   246  
   247  func (m *Decimal128Median) Grows(cnt int) {
   248  	if len(m.Vals) == 0 {
   249  		m.Vals = make([]decimal128Slice, 0, cnt)
   250  	}
   251  	for i := 0; i < cnt; i++ {
   252  		m.Vals = append(m.Vals, make(decimal128Slice, 0))
   253  	}
   254  }
   255  
   256  func (m *Decimal128Median) Eval(vs []types.Decimal128) []types.Decimal128 {
   257  	for i := range vs {
   258  		cnt := len(m.Vals[i])
   259  		if cnt == 0 {
   260  			continue
   261  		}
   262  		if !sort.IsSorted(m.Vals[i]) {
   263  			sort.Sort(m.Vals[i])
   264  		}
   265  		if cnt&1 == 1 {
   266  			vs[i] = m.Vals[i][cnt>>1]
   267  		} else {
   268  			vs[i] = m.Vals[i][cnt>>1].Add(m.Vals[i][(cnt>>1)-1]).DivInt64(2)
   269  		}
   270  	}
   271  	return vs
   272  }
   273  
   274  func (m *Decimal128Median) Fill(i int64, value types.Decimal128, _ types.Decimal128, z int64, isEmpty bool, isNull bool) (types.Decimal128, bool) {
   275  	if !isNull {
   276  		for j := int64(0); j < z; j++ {
   277  			m.Vals[i] = append(m.Vals[i], value)
   278  		}
   279  		return types.Decimal128_Zero, false
   280  	}
   281  	return types.Decimal128_Zero, isEmpty
   282  }
   283  
   284  func (m *Decimal128Median) Merge(xIndex int64, yIndex int64, _ types.Decimal128, _ types.Decimal128, xEmpty bool, yEmpty bool, yMedian any) (types.Decimal128, bool) {
   285  	if !yEmpty {
   286  		yM := yMedian.(*Decimal128Median)
   287  		if !sort.IsSorted(yM.Vals[yIndex]) {
   288  			sort.Sort(yM.Vals[yIndex])
   289  		}
   290  		if xEmpty {
   291  			m.Vals[xIndex] = append(m.Vals[xIndex], yM.Vals[yIndex]...)
   292  			return types.Decimal128_Zero, false
   293  		}
   294  		if !sort.IsSorted(m.Vals[xIndex]) {
   295  			sort.Sort(m.Vals[xIndex])
   296  		}
   297  		newCnt := len(m.Vals[xIndex]) + len(yM.Vals[yIndex])
   298  		newData := make(decimal128Slice, newCnt)
   299  		merge(m.Vals[xIndex], yM.Vals[yIndex], newData, func(a, b types.Decimal128) bool { return a.Lt(b) })
   300  		m.Vals[xIndex] = newData
   301  		return types.Decimal128_Zero, false
   302  	}
   303  	return types.Decimal128_Zero, xEmpty
   304  }
   305  
   306  func (m *Decimal128Median) MarshalBinary() ([]byte, error) {
   307  	return types.Encode(&m.Vals)
   308  }
   309  
   310  func (m *Decimal128Median) UnmarshalBinary(dt []byte) error {
   311  	// avoid resulting errors caused by morpc overusing memory
   312  	cdt := make([]byte, len(dt))
   313  	copy(cdt, dt)
   314  	return types.Decode(cdt, &m.Vals)
   315  }
   316  
   317  func merge[T Numeric | types.Decimal64 | types.Decimal128](s1, s2, rs []T, lt func(a, b T) bool) []T {
   318  	i, j, cnt := 0, 0, 0
   319  	for i < len(s1) && j < len(s2) {
   320  		if lt(s1[i], s2[j]) {
   321  			rs[cnt] = s1[i]
   322  			i++
   323  		} else {
   324  			rs[cnt] = s2[j]
   325  			j++
   326  		}
   327  		cnt++
   328  	}
   329  	for ; i < len(s1); i++ {
   330  		rs[cnt] = s1[i]
   331  		cnt++
   332  	}
   333  	for ; j < len(s2); j++ {
   334  		rs[cnt] = s2[j]
   335  		cnt++
   336  	}
   337  	return rs
   338  }