github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/colexec/aggexec/median.go (about) 1 // Copyright 2024 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package aggexec 16 17 import ( 18 "github.com/matrixorigin/matrixone/pkg/common/moerr" 19 "github.com/matrixorigin/matrixone/pkg/container/types" 20 "github.com/matrixorigin/matrixone/pkg/container/vector" 21 "sort" 22 ) 23 24 var MedianSupportedType = []types.T{ 25 types.T_bit, types.T_int8, types.T_int16, types.T_int32, types.T_int64, 26 types.T_uint8, types.T_uint16, types.T_uint32, types.T_uint64, 27 types.T_float32, types.T_float64, types.T_decimal64, types.T_decimal128, 28 } 29 30 func MedianReturnType(args []types.Type) types.Type { 31 if args[0].IsDecimal() { 32 return types.New(types.T_decimal128, 38, args[0].Scale+1) 33 } 34 return types.T_float64.ToType() 35 } 36 37 type numeric interface { 38 types.Ints | types.UInts | types.Floats 39 } 40 41 type medianColumnExecSelf[T numeric | types.Decimal64 | types.Decimal128, R float64 | types.Decimal128] struct { 42 singleAggInfo 43 singleAggExecExtraInformation 44 distinctHash 45 arg sFixedArg[T] 46 ret aggFuncResult[R] 47 48 // groups stores the values of the column for each group. 49 // todo: it has a problem that same as the `clusterCentersExec.groupData` in `cluster_centers.go` 50 groups []*vector.Vector 51 } 52 53 func newMedianColumnExecSelf[T numeric | types.Decimal64 | types.Decimal128, R float64 | types.Decimal128](mg AggMemoryManager, info singleAggInfo) medianColumnExecSelf[T, R] { 54 s := medianColumnExecSelf[T, R]{ 55 singleAggInfo: info, 56 ret: initFixedAggFuncResult[R](mg, info.retType, info.emptyNull), 57 } 58 if info.IsDistinct() { 59 s.distinctHash = newDistinctHash(mg.Mp(), false) 60 } 61 return s 62 } 63 64 func (exec *medianColumnExecSelf[T, R]) GroupGrow(more int) error { 65 if exec.IsDistinct() { 66 if err := exec.distinctHash.grows(more); err != nil { 67 return err 68 } 69 } 70 71 oldLength := len(exec.groups) 72 if cap(exec.groups) >= oldLength+more { 73 exec.groups = exec.groups[:oldLength+more] 74 } else { 75 exec.groups = append(exec.groups, make([]*vector.Vector, more)...) 76 } 77 78 for i, j := oldLength, len(exec.groups); i < j; i++ { 79 exec.groups[i] = exec.ret.mg.GetVector(exec.singleAggInfo.argType) 80 } 81 return exec.ret.grows(more) 82 } 83 84 func (exec *medianColumnExecSelf[T, R]) PreAllocateGroups(more int) error { 85 if len(exec.groups) == 0 { 86 exec.groups = make([]*vector.Vector, 0, more) 87 } else { 88 oldLength := len(exec.groups) 89 exec.groups = append(exec.groups, make([]*vector.Vector, more)...) 90 exec.groups = exec.groups[:oldLength] 91 } 92 93 return exec.ret.preAllocate(more) 94 } 95 96 func (exec *medianColumnExecSelf[T, R]) Fill(groupIndex int, row int, vectors []*vector.Vector) error { 97 if vectors[0].IsNull(uint64(row)) { 98 return nil 99 } 100 if vectors[0].IsConst() { 101 row = 0 102 } 103 if exec.IsDistinct() { 104 if need, err := exec.distinctHash.fill(groupIndex, vectors, row); err != nil || !need { 105 return err 106 } 107 } 108 109 exec.ret.setGroupNotEmpty(groupIndex) 110 value := vector.MustFixedCol[T](vectors[0])[row] 111 112 return vectorAppendWildly(exec.groups[groupIndex], exec.ret.mp, value) 113 } 114 115 func (exec *medianColumnExecSelf[T, R]) BulkFill(groupIndex int, vectors []*vector.Vector) error { 116 if vectors[0].IsConstNull() { 117 return nil 118 } 119 120 if exec.IsDistinct() { 121 return exec.distinctBulkFill(groupIndex, vectors) 122 } 123 124 if vectors[0].IsConst() { 125 exec.ret.setGroupNotEmpty(groupIndex) 126 value := vector.MustFixedCol[T](vectors[0])[0] 127 return vector.AppendMultiFixed[T](exec.groups[0], value, false, vectors[0].Length(), exec.ret.mp) 128 } 129 130 exec.arg.prepare(vectors[0]) 131 mustNotEmpty := false 132 for i, j := uint64(0), uint64(vectors[0].Length()); i < j; i++ { 133 v, null := exec.arg.w.GetValue(i) 134 if null { 135 continue 136 } 137 mustNotEmpty = true 138 if err := vectorAppendWildly(exec.groups[groupIndex], exec.ret.mp, v); err != nil { 139 return err 140 } 141 } 142 if mustNotEmpty { 143 exec.ret.setGroupNotEmpty(groupIndex) 144 } 145 return nil 146 } 147 148 func (exec *medianColumnExecSelf[T, R]) distinctBulkFill(groupIndex int, vectors []*vector.Vector) error { 149 if vectors[0].IsConst() { 150 if need, err := exec.distinctHash.fill(groupIndex, vectors, 0); err != nil || !need { 151 return err 152 } 153 154 exec.ret.setGroupNotEmpty(groupIndex) 155 value := vector.MustFixedCol[T](vectors[0])[0] 156 return vector.AppendMultiFixed[T](exec.groups[groupIndex], value, false, vectors[0].Length(), exec.ret.mp) 157 } 158 159 needs, err := exec.distinctHash.bulkFill(groupIndex, vectors) 160 if err != nil { 161 return err 162 } 163 exec.arg.prepare(vectors[0]) 164 mustNotEmpty := false 165 for i, j := uint64(0), uint64(vectors[0].Length()); i < j; i++ { 166 if !needs[i] { 167 continue 168 } 169 170 v, null := exec.arg.w.GetValue(i) 171 if null { 172 continue 173 } 174 mustNotEmpty = true 175 if err = vectorAppendWildly(exec.groups[groupIndex], exec.ret.mp, v); err != nil { 176 return err 177 } 178 } 179 if mustNotEmpty { 180 exec.ret.setGroupNotEmpty(groupIndex) 181 } 182 return nil 183 } 184 185 func (exec *medianColumnExecSelf[T, R]) BatchFill(offset int, groups []uint64, vectors []*vector.Vector) error { 186 if vectors[0].IsConstNull() { 187 return nil 188 } 189 190 if exec.IsDistinct() { 191 return exec.distinctBatchFill(offset, groups, vectors) 192 } 193 194 if vectors[0].IsConst() { 195 value := vector.MustFixedCol[T](vectors[0])[0] 196 for i := 0; i < len(groups); i++ { 197 if groups[i] != GroupNotMatched { 198 groupIndex := groups[i] - 1 199 exec.ret.setGroupNotEmpty(int(groupIndex)) 200 if err := vectorAppendWildly( 201 exec.groups[groupIndex], 202 exec.ret.mp, value); err != nil { 203 return err 204 } 205 } 206 } 207 return nil 208 } 209 210 exec.arg.prepare(vectors[0]) 211 for i, j, idx := uint64(offset), uint64(offset+len(groups)), 0; i < j; i++ { 212 if groups[idx] != GroupNotMatched { 213 v, null := exec.arg.w.GetValue(i) 214 if !null { 215 groupIndex := groups[idx] - 1 216 exec.ret.setGroupNotEmpty(int(groupIndex)) 217 218 if err := vectorAppendWildly(exec.groups[groupIndex], exec.ret.mp, v); err != nil { 219 return err 220 } 221 } 222 } 223 idx++ 224 } 225 return nil 226 } 227 228 func (exec *medianColumnExecSelf[T, R]) distinctBatchFill(offset int, groups []uint64, vectors []*vector.Vector) error { 229 needs, err := exec.distinctHash.batchFill(vectors, offset, groups) 230 if err != nil { 231 return err 232 } 233 234 if vectors[0].IsConst() { 235 value := vector.MustFixedCol[T](vectors[0])[0] 236 for i := 0; i < len(groups); i++ { 237 if needs[i] && groups[i] != GroupNotMatched { 238 groupIndex := groups[i] - 1 239 exec.ret.setGroupNotEmpty(int(groupIndex)) 240 if err = vectorAppendWildly( 241 exec.groups[groupIndex], 242 exec.ret.mp, value); err != nil { 243 return err 244 } 245 } 246 } 247 return nil 248 } 249 250 exec.arg.prepare(vectors[0]) 251 for i, j, idx := uint64(offset), uint64(offset+len(groups)), 0; i < j; i++ { 252 if needs[idx] && groups[idx] != GroupNotMatched { 253 v, null := exec.arg.w.GetValue(i) 254 if !null { 255 groupIndex := groups[idx] - 1 256 exec.ret.setGroupNotEmpty(int(groupIndex)) 257 if err = vectorAppendWildly(exec.groups[groupIndex], exec.ret.mp, v); err != nil { 258 return err 259 } 260 } 261 } 262 idx++ 263 } 264 return nil 265 } 266 267 func (exec *medianColumnExecSelf[T, R]) Merge(other *medianColumnExecSelf[T, R], groupIdx1, groupIdx2 int) error { 268 if exec.IsDistinct() { 269 return exec.distinctHash.merge(&other.distinctHash) 270 } 271 if other.groups[groupIdx2].Length() == 0 { 272 return nil 273 } 274 vs := vector.MustFixedCol[T](other.groups[groupIdx2]) 275 return vector.AppendFixedList[T](exec.groups[groupIdx1], vs, nil, exec.ret.mp) 276 } 277 278 func (exec *medianColumnExecSelf[T, R]) BatchMerge(next *medianColumnExecSelf[T, R], offset int, groups []uint64) error { 279 for i, group := range groups { 280 if group != GroupNotMatched { 281 if err := exec.Merge(next, int(group)-1, i+offset); err != nil { 282 return err 283 } 284 } 285 } 286 return nil 287 } 288 289 func (exec *medianColumnExecSelf[T, R]) Free() { 290 if exec.ret.mg == nil { 291 return 292 } 293 for _, v := range exec.groups { 294 if v == nil { 295 continue 296 } 297 if v.NeedDup() { 298 v.Free(exec.ret.mp) 299 } else { 300 exec.ret.mg.PutVector(v) 301 } 302 } 303 exec.ret.free() 304 exec.distinctHash.free() 305 } 306 307 type medianColumnNumericExec[T numeric] struct { 308 medianColumnExecSelf[T, float64] 309 } 310 311 func newMedianColumnNumericExec[T numeric](mg AggMemoryManager, info singleAggInfo) AggFuncExec { 312 return &medianColumnNumericExec[T]{ 313 medianColumnExecSelf: newMedianColumnExecSelf[T, float64](mg, info), 314 } 315 } 316 317 type medianColumnDecimalExec[T types.Decimal64 | types.Decimal128] struct { 318 medianColumnExecSelf[T, types.Decimal128] 319 } 320 321 func newMedianColumnDecimalExec[T types.Decimal64 | types.Decimal128](mg AggMemoryManager, info singleAggInfo) AggFuncExec { 322 return &medianColumnDecimalExec[T]{ 323 medianColumnExecSelf: newMedianColumnExecSelf[T, types.Decimal128](mg, info), 324 } 325 } 326 327 func newMedianExecutor(mg AggMemoryManager, info singleAggInfo) (AggFuncExec, error) { 328 if info.distinct { 329 return nil, moerr.NewNotSupportedNoCtx("median in distinct mode") 330 } 331 332 switch info.argType.Oid { 333 case types.T_bit: 334 return newMedianColumnNumericExec[uint64](mg, info), nil 335 case types.T_int8: 336 return newMedianColumnNumericExec[int8](mg, info), nil 337 case types.T_int16: 338 return newMedianColumnNumericExec[int16](mg, info), nil 339 case types.T_int32: 340 return newMedianColumnNumericExec[int32](mg, info), nil 341 case types.T_int64: 342 return newMedianColumnNumericExec[int64](mg, info), nil 343 case types.T_uint8: 344 return newMedianColumnNumericExec[uint8](mg, info), nil 345 case types.T_uint16: 346 return newMedianColumnNumericExec[uint16](mg, info), nil 347 case types.T_uint32: 348 return newMedianColumnNumericExec[uint32](mg, info), nil 349 case types.T_uint64: 350 return newMedianColumnNumericExec[uint64](mg, info), nil 351 case types.T_float32: 352 return newMedianColumnNumericExec[float32](mg, info), nil 353 case types.T_float64: 354 return newMedianColumnNumericExec[float64](mg, info), nil 355 case types.T_decimal64: 356 return newMedianColumnDecimalExec[types.Decimal64](mg, info), nil 357 case types.T_decimal128: 358 return newMedianColumnDecimalExec[types.Decimal128](mg, info), nil 359 } 360 return nil, moerr.NewInternalErrorNoCtx("unsupported type for median()") 361 } 362 363 func (exec *medianColumnNumericExec[T]) Merge(next AggFuncExec, groupIdx1 int, groupIdx2 int) error { 364 other := next.(*medianColumnNumericExec[T]) 365 return exec.medianColumnExecSelf.Merge(&other.medianColumnExecSelf, groupIdx1, groupIdx2) 366 } 367 368 func (exec *medianColumnNumericExec[T]) BatchMerge(next AggFuncExec, offset int, groups []uint64) error { 369 other := next.(*medianColumnNumericExec[T]) 370 return exec.medianColumnExecSelf.BatchMerge(&other.medianColumnExecSelf, offset, groups) 371 } 372 373 func (exec *medianColumnNumericExec[T]) Flush() (*vector.Vector, error) { 374 vs := exec.ret.values 375 for i := range exec.groups { 376 rows := exec.groups[i].Length() 377 if rows == 0 { 378 vs[i] = 0 379 continue 380 } 381 382 exec.ret.empty[i] = false 383 sort.Sort(generateSortableSlice(vector.MustFixedCol[T](exec.groups[i]))) 384 srcs := vector.MustFixedCol[T](exec.groups[i]) 385 if rows&1 == 1 { 386 vs[i] = float64(srcs[rows>>1]) 387 } else { 388 vs[i] = float64(srcs[rows>>1-1]+srcs[rows>>1]) / 2 389 } 390 } 391 return exec.ret.flush(), nil 392 } 393 394 func (exec *medianColumnDecimalExec[T]) Merge(next AggFuncExec, groupIdx1 int, groupIdx2 int) error { 395 other := next.(*medianColumnDecimalExec[T]) 396 return exec.medianColumnExecSelf.Merge(&other.medianColumnExecSelf, groupIdx1, groupIdx2) 397 } 398 399 func (exec *medianColumnDecimalExec[T]) BatchMerge(next AggFuncExec, offset int, groups []uint64) error { 400 other := next.(*medianColumnDecimalExec[T]) 401 return exec.medianColumnExecSelf.BatchMerge(&other.medianColumnExecSelf, offset, groups) 402 } 403 404 func (exec *medianColumnDecimalExec[T]) Flush() (*vector.Vector, error) { 405 var err error 406 vs := exec.ret.values 407 argIsDecimal128 := exec.singleAggInfo.argType.Oid == types.T_decimal128 408 409 for i := range exec.groups { 410 rows := exec.groups[i].Length() 411 if rows == 0 { 412 continue 413 } 414 415 exec.ret.empty[i] = false 416 sort.Sort(generateSortableSlice2(vector.MustFixedCol[T](exec.groups[i]))) 417 if argIsDecimal128 { 418 srcs := vector.MustFixedCol[types.Decimal128](exec.groups[i]) 419 if rows&1 == 1 { 420 if vs[i], err = srcs[rows>>1].Scale(1); err != nil { 421 return nil, err 422 } 423 } else { 424 v1, v2 := srcs[rows>>1-1], srcs[rows>>1] 425 if vs[i], err = v1.Add128(v2); err != nil { 426 return nil, err 427 } 428 if vs[i].Sign() { 429 // scale(1) here because we set the result scale to be arg.Scale+1 430 if vs[i], err = vs[i].Minus().Scale(1); err != nil { 431 return nil, err 432 } 433 vs[i] = vs[i].Right(1).Minus() 434 } else { 435 if vs[i], err = vs[i].Scale(1); err != nil { 436 return nil, err 437 } 438 vs[i] = vs[i].Right(1) 439 } 440 } 441 442 } else { 443 srcs := vector.MustFixedCol[types.Decimal64](exec.groups[i]) 444 if rows&1 == 1 { 445 if vs[i], err = FromD64ToD128(srcs[rows>>1]).Scale(1); err != nil { 446 return nil, err 447 } 448 } else { 449 v1, v2 := FromD64ToD128(srcs[rows>>1-1]), FromD64ToD128(srcs[rows>>1]) 450 if vs[i], err = v1.Add128(v2); err != nil { 451 return nil, err 452 } 453 if vs[i].Sign() { 454 if vs[i], err = vs[i].Minus().Scale(1); err != nil { 455 return nil, err 456 } 457 vs[i] = vs[i].Right(1).Minus() 458 } else { 459 if vs[i], err = vs[i].Scale(1); err != nil { 460 return nil, err 461 } 462 vs[i] = vs[i].Right(1) 463 } 464 } 465 } 466 } 467 return exec.ret.flush(), nil 468 } 469 470 type numericSlice[T numeric] []T 471 472 func (s numericSlice[T]) Len() int { 473 return len(s) 474 } 475 func (s numericSlice[T]) Less(i, j int) bool { 476 return s[i] < s[j] 477 } 478 func (s numericSlice[T]) Swap(i, j int) { 479 s[i], s[j] = s[j], s[i] 480 } 481 482 type decimal64Slice []types.Decimal64 483 type decimal128Slice []types.Decimal128 484 485 func (s decimal64Slice) Len() int { return len(s) } 486 func (s decimal64Slice) Less(i, j int) bool { 487 return s[i].Compare(s[j]) < 0 488 } 489 func (s decimal64Slice) Swap(i, j int) { s[i], s[j] = s[j], s[i] } 490 491 func (s decimal128Slice) Len() int { return len(s) } 492 func (s decimal128Slice) Less(i, j int) bool { 493 return s[i].Compare(s[j]) < 0 494 } 495 func (s decimal128Slice) Swap(i, j int) { s[i], s[j] = s[j], s[i] } 496 497 func generateSortableSlice[T numeric](vs []T) sort.Interface { 498 return numericSlice[T](vs) 499 } 500 501 func generateSortableSlice2[T types.Decimal64 | types.Decimal128](vs []T) sort.Interface { 502 temp := any(vs) 503 if d64, ok := temp.([]types.Decimal64); ok { 504 return decimal64Slice(d64) 505 } 506 if d128, ok := temp.([]types.Decimal128); ok { 507 return decimal128Slice(d128) 508 } 509 panic("unsupported type") 510 }