github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/colexec/aggexec/multi.go (about) 1 // Copyright 2024 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package aggexec 16 17 import ( 18 "fmt" 19 "github.com/matrixorigin/matrixone/pkg/container/types" 20 "github.com/matrixorigin/matrixone/pkg/container/vector" 21 ) 22 23 type multiAggInfo struct { 24 aggID int64 25 distinct bool 26 argTypes []types.Type 27 retType types.Type 28 29 // emptyNull indicates that whether we should return null for a group without any input value. 30 emptyNull bool 31 } 32 33 func (info multiAggInfo) String() string { 34 args := "[" + info.argTypes[0].String() 35 for i := 1; i < len(info.argTypes); i++ { 36 args += ", " + info.argTypes[i].String() 37 } 38 args += "]" 39 return fmt.Sprintf("{aggID: %d, argTypes: %s, retType: %s}", info.aggID, args, info.retType.String()) 40 } 41 42 func (info multiAggInfo) AggID() int64 { 43 return info.aggID 44 } 45 46 func (info multiAggInfo) IsDistinct() bool { 47 return info.distinct 48 } 49 50 func (info multiAggInfo) TypesInfo() ([]types.Type, types.Type) { 51 return info.argTypes, info.retType 52 } 53 54 func (info multiAggInfo) getEncoded() *EncodedBasicInfo { 55 return &EncodedBasicInfo{ 56 Id: info.aggID, 57 IsDistinct: info.distinct, 58 Args: info.argTypes, 59 Ret: info.retType, 60 } 61 } 62 63 // multiAggFuncExec1 and multiAggFuncExec2 are the executors of multi columns agg. 64 // 1's return type is a fixed length type. 65 // 2's return type is bytes. 66 type multiAggFuncExec1[T types.FixedSizeTExceptStrType] struct { 67 multiAggInfo 68 69 args []mArg1[T] 70 ret aggFuncResult[T] 71 groups []MultiAggRetFixed[T] 72 73 initGroup MultiAggInit1[T] 74 // todo: it's an optimization to move rowValid into eval. 75 rowValid rowValidForMultiAgg1[T] 76 merge MultiAggMerge1[T] 77 eval MultiAggEval1[T] 78 flush MultiAggFlush1[T] 79 80 // method to new the private structure for group growing. 81 gGroup func() MultiAggRetFixed[T] 82 } 83 type multiAggFuncExec2 struct { 84 multiAggInfo 85 86 args []mArg2 87 ret aggFuncBytesResult 88 groups []MultiAggRetVar 89 90 initGroup MultiAggInit2 91 rowValid rowValidForMultiAgg2 92 merge MultiAggMerge2 93 eval MultiAggEval2 94 flush MultiAggFlush2 95 96 // method to new the private structure for group growing. 97 gGroup func() MultiAggRetVar 98 } 99 100 func (exec *multiAggFuncExec1[T]) init( 101 mg AggMemoryManager, 102 info multiAggInfo, 103 impl multiColumnAggImplementation) { 104 105 exec.multiAggInfo = info 106 exec.args = make([]mArg1[T], len(info.argTypes)) 107 exec.ret = initFixedAggFuncResult[T](mg, info.retType, info.emptyNull) 108 exec.groups = make([]MultiAggRetFixed[T], 0, 1) 109 exec.gGroup = impl.generator.(func() MultiAggRetFixed[T]) 110 exec.args = make([]mArg1[T], len(info.argTypes)) 111 112 fillNullWhich := impl.fillNullWhich.([]MultiAggFillNull1[T]) 113 for i := range exec.args { 114 exec.args[i] = newArgumentOfMultiAgg1[T](info.argTypes[i]) 115 116 exec.args[i].cacheFill(impl.fillWhich[i], fillNullWhich[i]) 117 } 118 exec.rowValid = impl.rowValid.(rowValidForMultiAgg1[T]) 119 exec.merge = impl.merge.(MultiAggMerge1[T]) 120 exec.eval = impl.eval.(MultiAggEval1[T]) 121 if impl.flush != nil { 122 exec.flush = impl.flush.(MultiAggFlush1[T]) 123 } 124 if impl.init != nil { 125 exec.initGroup = impl.init.(MultiAggInit1[T]) 126 } 127 } 128 129 func (exec *multiAggFuncExec1[T]) GroupGrow(more int) error { 130 if err := exec.ret.grows(more); err != nil { 131 return err 132 } 133 setter := exec.ret.aggSet 134 moreGroup := make([]MultiAggRetFixed[T], more) 135 for i := 0; i < more; i++ { 136 moreGroup[i] = exec.gGroup() 137 } 138 139 if exec.initGroup != nil { 140 for i := 0; i < more; i++ { 141 exec.ret.groupToSet = i + len(exec.groups) 142 exec.initGroup(moreGroup[i], setter, exec.argTypes, exec.retType) 143 } 144 } 145 146 exec.groups = append(exec.groups, moreGroup...) 147 return nil 148 } 149 150 func (exec *multiAggFuncExec1[T]) PreAllocateGroups(more int) error { 151 return exec.ret.preAllocate(more) 152 } 153 154 func (exec *multiAggFuncExec1[T]) Fill(groupIndex int, row int, vectors []*vector.Vector) error { 155 var err error 156 for i, arg := range exec.args { 157 arg.prepare(vectors[i]) 158 if err = arg.doRowFill(exec.groups[groupIndex], uint64(row)); err != nil { 159 return err 160 } 161 } 162 exec.ret.groupToSet = groupIndex 163 if exec.rowValid(exec.groups[groupIndex]) { 164 exec.ret.setGroupNotEmpty(groupIndex) 165 if err = exec.eval(exec.groups[groupIndex], exec.ret.aggGet, exec.ret.aggSet); err != nil { 166 return err 167 } 168 } 169 170 return nil 171 } 172 173 func (exec *multiAggFuncExec1[T]) BulkFill(groupIndex int, vectors []*vector.Vector) error { 174 var err error 175 for i, arg := range exec.args { 176 arg.prepare(vectors[i]) 177 } 178 179 setter := exec.ret.aggSet 180 getter := exec.ret.aggGet 181 exec.ret.groupToSet = groupIndex 182 for i, j := uint64(0), uint64(vectors[0].Length()); i < j; i++ { 183 for _, arg := range exec.args { 184 if err = arg.doRowFill(exec.groups[groupIndex], i); err != nil { 185 return err 186 } 187 } 188 if exec.rowValid(exec.groups[groupIndex]) { 189 exec.ret.setGroupNotEmpty(groupIndex) 190 if err = exec.eval(exec.groups[groupIndex], getter, setter); err != nil { 191 return err 192 } 193 } 194 } 195 196 return nil 197 } 198 199 func (exec *multiAggFuncExec1[T]) BatchFill(offset int, groups []uint64, vectors []*vector.Vector) error { 200 var err error 201 setter := exec.ret.aggSet 202 getter := exec.ret.aggGet 203 for i, arg := range exec.args { 204 arg.prepare(vectors[i]) 205 } 206 207 for idx, i, j := 0, uint64(offset), uint64(offset+len(groups)); i < j; i++ { 208 if groups[idx] != GroupNotMatched { 209 groupIdx := int(groups[idx] - 1) 210 for _, arg := range exec.args { 211 if err = arg.doRowFill(exec.groups[groupIdx], i); err != nil { 212 return err 213 } 214 } 215 exec.ret.groupToSet = groupIdx 216 if exec.rowValid(exec.groups[groupIdx]) { 217 exec.ret.setGroupNotEmpty(groupIdx) 218 if err = exec.eval(exec.groups[groupIdx], getter, setter); err != nil { 219 return err 220 } 221 } 222 223 } 224 idx++ 225 } 226 227 return nil 228 } 229 230 func (exec *multiAggFuncExec1[T]) SetExtraInformation(partialResult any, groupIndex int) error { 231 panic("unimplemented SetPreparedResult for multiAggFuncExec1") 232 } 233 234 func (exec *multiAggFuncExec1[T]) Merge(next AggFuncExec, groupIdx1, groupIdx2 int) error { 235 other := next.(*multiAggFuncExec1[T]) 236 exec.ret.groupToSet = groupIdx1 237 other.ret.groupToSet = groupIdx2 238 239 exec.ret.mergeEmpty(other.ret.basicResult, groupIdx1, groupIdx2) 240 return exec.merge( 241 exec.groups[groupIdx1], 242 other.groups[groupIdx2], 243 exec.ret.aggGet, other.ret.aggGet, 244 exec.ret.aggSet) 245 } 246 247 func (exec *multiAggFuncExec1[T]) BatchMerge(next AggFuncExec, offset int, groups []uint64) error { 248 other := next.(*multiAggFuncExec1[T]) 249 setter := exec.ret.aggSet 250 getter1, getter2 := exec.ret.aggGet, other.ret.aggGet 251 252 for i := range groups { 253 if groups[i] == GroupNotMatched { 254 continue 255 } 256 groupIdx1, groupIdx2 := int(groups[i]-1), i+offset 257 exec.ret.groupToSet = groupIdx1 258 other.ret.groupToSet = groupIdx2 259 260 exec.ret.mergeEmpty(other.ret.basicResult, groupIdx1, groupIdx2) 261 if err := exec.merge( 262 exec.groups[groupIdx1], 263 other.groups[groupIdx2], 264 getter1, getter2, 265 setter); err != nil { 266 return err 267 } 268 } 269 return nil 270 } 271 272 func (exec *multiAggFuncExec1[T]) Flush() (*vector.Vector, error) { 273 setter := exec.ret.aggSet 274 getter := exec.ret.aggGet 275 276 if exec.flush == nil { 277 return exec.ret.flush(), nil 278 } 279 280 if exec.ret.emptyBeNull { 281 for i, group := range exec.groups { 282 if exec.ret.groupIsEmpty(i) { 283 continue 284 } 285 exec.ret.groupToSet = i 286 if err := exec.flush(group, getter, setter); err != nil { 287 return nil, err 288 } 289 } 290 } else { 291 for i, group := range exec.groups { 292 exec.ret.groupToSet = i 293 if err := exec.flush(group, getter, setter); err != nil { 294 return nil, err 295 } 296 } 297 } 298 return exec.ret.flush(), nil 299 } 300 301 func (exec *multiAggFuncExec1[T]) Free() { 302 exec.ret.free() 303 } 304 305 func (exec *multiAggFuncExec2) init( 306 mg AggMemoryManager, 307 info multiAggInfo, 308 impl multiColumnAggImplementation) { 309 310 exec.multiAggInfo = info 311 exec.args = make([]mArg2, len(info.argTypes)) 312 exec.ret = initBytesAggFuncResult(mg, info.retType, info.emptyNull) 313 exec.groups = make([]MultiAggRetVar, 0, 1) 314 exec.gGroup = impl.generator.(func() MultiAggRetVar) 315 exec.args = make([]mArg2, len(info.argTypes)) 316 317 fillNullWhich := impl.fillNullWhich.([]MultiAggFillNull2) 318 for i := range exec.args { 319 exec.args[i] = newArgumentOfMultiAgg2(info.argTypes[i]) 320 321 exec.args[i].cacheFill(impl.fillWhich[i], fillNullWhich[i]) 322 } 323 exec.rowValid = impl.rowValid.(rowValidForMultiAgg2) 324 exec.merge = impl.merge.(MultiAggMerge2) 325 exec.eval = impl.eval.(MultiAggEval2) 326 if impl.flush != nil { 327 exec.flush = impl.flush.(MultiAggFlush2) 328 } 329 if impl.init != nil { 330 exec.initGroup = impl.init.(MultiAggInit2) 331 } 332 } 333 334 func (exec *multiAggFuncExec2) GroupGrow(more int) error { 335 if err := exec.ret.grows(more); err != nil { 336 return err 337 } 338 setter := exec.ret.aggSet 339 moreGroup := make([]MultiAggRetVar, more) 340 for i := 0; i < more; i++ { 341 moreGroup[i] = exec.gGroup() 342 } 343 344 if exec.initGroup != nil { 345 for i := 0; i < more; i++ { 346 exec.ret.groupToSet = i + len(exec.groups) 347 exec.initGroup(moreGroup[i], setter, exec.argTypes, exec.retType) 348 } 349 } 350 351 exec.groups = append(exec.groups, moreGroup...) 352 return nil 353 } 354 355 func (exec *multiAggFuncExec2) PreAllocateGroups(more int) error { 356 return exec.ret.preAllocate(more) 357 } 358 359 func (exec *multiAggFuncExec2) Fill(groupIndex int, row int, vectors []*vector.Vector) error { 360 var err error 361 for i, arg := range exec.args { 362 arg.prepare(vectors[i]) 363 if err = arg.doRowFill(exec.groups[groupIndex], uint64(row)); err != nil { 364 return err 365 } 366 } 367 exec.ret.groupToSet = groupIndex 368 if exec.rowValid(exec.groups[groupIndex]) { 369 exec.ret.setGroupNotEmpty(groupIndex) 370 return exec.eval(exec.groups[groupIndex], exec.ret.aggGet, exec.ret.aggSet) 371 } 372 373 return nil 374 } 375 376 func (exec *multiAggFuncExec2) BulkFill(groupIndex int, vectors []*vector.Vector) error { 377 var err error 378 for i, arg := range exec.args { 379 arg.prepare(vectors[i]) 380 } 381 382 setter := exec.ret.aggSet 383 getter := exec.ret.aggGet 384 exec.ret.groupToSet = groupIndex 385 386 // todo: can do optimization here once all the vectors were constant. 387 388 for i, j := uint64(0), uint64(vectors[0].Length()); i < j; i++ { 389 for _, arg := range exec.args { 390 if err = arg.doRowFill(exec.groups[groupIndex], i); err != nil { 391 return err 392 } 393 } 394 if exec.rowValid(exec.groups[groupIndex]) { 395 exec.ret.setGroupNotEmpty(groupIndex) 396 if err = exec.eval(exec.groups[groupIndex], getter, setter); err != nil { 397 return err 398 } 399 } 400 } 401 402 return nil 403 } 404 405 func (exec *multiAggFuncExec2) BatchFill(offset int, groups []uint64, vectors []*vector.Vector) error { 406 var err error 407 setter := exec.ret.aggSet 408 getter := exec.ret.aggGet 409 for i, arg := range exec.args { 410 arg.prepare(vectors[i]) 411 } 412 413 for idx, i, j := 0, uint64(offset), uint64(offset+len(groups)); i < j; i++ { 414 if groups[idx] != GroupNotMatched { 415 groupIdx := int(groups[idx] - 1) 416 for _, arg := range exec.args { 417 if err = arg.doRowFill(exec.groups[groupIdx], i); err != nil { 418 return err 419 } 420 } 421 exec.ret.groupToSet = groupIdx 422 if exec.rowValid(exec.groups[groupIdx]) { 423 exec.ret.setGroupNotEmpty(groupIdx) 424 if err = exec.eval(exec.groups[groupIdx], getter, setter); err != nil { 425 return err 426 } 427 } 428 429 } 430 idx++ 431 } 432 433 return nil 434 } 435 436 func (exec *multiAggFuncExec2) SetExtraInformation(partialResult any, groupIndex int) error { 437 panic("unimplemented SetPreparedResult for multiAggFuncExec2") 438 } 439 440 func (exec *multiAggFuncExec2) Merge(next AggFuncExec, groupIdx1, groupIdx2 int) error { 441 other := next.(*multiAggFuncExec2) 442 exec.ret.groupToSet = groupIdx1 443 other.ret.groupToSet = groupIdx2 444 445 exec.ret.mergeEmpty(other.ret.basicResult, groupIdx1, groupIdx2) 446 return exec.merge( 447 exec.groups[groupIdx1], 448 other.groups[groupIdx2], 449 exec.ret.aggGet, other.ret.aggGet, 450 exec.ret.aggSet) 451 } 452 453 func (exec *multiAggFuncExec2) BatchMerge(next AggFuncExec, offset int, groups []uint64) error { 454 other := next.(*multiAggFuncExec2) 455 setter := exec.ret.aggSet 456 getter1, getter2 := exec.ret.aggGet, other.ret.aggGet 457 458 for i := range groups { 459 if groups[i] == GroupNotMatched { 460 continue 461 } 462 groupIdx1, groupIdx2 := int(groups[i]-1), i+offset 463 exec.ret.groupToSet = groupIdx1 464 other.ret.groupToSet = groupIdx2 465 466 exec.ret.mergeEmpty(other.ret.basicResult, groupIdx1, groupIdx2) 467 if err := exec.merge( 468 exec.groups[groupIdx1], 469 other.groups[groupIdx2], 470 getter1, getter2, 471 setter); err != nil { 472 return err 473 } 474 } 475 return nil 476 } 477 478 func (exec *multiAggFuncExec2) Flush() (*vector.Vector, error) { 479 var err error 480 setter := exec.ret.aggSet 481 getter := exec.ret.aggGet 482 483 if exec.flush == nil { 484 return exec.ret.flush(), nil 485 } 486 487 if exec.ret.emptyBeNull { 488 for i, group := range exec.groups { 489 if exec.ret.groupIsEmpty(i) { 490 continue 491 } 492 exec.ret.groupToSet = i 493 if err = exec.flush(group, getter, setter); err != nil { 494 return nil, err 495 } 496 } 497 } else { 498 for i, group := range exec.groups { 499 exec.ret.groupToSet = i 500 if err = exec.flush(group, getter, setter); err != nil { 501 return nil, err 502 } 503 } 504 } 505 return exec.ret.flush(), nil 506 } 507 508 func (exec *multiAggFuncExec2) Free() { 509 exec.ret.free() 510 }