github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/colexec/shuffle/shuffle.go (about) 1 // Copyright 2021 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package shuffle 16 17 import ( 18 "bytes" 19 20 "github.com/matrixorigin/matrixone/pkg/container/batch" 21 "github.com/matrixorigin/matrixone/pkg/container/types" 22 "github.com/matrixorigin/matrixone/pkg/container/vector" 23 "github.com/matrixorigin/matrixone/pkg/pb/plan" 24 "github.com/matrixorigin/matrixone/pkg/sql/colexec" 25 plan2 "github.com/matrixorigin/matrixone/pkg/sql/plan" 26 "github.com/matrixorigin/matrixone/pkg/vm" 27 "github.com/matrixorigin/matrixone/pkg/vm/process" 28 ) 29 30 const argName = "shuffle" 31 32 func (arg *Argument) String(buf *bytes.Buffer) { 33 buf.WriteString(argName) 34 buf.WriteString(": shuffle") 35 } 36 37 func (arg *Argument) Prepare(proc *process.Process) error { 38 ap := arg 39 ctr := new(container) 40 ap.ctr = ctr 41 ap.initShuffle() 42 return nil 43 } 44 45 // there are two ways for shuffle to send a batch 46 // if a batch belongs to one bucket, send this batch directly, and shuffle need to do nothing 47 // else split this batch into pieces, write data into pool. if one bucket is full, send this bucket. 48 // next time, set this bucket rowcount to 0 and reuse it 49 // for now, we shuffle null to the first bucket 50 func (arg *Argument) Call(proc *process.Process) (vm.CallResult, error) { 51 if err, isCancel := vm.CancelCheck(proc); isCancel { 52 return vm.CancelResult, err 53 } 54 ap := arg 55 anal := proc.GetAnalyze(arg.GetIdx(), arg.GetParallelIdx(), arg.GetParallelMajor()) 56 anal.Start() 57 defer func() { 58 anal.Stop() 59 }() 60 61 if ap.ctr.lastSentBatch != nil { 62 proc.PutBatch(ap.ctr.lastSentBatch) 63 ap.ctr.lastSentBatch = nil 64 } 65 66 SENDLAST: 67 if ap.ctr.ending { 68 result := vm.NewCallResult() 69 //send shuffle pool 70 for i, bat := range ap.ctr.shufflePool { 71 if bat != nil { 72 result.Batch = bat 73 ap.ctr.lastSentBatch = result.Batch 74 ap.ctr.shufflePool[i] = nil 75 return result, nil 76 } 77 } 78 //end 79 result.Status = vm.ExecStop 80 return result, nil 81 } 82 83 for len(ap.ctr.sendPool) == 0 { 84 // do input 85 result, err := vm.ChildrenCall(arg.GetChildren(0), proc, anal) 86 if err != nil { 87 return result, err 88 } 89 bat := result.Batch 90 if bat == nil { 91 ap.ctr.ending = true 92 goto SENDLAST 93 } else if !bat.IsEmpty() { 94 if ap.ShuffleType == int32(plan.ShuffleType_Hash) { 95 bat, err = hashShuffle(ap, bat, proc) 96 } else if ap.ShuffleType == int32(plan.ShuffleType_Range) { 97 bat, err = rangeShuffle(ap, bat, proc) 98 } 99 if err != nil { 100 return result, err 101 } 102 if bat != nil { 103 // can directly send this batch 104 return result, nil 105 } 106 } 107 } 108 109 // send batch in send pool 110 result := vm.NewCallResult() 111 length := len(ap.ctr.sendPool) 112 result.Batch = ap.ctr.sendPool[length-1] 113 ap.ctr.lastSentBatch = result.Batch 114 ap.ctr.sendPool = ap.ctr.sendPool[:length-1] 115 return result, nil 116 } 117 118 func (arg *Argument) initShuffle() { 119 if arg.ctr.sels == nil { 120 arg.ctr.sels = make([][]int32, arg.AliveRegCnt) 121 for i := 0; i < int(arg.AliveRegCnt); i++ { 122 arg.ctr.sels[i] = make([]int32, 0, colexec.DefaultBatchSize/arg.AliveRegCnt*2) 123 } 124 arg.ctr.shufflePool = make([]*batch.Batch, arg.AliveRegCnt) 125 } 126 } 127 128 func (arg *Argument) getSels() [][]int32 { 129 for i := range arg.ctr.sels { 130 arg.ctr.sels[i] = arg.ctr.sels[i][:0] 131 } 132 return arg.ctr.sels 133 } 134 135 func shuffleConstVectorByHash(ap *Argument, bat *batch.Batch) uint64 { 136 lenRegs := uint64(ap.AliveRegCnt) 137 groupByVec := bat.Vecs[ap.ShuffleColIdx] 138 switch groupByVec.GetType().Oid { 139 case types.T_bit: 140 groupByCol := vector.MustFixedCol[uint64](groupByVec) 141 return plan2.SimpleInt64HashToRange(groupByCol[0], lenRegs) 142 case types.T_int64: 143 groupByCol := vector.MustFixedCol[int64](groupByVec) 144 return plan2.SimpleInt64HashToRange(uint64(groupByCol[0]), lenRegs) 145 case types.T_int32: 146 groupByCol := vector.MustFixedCol[int32](groupByVec) 147 return plan2.SimpleInt64HashToRange(uint64(groupByCol[0]), lenRegs) 148 case types.T_int16: 149 groupByCol := vector.MustFixedCol[int16](groupByVec) 150 return plan2.SimpleInt64HashToRange(uint64(groupByCol[0]), lenRegs) 151 case types.T_uint64: 152 groupByCol := vector.MustFixedCol[uint64](groupByVec) 153 return plan2.SimpleInt64HashToRange(uint64(groupByCol[0]), lenRegs) 154 case types.T_uint32: 155 groupByCol := vector.MustFixedCol[uint32](groupByVec) 156 return plan2.SimpleInt64HashToRange(uint64(groupByCol[0]), lenRegs) 157 case types.T_uint16: 158 groupByCol := vector.MustFixedCol[uint16](groupByVec) 159 return plan2.SimpleInt64HashToRange(uint64(groupByCol[0]), lenRegs) 160 case types.T_char, types.T_varchar, types.T_text: 161 groupByCol, area := vector.MustVarlenaRawData(groupByVec) 162 return plan2.SimpleCharHashToRange(groupByCol[0].GetByteSlice(area), lenRegs) 163 default: 164 panic("unsupported shuffle type, wrong plan!") //something got wrong here! 165 } 166 } 167 168 func getShuffledSelsByHashWithNull(ap *Argument, bat *batch.Batch) [][]int32 { 169 sels := ap.getSels() 170 lenRegs := uint64(ap.AliveRegCnt) 171 groupByVec := bat.Vecs[ap.ShuffleColIdx] 172 switch groupByVec.GetType().Oid { 173 case types.T_bit: 174 groupByCol := vector.MustFixedCol[uint64](groupByVec) 175 for row, v := range groupByCol { 176 var regIndex uint64 = 0 177 if !groupByVec.IsNull(uint64(row)) { 178 regIndex = plan2.SimpleInt64HashToRange(v, lenRegs) 179 } 180 sels[regIndex] = append(sels[regIndex], int32(row)) 181 } 182 case types.T_int64: 183 groupByCol := vector.MustFixedCol[int64](groupByVec) 184 for row, v := range groupByCol { 185 var regIndex uint64 = 0 186 if !groupByVec.IsNull(uint64(row)) { 187 regIndex = plan2.SimpleInt64HashToRange(uint64(v), lenRegs) 188 } 189 sels[regIndex] = append(sels[regIndex], int32(row)) 190 } 191 case types.T_int32: 192 groupByCol := vector.MustFixedCol[int32](groupByVec) 193 for row, v := range groupByCol { 194 var regIndex uint64 = 0 195 if !groupByVec.IsNull(uint64(row)) { 196 regIndex = plan2.SimpleInt64HashToRange(uint64(v), lenRegs) 197 } 198 sels[regIndex] = append(sels[regIndex], int32(row)) 199 } 200 case types.T_int16: 201 groupByCol := vector.MustFixedCol[int16](groupByVec) 202 for row, v := range groupByCol { 203 var regIndex uint64 = 0 204 if !groupByVec.IsNull(uint64(row)) { 205 regIndex = plan2.SimpleInt64HashToRange(uint64(v), lenRegs) 206 } 207 sels[regIndex] = append(sels[regIndex], int32(row)) 208 } 209 case types.T_uint64: 210 groupByCol := vector.MustFixedCol[uint64](groupByVec) 211 for row, v := range groupByCol { 212 var regIndex uint64 = 0 213 if !groupByVec.IsNull(uint64(row)) { 214 regIndex = plan2.SimpleInt64HashToRange(v, lenRegs) 215 } 216 sels[regIndex] = append(sels[regIndex], int32(row)) 217 } 218 case types.T_uint32: 219 groupByCol := vector.MustFixedCol[uint32](groupByVec) 220 for row, v := range groupByCol { 221 var regIndex uint64 = 0 222 if !groupByVec.IsNull(uint64(row)) { 223 regIndex = plan2.SimpleInt64HashToRange(uint64(v), lenRegs) 224 } 225 sels[regIndex] = append(sels[regIndex], int32(row)) 226 } 227 case types.T_uint16: 228 groupByCol := vector.MustFixedCol[uint16](groupByVec) 229 for row, v := range groupByCol { 230 var regIndex uint64 = 0 231 if !groupByVec.IsNull(uint64(row)) { 232 regIndex = plan2.SimpleInt64HashToRange(uint64(v), lenRegs) 233 } 234 sels[regIndex] = append(sels[regIndex], int32(row)) 235 } 236 case types.T_char, types.T_varchar, types.T_text: 237 groupByCol, area := vector.MustVarlenaRawData(groupByVec) 238 for row, v := range groupByCol { 239 var regIndex uint64 = 0 240 if !groupByVec.IsNull(uint64(row)) { 241 regIndex = plan2.SimpleCharHashToRange(v.GetByteSlice(area), lenRegs) 242 } 243 sels[regIndex] = append(sels[regIndex], int32(row)) 244 } 245 default: 246 panic("unsupported shuffle type, wrong plan!") //something got wrong here! 247 } 248 return sels 249 } 250 251 func getShuffledSelsByHashWithoutNull(ap *Argument, bat *batch.Batch) [][]int32 { 252 sels := ap.getSels() 253 lenRegs := uint64(ap.AliveRegCnt) 254 groupByVec := bat.Vecs[ap.ShuffleColIdx] 255 switch groupByVec.GetType().Oid { 256 case types.T_bit: 257 groupByCol := vector.MustFixedCol[uint64](groupByVec) 258 for row, v := range groupByCol { 259 regIndex := plan2.SimpleInt64HashToRange(v, lenRegs) 260 sels[regIndex] = append(sels[regIndex], int32(row)) 261 } 262 case types.T_int64: 263 groupByCol := vector.MustFixedCol[int64](groupByVec) 264 for row, v := range groupByCol { 265 regIndex := plan2.SimpleInt64HashToRange(uint64(v), lenRegs) 266 sels[regIndex] = append(sels[regIndex], int32(row)) 267 } 268 case types.T_int32: 269 groupByCol := vector.MustFixedCol[int32](groupByVec) 270 for row, v := range groupByCol { 271 regIndex := plan2.SimpleInt64HashToRange(uint64(v), lenRegs) 272 sels[regIndex] = append(sels[regIndex], int32(row)) 273 } 274 case types.T_int16: 275 groupByCol := vector.MustFixedCol[int16](groupByVec) 276 for row, v := range groupByCol { 277 regIndex := plan2.SimpleInt64HashToRange(uint64(v), lenRegs) 278 sels[regIndex] = append(sels[regIndex], int32(row)) 279 } 280 case types.T_uint64: 281 groupByCol := vector.MustFixedCol[uint64](groupByVec) 282 for row, v := range groupByCol { 283 regIndex := plan2.SimpleInt64HashToRange(v, lenRegs) 284 sels[regIndex] = append(sels[regIndex], int32(row)) 285 } 286 case types.T_uint32: 287 groupByCol := vector.MustFixedCol[uint32](groupByVec) 288 for row, v := range groupByCol { 289 regIndex := plan2.SimpleInt64HashToRange(uint64(v), lenRegs) 290 sels[regIndex] = append(sels[regIndex], int32(row)) 291 } 292 case types.T_uint16: 293 groupByCol := vector.MustFixedCol[uint16](groupByVec) 294 for row, v := range groupByCol { 295 regIndex := plan2.SimpleInt64HashToRange(uint64(v), lenRegs) 296 sels[regIndex] = append(sels[regIndex], int32(row)) 297 } 298 case types.T_char, types.T_varchar, types.T_text: 299 groupByCol, area := vector.MustVarlenaRawData(groupByVec) 300 for row, v := range groupByCol { 301 regIndex := plan2.SimpleCharHashToRange(v.GetByteSlice(area), lenRegs) 302 sels[regIndex] = append(sels[regIndex], int32(row)) 303 } 304 default: 305 panic("unsupported shuffle type, wrong plan!") //something got wrong here! 306 } 307 return sels 308 } 309 310 func hashShuffle(ap *Argument, bat *batch.Batch, proc *process.Process) (*batch.Batch, error) { 311 groupByVec := bat.Vecs[ap.ShuffleColIdx] 312 if groupByVec.IsConstNull() { 313 bat.ShuffleIDX = 0 314 return bat, nil 315 } 316 if groupByVec.IsConst() { 317 bat.ShuffleIDX = int(shuffleConstVectorByHash(ap, bat)) 318 return bat, nil 319 } 320 321 var sels [][]int32 322 if groupByVec.HasNull() { 323 sels = getShuffledSelsByHashWithNull(ap, bat) 324 } else { 325 sels = getShuffledSelsByHashWithoutNull(ap, bat) 326 } 327 for i := range sels { 328 if len(sels[i]) > 0 && len(sels[i]) != bat.RowCount() { 329 break 330 } 331 if len(sels[i]) == bat.RowCount() { 332 bat.ShuffleIDX = i 333 return bat, nil 334 } 335 } 336 337 return nil, putBatchIntoShuffledPoolsBySels(ap, bat, sels, proc) 338 } 339 340 func allBatchInOneRange(ap *Argument, bat *batch.Batch) (bool, uint64) { 341 lenRegs := uint64(ap.AliveRegCnt) 342 groupByVec := bat.Vecs[ap.ShuffleColIdx] 343 if groupByVec.IsConstNull() { 344 return true, 0 345 } 346 if groupByVec.HasNull() { 347 return false, 0 348 } 349 350 var firstValueSigned, lastValueSigned int64 351 var firstValueUnsigned, lastValueUnsigned uint64 352 var signed bool 353 switch groupByVec.GetType().Oid { 354 case types.T_bit: 355 groupByCol := vector.MustFixedCol[uint64](groupByVec) 356 firstValueUnsigned = groupByCol[0] 357 if groupByVec.IsConst() { 358 lastValueUnsigned = firstValueUnsigned 359 } else { 360 lastValueUnsigned = groupByCol[groupByVec.Length()-1] 361 } 362 case types.T_int64: 363 signed = true 364 groupByCol := vector.MustFixedCol[int64](groupByVec) 365 firstValueSigned = groupByCol[0] 366 if groupByVec.IsConst() { 367 lastValueSigned = firstValueSigned 368 } else { 369 lastValueSigned = groupByCol[groupByVec.Length()-1] 370 } 371 case types.T_int32: 372 signed = true 373 groupByCol := vector.MustFixedCol[int32](groupByVec) 374 firstValueSigned = int64(groupByCol[0]) 375 if groupByVec.IsConst() { 376 lastValueSigned = firstValueSigned 377 } else { 378 lastValueSigned = int64(groupByCol[groupByVec.Length()-1]) 379 } 380 case types.T_int16: 381 signed = true 382 groupByCol := vector.MustFixedCol[int16](groupByVec) 383 firstValueSigned = int64(groupByCol[0]) 384 if groupByVec.IsConst() { 385 lastValueSigned = firstValueSigned 386 } else { 387 lastValueSigned = int64(groupByCol[groupByVec.Length()-1]) 388 } 389 case types.T_uint64: 390 groupByCol := vector.MustFixedCol[uint64](groupByVec) 391 firstValueUnsigned = groupByCol[0] 392 if groupByVec.IsConst() { 393 lastValueUnsigned = firstValueUnsigned 394 } else { 395 lastValueUnsigned = groupByCol[groupByVec.Length()-1] 396 } 397 case types.T_uint32: 398 groupByCol := vector.MustFixedCol[uint32](groupByVec) 399 firstValueUnsigned = uint64(groupByCol[0]) 400 if groupByVec.IsConst() { 401 lastValueUnsigned = firstValueUnsigned 402 } else { 403 lastValueUnsigned = uint64(groupByCol[groupByVec.Length()-1]) 404 } 405 case types.T_uint16: 406 groupByCol := vector.MustFixedCol[uint16](groupByVec) 407 firstValueUnsigned = uint64(groupByCol[0]) 408 if groupByVec.IsConst() { 409 lastValueUnsigned = firstValueUnsigned 410 } else { 411 lastValueUnsigned = uint64(groupByCol[groupByVec.Length()-1]) 412 } 413 case types.T_char, types.T_varchar, types.T_text: 414 groupByCol, area := vector.MustVarlenaRawData(groupByVec) 415 firstValueUnsigned = plan2.VarlenaToUint64(&groupByCol[0], area) 416 if groupByVec.IsConst() { 417 lastValueUnsigned = firstValueUnsigned 418 } else { 419 lastValueUnsigned = plan2.VarlenaToUint64(&groupByCol[groupByVec.Length()-1], area) 420 } 421 default: 422 panic("unsupported shuffle type, wrong plan!") //something got wrong here! 423 } 424 425 var regIndexFirst, regIndexLast uint64 426 if ap.ShuffleRangeInt64 != nil { 427 regIndexFirst = plan2.GetRangeShuffleIndexSignedSlice(ap.ShuffleRangeInt64, firstValueSigned) 428 regIndexLast = plan2.GetRangeShuffleIndexSignedSlice(ap.ShuffleRangeInt64, lastValueSigned) 429 } else if ap.ShuffleRangeUint64 != nil { 430 regIndexFirst = plan2.GetRangeShuffleIndexUnsignedSlice(ap.ShuffleRangeUint64, firstValueUnsigned) 431 regIndexLast = plan2.GetRangeShuffleIndexUnsignedSlice(ap.ShuffleRangeUint64, lastValueUnsigned) 432 } else if signed { 433 regIndexFirst = plan2.GetRangeShuffleIndexSignedMinMax(ap.ShuffleColMin, ap.ShuffleColMax, firstValueSigned, lenRegs) 434 regIndexLast = plan2.GetRangeShuffleIndexSignedMinMax(ap.ShuffleColMin, ap.ShuffleColMax, lastValueSigned, lenRegs) 435 } else { 436 regIndexFirst = plan2.GetRangeShuffleIndexUnsignedMinMax(uint64(ap.ShuffleColMin), uint64(ap.ShuffleColMax), firstValueUnsigned, lenRegs) 437 regIndexLast = plan2.GetRangeShuffleIndexUnsignedMinMax(uint64(ap.ShuffleColMin), uint64(ap.ShuffleColMax), lastValueUnsigned, lenRegs) 438 } 439 440 if regIndexFirst == regIndexLast { 441 return true, regIndexFirst 442 } else { 443 return false, 0 444 } 445 } 446 447 func getShuffledSelsByRangeWithoutNull(ap *Argument, bat *batch.Batch) [][]int32 { 448 sels := ap.getSels() 449 lenRegs := uint64(ap.AliveRegCnt) 450 groupByVec := bat.Vecs[ap.ShuffleColIdx] 451 switch groupByVec.GetType().Oid { 452 case types.T_bit: 453 groupByCol := vector.MustFixedCol[uint64](groupByVec) 454 if ap.ShuffleRangeUint64 != nil { 455 for row, v := range groupByCol { 456 regIndex := plan2.GetRangeShuffleIndexUnsignedSlice(ap.ShuffleRangeUint64, v) 457 sels[regIndex] = append(sels[regIndex], int32(row)) 458 } 459 } else { 460 for row, v := range groupByCol { 461 regIndex := plan2.GetRangeShuffleIndexUnsignedMinMax(uint64(ap.ShuffleColMin), uint64(ap.ShuffleColMax), v, lenRegs) 462 sels[regIndex] = append(sels[regIndex], int32(row)) 463 } 464 } 465 case types.T_int64: 466 groupByCol := vector.MustFixedCol[int64](groupByVec) 467 if ap.ShuffleRangeInt64 != nil { 468 for row, v := range groupByCol { 469 regIndex := plan2.GetRangeShuffleIndexSignedSlice(ap.ShuffleRangeInt64, v) 470 sels[regIndex] = append(sels[regIndex], int32(row)) 471 } 472 } else { 473 for row, v := range groupByCol { 474 regIndex := plan2.GetRangeShuffleIndexSignedMinMax(ap.ShuffleColMin, ap.ShuffleColMax, v, lenRegs) 475 sels[regIndex] = append(sels[regIndex], int32(row)) 476 } 477 } 478 case types.T_int32: 479 groupByCol := vector.MustFixedCol[int32](groupByVec) 480 if ap.ShuffleRangeInt64 != nil { 481 for row, v := range groupByCol { 482 regIndex := plan2.GetRangeShuffleIndexSignedSlice(ap.ShuffleRangeInt64, int64(v)) 483 sels[regIndex] = append(sels[regIndex], int32(row)) 484 } 485 } else { 486 for row, v := range groupByCol { 487 regIndex := plan2.GetRangeShuffleIndexSignedMinMax(ap.ShuffleColMin, ap.ShuffleColMax, int64(v), lenRegs) 488 sels[regIndex] = append(sels[regIndex], int32(row)) 489 } 490 } 491 case types.T_int16: 492 groupByCol := vector.MustFixedCol[int16](groupByVec) 493 if ap.ShuffleRangeInt64 != nil { 494 for row, v := range groupByCol { 495 regIndex := plan2.GetRangeShuffleIndexSignedSlice(ap.ShuffleRangeInt64, int64(v)) 496 sels[regIndex] = append(sels[regIndex], int32(row)) 497 } 498 } else { 499 for row, v := range groupByCol { 500 regIndex := plan2.GetRangeShuffleIndexSignedMinMax(ap.ShuffleColMin, ap.ShuffleColMax, int64(v), lenRegs) 501 sels[regIndex] = append(sels[regIndex], int32(row)) 502 } 503 } 504 case types.T_uint64: 505 groupByCol := vector.MustFixedCol[uint64](groupByVec) 506 if ap.ShuffleRangeUint64 != nil { 507 for row, v := range groupByCol { 508 regIndex := plan2.GetRangeShuffleIndexUnsignedSlice(ap.ShuffleRangeUint64, v) 509 sels[regIndex] = append(sels[regIndex], int32(row)) 510 } 511 } else { 512 for row, v := range groupByCol { 513 regIndex := plan2.GetRangeShuffleIndexUnsignedMinMax(uint64(ap.ShuffleColMin), uint64(ap.ShuffleColMax), v, lenRegs) 514 sels[regIndex] = append(sels[regIndex], int32(row)) 515 } 516 } 517 case types.T_uint32: 518 groupByCol := vector.MustFixedCol[uint32](groupByVec) 519 if ap.ShuffleRangeUint64 != nil { 520 for row, v := range groupByCol { 521 regIndex := plan2.GetRangeShuffleIndexUnsignedSlice(ap.ShuffleRangeUint64, uint64(v)) 522 sels[regIndex] = append(sels[regIndex], int32(row)) 523 } 524 } else { 525 for row, v := range groupByCol { 526 regIndex := plan2.GetRangeShuffleIndexUnsignedMinMax(uint64(ap.ShuffleColMin), uint64(ap.ShuffleColMax), uint64(v), lenRegs) 527 sels[regIndex] = append(sels[regIndex], int32(row)) 528 } 529 } 530 case types.T_uint16: 531 groupByCol := vector.MustFixedCol[uint16](groupByVec) 532 if ap.ShuffleRangeUint64 != nil { 533 for row, v := range groupByCol { 534 regIndex := plan2.GetRangeShuffleIndexUnsignedSlice(ap.ShuffleRangeUint64, uint64(v)) 535 sels[regIndex] = append(sels[regIndex], int32(row)) 536 } 537 } else { 538 for row, v := range groupByCol { 539 regIndex := plan2.GetRangeShuffleIndexUnsignedMinMax(uint64(ap.ShuffleColMin), uint64(ap.ShuffleColMax), uint64(v), lenRegs) 540 sels[regIndex] = append(sels[regIndex], int32(row)) 541 } 542 } 543 case types.T_char, types.T_varchar, types.T_text: 544 groupByCol, area := vector.MustVarlenaRawData(groupByVec) 545 if area == nil { 546 if ap.ShuffleRangeUint64 != nil { 547 for row := range groupByCol { 548 v := plan2.VarlenaToUint64Inline(&groupByCol[row]) 549 regIndex := plan2.GetRangeShuffleIndexUnsignedSlice(ap.ShuffleRangeUint64, v) 550 sels[regIndex] = append(sels[regIndex], int32(row)) 551 } 552 } else { 553 for row := range groupByCol { 554 v := plan2.VarlenaToUint64Inline(&groupByCol[row]) 555 regIndex := plan2.GetRangeShuffleIndexUnsignedMinMax(uint64(ap.ShuffleColMin), uint64(ap.ShuffleColMax), v, lenRegs) 556 sels[regIndex] = append(sels[regIndex], int32(row)) 557 } 558 } 559 } else { 560 if ap.ShuffleRangeUint64 != nil { 561 for row := range groupByCol { 562 v := plan2.VarlenaToUint64(&groupByCol[row], area) 563 regIndex := plan2.GetRangeShuffleIndexUnsignedSlice(ap.ShuffleRangeUint64, v) 564 sels[regIndex] = append(sels[regIndex], int32(row)) 565 } 566 } else { 567 for row := range groupByCol { 568 v := plan2.VarlenaToUint64(&groupByCol[row], area) 569 regIndex := plan2.GetRangeShuffleIndexUnsignedMinMax(uint64(ap.ShuffleColMin), uint64(ap.ShuffleColMax), v, lenRegs) 570 sels[regIndex] = append(sels[regIndex], int32(row)) 571 } 572 } 573 } 574 default: 575 panic("unsupported shuffle type, wrong plan!") //something got wrong here! 576 } 577 return sels 578 } 579 580 func getShuffledSelsByRangeWithNull(ap *Argument, bat *batch.Batch) [][]int32 { 581 sels := ap.getSels() 582 lenRegs := uint64(ap.AliveRegCnt) 583 groupByVec := bat.Vecs[ap.ShuffleColIdx] 584 switch groupByVec.GetType().Oid { 585 case types.T_bit: 586 groupByCol := vector.MustFixedCol[uint64](groupByVec) 587 if ap.ShuffleRangeUint64 != nil { 588 for row, v := range groupByCol { 589 var regIndex uint64 = 0 590 if !groupByVec.IsNull(uint64(row)) { 591 regIndex = plan2.GetRangeShuffleIndexUnsignedSlice(ap.ShuffleRangeUint64, v) 592 } 593 sels[regIndex] = append(sels[regIndex], int32(row)) 594 } 595 } else { 596 for row, v := range groupByCol { 597 var regIndex uint64 = 0 598 if !groupByVec.IsNull(uint64(row)) { 599 regIndex = plan2.GetRangeShuffleIndexSignedMinMax(ap.ShuffleColMin, ap.ShuffleColMax, int64(v), lenRegs) 600 } 601 sels[regIndex] = append(sels[regIndex], int32(row)) 602 } 603 } 604 case types.T_int64: 605 groupByCol := vector.MustFixedCol[int64](groupByVec) 606 if ap.ShuffleRangeInt64 != nil { 607 for row, v := range groupByCol { 608 var regIndex uint64 = 0 609 if !groupByVec.IsNull(uint64(row)) { 610 regIndex = plan2.GetRangeShuffleIndexSignedSlice(ap.ShuffleRangeInt64, v) 611 } 612 sels[regIndex] = append(sels[regIndex], int32(row)) 613 } 614 } else { 615 for row, v := range groupByCol { 616 var regIndex uint64 = 0 617 if !groupByVec.IsNull(uint64(row)) { 618 regIndex = plan2.GetRangeShuffleIndexSignedMinMax(ap.ShuffleColMin, ap.ShuffleColMax, v, lenRegs) 619 } 620 sels[regIndex] = append(sels[regIndex], int32(row)) 621 } 622 } 623 case types.T_int32: 624 groupByCol := vector.MustFixedCol[int32](groupByVec) 625 if ap.ShuffleRangeInt64 != nil { 626 for row, v := range groupByCol { 627 var regIndex uint64 = 0 628 if !groupByVec.IsNull(uint64(row)) { 629 regIndex = plan2.GetRangeShuffleIndexSignedSlice(ap.ShuffleRangeInt64, int64(v)) 630 } 631 sels[regIndex] = append(sels[regIndex], int32(row)) 632 } 633 } else { 634 for row, v := range groupByCol { 635 var regIndex uint64 = 0 636 if !groupByVec.IsNull(uint64(row)) { 637 regIndex = plan2.GetRangeShuffleIndexSignedMinMax(ap.ShuffleColMin, ap.ShuffleColMax, int64(v), lenRegs) 638 } 639 sels[regIndex] = append(sels[regIndex], int32(row)) 640 } 641 } 642 case types.T_int16: 643 groupByCol := vector.MustFixedCol[int16](groupByVec) 644 if ap.ShuffleRangeInt64 != nil { 645 for row, v := range groupByCol { 646 var regIndex uint64 = 0 647 if !groupByVec.IsNull(uint64(row)) { 648 regIndex = plan2.GetRangeShuffleIndexSignedSlice(ap.ShuffleRangeInt64, int64(v)) 649 } 650 sels[regIndex] = append(sels[regIndex], int32(row)) 651 } 652 } else { 653 for row, v := range groupByCol { 654 var regIndex uint64 = 0 655 if !groupByVec.IsNull(uint64(row)) { 656 regIndex = plan2.GetRangeShuffleIndexSignedMinMax(ap.ShuffleColMin, ap.ShuffleColMax, int64(v), lenRegs) 657 } 658 sels[regIndex] = append(sels[regIndex], int32(row)) 659 } 660 } 661 case types.T_uint64: 662 groupByCol := vector.MustFixedCol[uint64](groupByVec) 663 if ap.ShuffleRangeUint64 != nil { 664 for row, v := range groupByCol { 665 var regIndex uint64 = 0 666 if !groupByVec.IsNull(uint64(row)) { 667 regIndex = plan2.GetRangeShuffleIndexUnsignedSlice(ap.ShuffleRangeUint64, v) 668 } 669 sels[regIndex] = append(sels[regIndex], int32(row)) 670 } 671 } else { 672 for row, v := range groupByCol { 673 var regIndex uint64 = 0 674 if !groupByVec.IsNull(uint64(row)) { 675 regIndex = plan2.GetRangeShuffleIndexSignedMinMax(ap.ShuffleColMin, ap.ShuffleColMax, int64(v), lenRegs) 676 } 677 sels[regIndex] = append(sels[regIndex], int32(row)) 678 } 679 } 680 case types.T_uint32: 681 groupByCol := vector.MustFixedCol[uint32](groupByVec) 682 if ap.ShuffleRangeUint64 != nil { 683 for row, v := range groupByCol { 684 var regIndex uint64 = 0 685 if !groupByVec.IsNull(uint64(row)) { 686 regIndex = plan2.GetRangeShuffleIndexUnsignedSlice(ap.ShuffleRangeUint64, uint64(v)) 687 } 688 sels[regIndex] = append(sels[regIndex], int32(row)) 689 } 690 } else { 691 for row, v := range groupByCol { 692 var regIndex uint64 = 0 693 if !groupByVec.IsNull(uint64(row)) { 694 regIndex = plan2.GetRangeShuffleIndexSignedMinMax(ap.ShuffleColMin, ap.ShuffleColMax, int64(v), lenRegs) 695 } 696 sels[regIndex] = append(sels[regIndex], int32(row)) 697 } 698 } 699 case types.T_uint16: 700 groupByCol := vector.MustFixedCol[uint16](groupByVec) 701 if ap.ShuffleRangeUint64 != nil { 702 for row, v := range groupByCol { 703 var regIndex uint64 = 0 704 if !groupByVec.IsNull(uint64(row)) { 705 regIndex = plan2.GetRangeShuffleIndexUnsignedSlice(ap.ShuffleRangeUint64, uint64(v)) 706 } 707 sels[regIndex] = append(sels[regIndex], int32(row)) 708 } 709 } else { 710 for row, v := range groupByCol { 711 var regIndex uint64 = 0 712 if !groupByVec.IsNull(uint64(row)) { 713 regIndex = plan2.GetRangeShuffleIndexSignedMinMax(ap.ShuffleColMin, ap.ShuffleColMax, int64(v), lenRegs) 714 } 715 sels[regIndex] = append(sels[regIndex], int32(row)) 716 } 717 } 718 case types.T_char, types.T_varchar, types.T_text: 719 groupByCol, area := vector.MustVarlenaRawData(groupByVec) 720 if area == nil { 721 if ap.ShuffleRangeUint64 != nil { 722 for row := range groupByCol { 723 var regIndex uint64 = 0 724 if !groupByVec.IsNull(uint64(row)) { 725 v := plan2.VarlenaToUint64Inline(&groupByCol[row]) 726 regIndex = plan2.GetRangeShuffleIndexUnsignedSlice(ap.ShuffleRangeUint64, v) 727 } 728 sels[regIndex] = append(sels[regIndex], int32(row)) 729 } 730 } else { 731 for row := range groupByCol { 732 var regIndex uint64 = 0 733 if !groupByVec.IsNull(uint64(row)) { 734 v := plan2.VarlenaToUint64Inline(&groupByCol[row]) 735 regIndex = plan2.GetRangeShuffleIndexUnsignedMinMax(uint64(ap.ShuffleColMin), uint64(ap.ShuffleColMax), v, lenRegs) 736 } 737 sels[regIndex] = append(sels[regIndex], int32(row)) 738 } 739 } 740 } else { 741 if ap.ShuffleRangeUint64 != nil { 742 for row := range groupByCol { 743 var regIndex uint64 = 0 744 if !groupByVec.IsNull(uint64(row)) { 745 v := plan2.VarlenaToUint64(&groupByCol[row], area) 746 regIndex = plan2.GetRangeShuffleIndexUnsignedSlice(ap.ShuffleRangeUint64, v) 747 } 748 sels[regIndex] = append(sels[regIndex], int32(row)) 749 } 750 } else { 751 for row := range groupByCol { 752 var regIndex uint64 = 0 753 if !groupByVec.IsNull(uint64(row)) { 754 v := plan2.VarlenaToUint64(&groupByCol[row], area) 755 regIndex = plan2.GetRangeShuffleIndexUnsignedMinMax(uint64(ap.ShuffleColMin), uint64(ap.ShuffleColMax), v, lenRegs) 756 } 757 sels[regIndex] = append(sels[regIndex], int32(row)) 758 } 759 } 760 } 761 default: 762 panic("unsupported shuffle type, wrong plan!") //something got wrong here! 763 } 764 return sels 765 } 766 767 func putBatchIntoShuffledPoolsBySels(ap *Argument, srcBatch *batch.Batch, sels [][]int32, proc *process.Process) error { 768 shuffledPool := ap.ctr.shufflePool 769 var err error 770 for regIndex := range shuffledPool { 771 newSels := sels[regIndex] 772 for len(newSels) > 0 { 773 bat := shuffledPool[regIndex] 774 if bat == nil { 775 bat, err = proc.NewBatchFromSrc(srcBatch, colexec.DefaultBatchSize) 776 if err != nil { 777 return err 778 } 779 bat.ShuffleIDX = regIndex 780 ap.ctr.shufflePool[regIndex] = bat 781 } 782 length := len(newSels) 783 if length+bat.RowCount() > colexec.DefaultBatchSize { 784 length = colexec.DefaultBatchSize - bat.RowCount() 785 } 786 for vecIndex := range bat.Vecs { 787 v := bat.Vecs[vecIndex] 788 v.SetSorted(false) 789 err = v.Union(srcBatch.Vecs[vecIndex], newSels[:length], proc.Mp()) 790 if err != nil { 791 return err 792 } 793 } 794 bat.AddRowCount(length) 795 newSels = newSels[length:] 796 if bat.RowCount() == colexec.DefaultBatchSize { 797 ap.ctr.sendPool = append(ap.ctr.sendPool, bat) 798 shuffledPool[regIndex] = nil 799 } 800 } 801 } 802 return nil 803 } 804 805 func rangeShuffle(ap *Argument, bat *batch.Batch, proc *process.Process) (*batch.Batch, error) { 806 groupByVec := bat.Vecs[ap.ShuffleColIdx] 807 if groupByVec.GetSorted() || groupByVec.IsConst() { 808 ok, regIndex := allBatchInOneRange(ap, bat) 809 if ok { 810 bat.ShuffleIDX = int(regIndex) 811 return bat, nil 812 } 813 } 814 var sels [][]int32 815 if groupByVec.HasNull() { 816 sels = getShuffledSelsByRangeWithNull(ap, bat) 817 } else { 818 sels = getShuffledSelsByRangeWithoutNull(ap, bat) 819 } 820 for i := range sels { 821 if len(sels[i]) > 0 && len(sels[i]) != bat.RowCount() { 822 break 823 } 824 if len(sels[i]) == bat.RowCount() { 825 bat.ShuffleIDX = i 826 return bat, nil 827 } 828 } 829 err := putBatchIntoShuffledPoolsBySels(ap, bat, sels, proc) 830 return nil, err 831 }