github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/colexec/shuffle/shuffle.go (about)

     1  // Copyright 2021 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package shuffle
    16  
    17  import (
    18  	"bytes"
    19  
    20  	"github.com/matrixorigin/matrixone/pkg/container/batch"
    21  	"github.com/matrixorigin/matrixone/pkg/container/types"
    22  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    23  	"github.com/matrixorigin/matrixone/pkg/pb/plan"
    24  	"github.com/matrixorigin/matrixone/pkg/sql/colexec"
    25  	plan2 "github.com/matrixorigin/matrixone/pkg/sql/plan"
    26  	"github.com/matrixorigin/matrixone/pkg/vm"
    27  	"github.com/matrixorigin/matrixone/pkg/vm/process"
    28  )
    29  
    30  const argName = "shuffle"
    31  
    32  func (arg *Argument) String(buf *bytes.Buffer) {
    33  	buf.WriteString(argName)
    34  	buf.WriteString(": shuffle")
    35  }
    36  
    37  func (arg *Argument) Prepare(proc *process.Process) error {
    38  	ap := arg
    39  	ctr := new(container)
    40  	ap.ctr = ctr
    41  	ap.initShuffle()
    42  	return nil
    43  }
    44  
    45  // there are two ways for shuffle to send a batch
    46  // if a batch belongs to one bucket, send this batch directly, and shuffle need to do nothing
    47  // else split this batch into pieces, write data into pool. if one bucket is full, send this bucket.
    48  // next time, set this bucket rowcount to 0 and reuse it
    49  // for now, we shuffle null to the first bucket
    50  func (arg *Argument) Call(proc *process.Process) (vm.CallResult, error) {
    51  	if err, isCancel := vm.CancelCheck(proc); isCancel {
    52  		return vm.CancelResult, err
    53  	}
    54  	ap := arg
    55  	anal := proc.GetAnalyze(arg.GetIdx(), arg.GetParallelIdx(), arg.GetParallelMajor())
    56  	anal.Start()
    57  	defer func() {
    58  		anal.Stop()
    59  	}()
    60  
    61  	if ap.ctr.lastSentBatch != nil {
    62  		proc.PutBatch(ap.ctr.lastSentBatch)
    63  		ap.ctr.lastSentBatch = nil
    64  	}
    65  
    66  SENDLAST:
    67  	if ap.ctr.ending {
    68  		result := vm.NewCallResult()
    69  		//send shuffle pool
    70  		for i, bat := range ap.ctr.shufflePool {
    71  			if bat != nil {
    72  				result.Batch = bat
    73  				ap.ctr.lastSentBatch = result.Batch
    74  				ap.ctr.shufflePool[i] = nil
    75  				return result, nil
    76  			}
    77  		}
    78  		//end
    79  		result.Status = vm.ExecStop
    80  		return result, nil
    81  	}
    82  
    83  	for len(ap.ctr.sendPool) == 0 {
    84  		// do input
    85  		result, err := vm.ChildrenCall(arg.GetChildren(0), proc, anal)
    86  		if err != nil {
    87  			return result, err
    88  		}
    89  		bat := result.Batch
    90  		if bat == nil {
    91  			ap.ctr.ending = true
    92  			goto SENDLAST
    93  		} else if !bat.IsEmpty() {
    94  			if ap.ShuffleType == int32(plan.ShuffleType_Hash) {
    95  				bat, err = hashShuffle(ap, bat, proc)
    96  			} else if ap.ShuffleType == int32(plan.ShuffleType_Range) {
    97  				bat, err = rangeShuffle(ap, bat, proc)
    98  			}
    99  			if err != nil {
   100  				return result, err
   101  			}
   102  			if bat != nil {
   103  				// can directly send this batch
   104  				return result, nil
   105  			}
   106  		}
   107  	}
   108  
   109  	// send batch in send pool
   110  	result := vm.NewCallResult()
   111  	length := len(ap.ctr.sendPool)
   112  	result.Batch = ap.ctr.sendPool[length-1]
   113  	ap.ctr.lastSentBatch = result.Batch
   114  	ap.ctr.sendPool = ap.ctr.sendPool[:length-1]
   115  	return result, nil
   116  }
   117  
   118  func (arg *Argument) initShuffle() {
   119  	if arg.ctr.sels == nil {
   120  		arg.ctr.sels = make([][]int32, arg.AliveRegCnt)
   121  		for i := 0; i < int(arg.AliveRegCnt); i++ {
   122  			arg.ctr.sels[i] = make([]int32, 0, colexec.DefaultBatchSize/arg.AliveRegCnt*2)
   123  		}
   124  		arg.ctr.shufflePool = make([]*batch.Batch, arg.AliveRegCnt)
   125  	}
   126  }
   127  
   128  func (arg *Argument) getSels() [][]int32 {
   129  	for i := range arg.ctr.sels {
   130  		arg.ctr.sels[i] = arg.ctr.sels[i][:0]
   131  	}
   132  	return arg.ctr.sels
   133  }
   134  
   135  func shuffleConstVectorByHash(ap *Argument, bat *batch.Batch) uint64 {
   136  	lenRegs := uint64(ap.AliveRegCnt)
   137  	groupByVec := bat.Vecs[ap.ShuffleColIdx]
   138  	switch groupByVec.GetType().Oid {
   139  	case types.T_bit:
   140  		groupByCol := vector.MustFixedCol[uint64](groupByVec)
   141  		return plan2.SimpleInt64HashToRange(groupByCol[0], lenRegs)
   142  	case types.T_int64:
   143  		groupByCol := vector.MustFixedCol[int64](groupByVec)
   144  		return plan2.SimpleInt64HashToRange(uint64(groupByCol[0]), lenRegs)
   145  	case types.T_int32:
   146  		groupByCol := vector.MustFixedCol[int32](groupByVec)
   147  		return plan2.SimpleInt64HashToRange(uint64(groupByCol[0]), lenRegs)
   148  	case types.T_int16:
   149  		groupByCol := vector.MustFixedCol[int16](groupByVec)
   150  		return plan2.SimpleInt64HashToRange(uint64(groupByCol[0]), lenRegs)
   151  	case types.T_uint64:
   152  		groupByCol := vector.MustFixedCol[uint64](groupByVec)
   153  		return plan2.SimpleInt64HashToRange(uint64(groupByCol[0]), lenRegs)
   154  	case types.T_uint32:
   155  		groupByCol := vector.MustFixedCol[uint32](groupByVec)
   156  		return plan2.SimpleInt64HashToRange(uint64(groupByCol[0]), lenRegs)
   157  	case types.T_uint16:
   158  		groupByCol := vector.MustFixedCol[uint16](groupByVec)
   159  		return plan2.SimpleInt64HashToRange(uint64(groupByCol[0]), lenRegs)
   160  	case types.T_char, types.T_varchar, types.T_text:
   161  		groupByCol, area := vector.MustVarlenaRawData(groupByVec)
   162  		return plan2.SimpleCharHashToRange(groupByCol[0].GetByteSlice(area), lenRegs)
   163  	default:
   164  		panic("unsupported shuffle type, wrong plan!") //something got wrong here!
   165  	}
   166  }
   167  
   168  func getShuffledSelsByHashWithNull(ap *Argument, bat *batch.Batch) [][]int32 {
   169  	sels := ap.getSels()
   170  	lenRegs := uint64(ap.AliveRegCnt)
   171  	groupByVec := bat.Vecs[ap.ShuffleColIdx]
   172  	switch groupByVec.GetType().Oid {
   173  	case types.T_bit:
   174  		groupByCol := vector.MustFixedCol[uint64](groupByVec)
   175  		for row, v := range groupByCol {
   176  			var regIndex uint64 = 0
   177  			if !groupByVec.IsNull(uint64(row)) {
   178  				regIndex = plan2.SimpleInt64HashToRange(v, lenRegs)
   179  			}
   180  			sels[regIndex] = append(sels[regIndex], int32(row))
   181  		}
   182  	case types.T_int64:
   183  		groupByCol := vector.MustFixedCol[int64](groupByVec)
   184  		for row, v := range groupByCol {
   185  			var regIndex uint64 = 0
   186  			if !groupByVec.IsNull(uint64(row)) {
   187  				regIndex = plan2.SimpleInt64HashToRange(uint64(v), lenRegs)
   188  			}
   189  			sels[regIndex] = append(sels[regIndex], int32(row))
   190  		}
   191  	case types.T_int32:
   192  		groupByCol := vector.MustFixedCol[int32](groupByVec)
   193  		for row, v := range groupByCol {
   194  			var regIndex uint64 = 0
   195  			if !groupByVec.IsNull(uint64(row)) {
   196  				regIndex = plan2.SimpleInt64HashToRange(uint64(v), lenRegs)
   197  			}
   198  			sels[regIndex] = append(sels[regIndex], int32(row))
   199  		}
   200  	case types.T_int16:
   201  		groupByCol := vector.MustFixedCol[int16](groupByVec)
   202  		for row, v := range groupByCol {
   203  			var regIndex uint64 = 0
   204  			if !groupByVec.IsNull(uint64(row)) {
   205  				regIndex = plan2.SimpleInt64HashToRange(uint64(v), lenRegs)
   206  			}
   207  			sels[regIndex] = append(sels[regIndex], int32(row))
   208  		}
   209  	case types.T_uint64:
   210  		groupByCol := vector.MustFixedCol[uint64](groupByVec)
   211  		for row, v := range groupByCol {
   212  			var regIndex uint64 = 0
   213  			if !groupByVec.IsNull(uint64(row)) {
   214  				regIndex = plan2.SimpleInt64HashToRange(v, lenRegs)
   215  			}
   216  			sels[regIndex] = append(sels[regIndex], int32(row))
   217  		}
   218  	case types.T_uint32:
   219  		groupByCol := vector.MustFixedCol[uint32](groupByVec)
   220  		for row, v := range groupByCol {
   221  			var regIndex uint64 = 0
   222  			if !groupByVec.IsNull(uint64(row)) {
   223  				regIndex = plan2.SimpleInt64HashToRange(uint64(v), lenRegs)
   224  			}
   225  			sels[regIndex] = append(sels[regIndex], int32(row))
   226  		}
   227  	case types.T_uint16:
   228  		groupByCol := vector.MustFixedCol[uint16](groupByVec)
   229  		for row, v := range groupByCol {
   230  			var regIndex uint64 = 0
   231  			if !groupByVec.IsNull(uint64(row)) {
   232  				regIndex = plan2.SimpleInt64HashToRange(uint64(v), lenRegs)
   233  			}
   234  			sels[regIndex] = append(sels[regIndex], int32(row))
   235  		}
   236  	case types.T_char, types.T_varchar, types.T_text:
   237  		groupByCol, area := vector.MustVarlenaRawData(groupByVec)
   238  		for row, v := range groupByCol {
   239  			var regIndex uint64 = 0
   240  			if !groupByVec.IsNull(uint64(row)) {
   241  				regIndex = plan2.SimpleCharHashToRange(v.GetByteSlice(area), lenRegs)
   242  			}
   243  			sels[regIndex] = append(sels[regIndex], int32(row))
   244  		}
   245  	default:
   246  		panic("unsupported shuffle type, wrong plan!") //something got wrong here!
   247  	}
   248  	return sels
   249  }
   250  
   251  func getShuffledSelsByHashWithoutNull(ap *Argument, bat *batch.Batch) [][]int32 {
   252  	sels := ap.getSels()
   253  	lenRegs := uint64(ap.AliveRegCnt)
   254  	groupByVec := bat.Vecs[ap.ShuffleColIdx]
   255  	switch groupByVec.GetType().Oid {
   256  	case types.T_bit:
   257  		groupByCol := vector.MustFixedCol[uint64](groupByVec)
   258  		for row, v := range groupByCol {
   259  			regIndex := plan2.SimpleInt64HashToRange(v, lenRegs)
   260  			sels[regIndex] = append(sels[regIndex], int32(row))
   261  		}
   262  	case types.T_int64:
   263  		groupByCol := vector.MustFixedCol[int64](groupByVec)
   264  		for row, v := range groupByCol {
   265  			regIndex := plan2.SimpleInt64HashToRange(uint64(v), lenRegs)
   266  			sels[regIndex] = append(sels[regIndex], int32(row))
   267  		}
   268  	case types.T_int32:
   269  		groupByCol := vector.MustFixedCol[int32](groupByVec)
   270  		for row, v := range groupByCol {
   271  			regIndex := plan2.SimpleInt64HashToRange(uint64(v), lenRegs)
   272  			sels[regIndex] = append(sels[regIndex], int32(row))
   273  		}
   274  	case types.T_int16:
   275  		groupByCol := vector.MustFixedCol[int16](groupByVec)
   276  		for row, v := range groupByCol {
   277  			regIndex := plan2.SimpleInt64HashToRange(uint64(v), lenRegs)
   278  			sels[regIndex] = append(sels[regIndex], int32(row))
   279  		}
   280  	case types.T_uint64:
   281  		groupByCol := vector.MustFixedCol[uint64](groupByVec)
   282  		for row, v := range groupByCol {
   283  			regIndex := plan2.SimpleInt64HashToRange(v, lenRegs)
   284  			sels[regIndex] = append(sels[regIndex], int32(row))
   285  		}
   286  	case types.T_uint32:
   287  		groupByCol := vector.MustFixedCol[uint32](groupByVec)
   288  		for row, v := range groupByCol {
   289  			regIndex := plan2.SimpleInt64HashToRange(uint64(v), lenRegs)
   290  			sels[regIndex] = append(sels[regIndex], int32(row))
   291  		}
   292  	case types.T_uint16:
   293  		groupByCol := vector.MustFixedCol[uint16](groupByVec)
   294  		for row, v := range groupByCol {
   295  			regIndex := plan2.SimpleInt64HashToRange(uint64(v), lenRegs)
   296  			sels[regIndex] = append(sels[regIndex], int32(row))
   297  		}
   298  	case types.T_char, types.T_varchar, types.T_text:
   299  		groupByCol, area := vector.MustVarlenaRawData(groupByVec)
   300  		for row, v := range groupByCol {
   301  			regIndex := plan2.SimpleCharHashToRange(v.GetByteSlice(area), lenRegs)
   302  			sels[regIndex] = append(sels[regIndex], int32(row))
   303  		}
   304  	default:
   305  		panic("unsupported shuffle type, wrong plan!") //something got wrong here!
   306  	}
   307  	return sels
   308  }
   309  
   310  func hashShuffle(ap *Argument, bat *batch.Batch, proc *process.Process) (*batch.Batch, error) {
   311  	groupByVec := bat.Vecs[ap.ShuffleColIdx]
   312  	if groupByVec.IsConstNull() {
   313  		bat.ShuffleIDX = 0
   314  		return bat, nil
   315  	}
   316  	if groupByVec.IsConst() {
   317  		bat.ShuffleIDX = int(shuffleConstVectorByHash(ap, bat))
   318  		return bat, nil
   319  	}
   320  
   321  	var sels [][]int32
   322  	if groupByVec.HasNull() {
   323  		sels = getShuffledSelsByHashWithNull(ap, bat)
   324  	} else {
   325  		sels = getShuffledSelsByHashWithoutNull(ap, bat)
   326  	}
   327  	for i := range sels {
   328  		if len(sels[i]) > 0 && len(sels[i]) != bat.RowCount() {
   329  			break
   330  		}
   331  		if len(sels[i]) == bat.RowCount() {
   332  			bat.ShuffleIDX = i
   333  			return bat, nil
   334  		}
   335  	}
   336  
   337  	return nil, putBatchIntoShuffledPoolsBySels(ap, bat, sels, proc)
   338  }
   339  
   340  func allBatchInOneRange(ap *Argument, bat *batch.Batch) (bool, uint64) {
   341  	lenRegs := uint64(ap.AliveRegCnt)
   342  	groupByVec := bat.Vecs[ap.ShuffleColIdx]
   343  	if groupByVec.IsConstNull() {
   344  		return true, 0
   345  	}
   346  	if groupByVec.HasNull() {
   347  		return false, 0
   348  	}
   349  
   350  	var firstValueSigned, lastValueSigned int64
   351  	var firstValueUnsigned, lastValueUnsigned uint64
   352  	var signed bool
   353  	switch groupByVec.GetType().Oid {
   354  	case types.T_bit:
   355  		groupByCol := vector.MustFixedCol[uint64](groupByVec)
   356  		firstValueUnsigned = groupByCol[0]
   357  		if groupByVec.IsConst() {
   358  			lastValueUnsigned = firstValueUnsigned
   359  		} else {
   360  			lastValueUnsigned = groupByCol[groupByVec.Length()-1]
   361  		}
   362  	case types.T_int64:
   363  		signed = true
   364  		groupByCol := vector.MustFixedCol[int64](groupByVec)
   365  		firstValueSigned = groupByCol[0]
   366  		if groupByVec.IsConst() {
   367  			lastValueSigned = firstValueSigned
   368  		} else {
   369  			lastValueSigned = groupByCol[groupByVec.Length()-1]
   370  		}
   371  	case types.T_int32:
   372  		signed = true
   373  		groupByCol := vector.MustFixedCol[int32](groupByVec)
   374  		firstValueSigned = int64(groupByCol[0])
   375  		if groupByVec.IsConst() {
   376  			lastValueSigned = firstValueSigned
   377  		} else {
   378  			lastValueSigned = int64(groupByCol[groupByVec.Length()-1])
   379  		}
   380  	case types.T_int16:
   381  		signed = true
   382  		groupByCol := vector.MustFixedCol[int16](groupByVec)
   383  		firstValueSigned = int64(groupByCol[0])
   384  		if groupByVec.IsConst() {
   385  			lastValueSigned = firstValueSigned
   386  		} else {
   387  			lastValueSigned = int64(groupByCol[groupByVec.Length()-1])
   388  		}
   389  	case types.T_uint64:
   390  		groupByCol := vector.MustFixedCol[uint64](groupByVec)
   391  		firstValueUnsigned = groupByCol[0]
   392  		if groupByVec.IsConst() {
   393  			lastValueUnsigned = firstValueUnsigned
   394  		} else {
   395  			lastValueUnsigned = groupByCol[groupByVec.Length()-1]
   396  		}
   397  	case types.T_uint32:
   398  		groupByCol := vector.MustFixedCol[uint32](groupByVec)
   399  		firstValueUnsigned = uint64(groupByCol[0])
   400  		if groupByVec.IsConst() {
   401  			lastValueUnsigned = firstValueUnsigned
   402  		} else {
   403  			lastValueUnsigned = uint64(groupByCol[groupByVec.Length()-1])
   404  		}
   405  	case types.T_uint16:
   406  		groupByCol := vector.MustFixedCol[uint16](groupByVec)
   407  		firstValueUnsigned = uint64(groupByCol[0])
   408  		if groupByVec.IsConst() {
   409  			lastValueUnsigned = firstValueUnsigned
   410  		} else {
   411  			lastValueUnsigned = uint64(groupByCol[groupByVec.Length()-1])
   412  		}
   413  	case types.T_char, types.T_varchar, types.T_text:
   414  		groupByCol, area := vector.MustVarlenaRawData(groupByVec)
   415  		firstValueUnsigned = plan2.VarlenaToUint64(&groupByCol[0], area)
   416  		if groupByVec.IsConst() {
   417  			lastValueUnsigned = firstValueUnsigned
   418  		} else {
   419  			lastValueUnsigned = plan2.VarlenaToUint64(&groupByCol[groupByVec.Length()-1], area)
   420  		}
   421  	default:
   422  		panic("unsupported shuffle type, wrong plan!") //something got wrong here!
   423  	}
   424  
   425  	var regIndexFirst, regIndexLast uint64
   426  	if ap.ShuffleRangeInt64 != nil {
   427  		regIndexFirst = plan2.GetRangeShuffleIndexSignedSlice(ap.ShuffleRangeInt64, firstValueSigned)
   428  		regIndexLast = plan2.GetRangeShuffleIndexSignedSlice(ap.ShuffleRangeInt64, lastValueSigned)
   429  	} else if ap.ShuffleRangeUint64 != nil {
   430  		regIndexFirst = plan2.GetRangeShuffleIndexUnsignedSlice(ap.ShuffleRangeUint64, firstValueUnsigned)
   431  		regIndexLast = plan2.GetRangeShuffleIndexUnsignedSlice(ap.ShuffleRangeUint64, lastValueUnsigned)
   432  	} else if signed {
   433  		regIndexFirst = plan2.GetRangeShuffleIndexSignedMinMax(ap.ShuffleColMin, ap.ShuffleColMax, firstValueSigned, lenRegs)
   434  		regIndexLast = plan2.GetRangeShuffleIndexSignedMinMax(ap.ShuffleColMin, ap.ShuffleColMax, lastValueSigned, lenRegs)
   435  	} else {
   436  		regIndexFirst = plan2.GetRangeShuffleIndexUnsignedMinMax(uint64(ap.ShuffleColMin), uint64(ap.ShuffleColMax), firstValueUnsigned, lenRegs)
   437  		regIndexLast = plan2.GetRangeShuffleIndexUnsignedMinMax(uint64(ap.ShuffleColMin), uint64(ap.ShuffleColMax), lastValueUnsigned, lenRegs)
   438  	}
   439  
   440  	if regIndexFirst == regIndexLast {
   441  		return true, regIndexFirst
   442  	} else {
   443  		return false, 0
   444  	}
   445  }
   446  
   447  func getShuffledSelsByRangeWithoutNull(ap *Argument, bat *batch.Batch) [][]int32 {
   448  	sels := ap.getSels()
   449  	lenRegs := uint64(ap.AliveRegCnt)
   450  	groupByVec := bat.Vecs[ap.ShuffleColIdx]
   451  	switch groupByVec.GetType().Oid {
   452  	case types.T_bit:
   453  		groupByCol := vector.MustFixedCol[uint64](groupByVec)
   454  		if ap.ShuffleRangeUint64 != nil {
   455  			for row, v := range groupByCol {
   456  				regIndex := plan2.GetRangeShuffleIndexUnsignedSlice(ap.ShuffleRangeUint64, v)
   457  				sels[regIndex] = append(sels[regIndex], int32(row))
   458  			}
   459  		} else {
   460  			for row, v := range groupByCol {
   461  				regIndex := plan2.GetRangeShuffleIndexUnsignedMinMax(uint64(ap.ShuffleColMin), uint64(ap.ShuffleColMax), v, lenRegs)
   462  				sels[regIndex] = append(sels[regIndex], int32(row))
   463  			}
   464  		}
   465  	case types.T_int64:
   466  		groupByCol := vector.MustFixedCol[int64](groupByVec)
   467  		if ap.ShuffleRangeInt64 != nil {
   468  			for row, v := range groupByCol {
   469  				regIndex := plan2.GetRangeShuffleIndexSignedSlice(ap.ShuffleRangeInt64, v)
   470  				sels[regIndex] = append(sels[regIndex], int32(row))
   471  			}
   472  		} else {
   473  			for row, v := range groupByCol {
   474  				regIndex := plan2.GetRangeShuffleIndexSignedMinMax(ap.ShuffleColMin, ap.ShuffleColMax, v, lenRegs)
   475  				sels[regIndex] = append(sels[regIndex], int32(row))
   476  			}
   477  		}
   478  	case types.T_int32:
   479  		groupByCol := vector.MustFixedCol[int32](groupByVec)
   480  		if ap.ShuffleRangeInt64 != nil {
   481  			for row, v := range groupByCol {
   482  				regIndex := plan2.GetRangeShuffleIndexSignedSlice(ap.ShuffleRangeInt64, int64(v))
   483  				sels[regIndex] = append(sels[regIndex], int32(row))
   484  			}
   485  		} else {
   486  			for row, v := range groupByCol {
   487  				regIndex := plan2.GetRangeShuffleIndexSignedMinMax(ap.ShuffleColMin, ap.ShuffleColMax, int64(v), lenRegs)
   488  				sels[regIndex] = append(sels[regIndex], int32(row))
   489  			}
   490  		}
   491  	case types.T_int16:
   492  		groupByCol := vector.MustFixedCol[int16](groupByVec)
   493  		if ap.ShuffleRangeInt64 != nil {
   494  			for row, v := range groupByCol {
   495  				regIndex := plan2.GetRangeShuffleIndexSignedSlice(ap.ShuffleRangeInt64, int64(v))
   496  				sels[regIndex] = append(sels[regIndex], int32(row))
   497  			}
   498  		} else {
   499  			for row, v := range groupByCol {
   500  				regIndex := plan2.GetRangeShuffleIndexSignedMinMax(ap.ShuffleColMin, ap.ShuffleColMax, int64(v), lenRegs)
   501  				sels[regIndex] = append(sels[regIndex], int32(row))
   502  			}
   503  		}
   504  	case types.T_uint64:
   505  		groupByCol := vector.MustFixedCol[uint64](groupByVec)
   506  		if ap.ShuffleRangeUint64 != nil {
   507  			for row, v := range groupByCol {
   508  				regIndex := plan2.GetRangeShuffleIndexUnsignedSlice(ap.ShuffleRangeUint64, v)
   509  				sels[regIndex] = append(sels[regIndex], int32(row))
   510  			}
   511  		} else {
   512  			for row, v := range groupByCol {
   513  				regIndex := plan2.GetRangeShuffleIndexUnsignedMinMax(uint64(ap.ShuffleColMin), uint64(ap.ShuffleColMax), v, lenRegs)
   514  				sels[regIndex] = append(sels[regIndex], int32(row))
   515  			}
   516  		}
   517  	case types.T_uint32:
   518  		groupByCol := vector.MustFixedCol[uint32](groupByVec)
   519  		if ap.ShuffleRangeUint64 != nil {
   520  			for row, v := range groupByCol {
   521  				regIndex := plan2.GetRangeShuffleIndexUnsignedSlice(ap.ShuffleRangeUint64, uint64(v))
   522  				sels[regIndex] = append(sels[regIndex], int32(row))
   523  			}
   524  		} else {
   525  			for row, v := range groupByCol {
   526  				regIndex := plan2.GetRangeShuffleIndexUnsignedMinMax(uint64(ap.ShuffleColMin), uint64(ap.ShuffleColMax), uint64(v), lenRegs)
   527  				sels[regIndex] = append(sels[regIndex], int32(row))
   528  			}
   529  		}
   530  	case types.T_uint16:
   531  		groupByCol := vector.MustFixedCol[uint16](groupByVec)
   532  		if ap.ShuffleRangeUint64 != nil {
   533  			for row, v := range groupByCol {
   534  				regIndex := plan2.GetRangeShuffleIndexUnsignedSlice(ap.ShuffleRangeUint64, uint64(v))
   535  				sels[regIndex] = append(sels[regIndex], int32(row))
   536  			}
   537  		} else {
   538  			for row, v := range groupByCol {
   539  				regIndex := plan2.GetRangeShuffleIndexUnsignedMinMax(uint64(ap.ShuffleColMin), uint64(ap.ShuffleColMax), uint64(v), lenRegs)
   540  				sels[regIndex] = append(sels[regIndex], int32(row))
   541  			}
   542  		}
   543  	case types.T_char, types.T_varchar, types.T_text:
   544  		groupByCol, area := vector.MustVarlenaRawData(groupByVec)
   545  		if area == nil {
   546  			if ap.ShuffleRangeUint64 != nil {
   547  				for row := range groupByCol {
   548  					v := plan2.VarlenaToUint64Inline(&groupByCol[row])
   549  					regIndex := plan2.GetRangeShuffleIndexUnsignedSlice(ap.ShuffleRangeUint64, v)
   550  					sels[regIndex] = append(sels[regIndex], int32(row))
   551  				}
   552  			} else {
   553  				for row := range groupByCol {
   554  					v := plan2.VarlenaToUint64Inline(&groupByCol[row])
   555  					regIndex := plan2.GetRangeShuffleIndexUnsignedMinMax(uint64(ap.ShuffleColMin), uint64(ap.ShuffleColMax), v, lenRegs)
   556  					sels[regIndex] = append(sels[regIndex], int32(row))
   557  				}
   558  			}
   559  		} else {
   560  			if ap.ShuffleRangeUint64 != nil {
   561  				for row := range groupByCol {
   562  					v := plan2.VarlenaToUint64(&groupByCol[row], area)
   563  					regIndex := plan2.GetRangeShuffleIndexUnsignedSlice(ap.ShuffleRangeUint64, v)
   564  					sels[regIndex] = append(sels[regIndex], int32(row))
   565  				}
   566  			} else {
   567  				for row := range groupByCol {
   568  					v := plan2.VarlenaToUint64(&groupByCol[row], area)
   569  					regIndex := plan2.GetRangeShuffleIndexUnsignedMinMax(uint64(ap.ShuffleColMin), uint64(ap.ShuffleColMax), v, lenRegs)
   570  					sels[regIndex] = append(sels[regIndex], int32(row))
   571  				}
   572  			}
   573  		}
   574  	default:
   575  		panic("unsupported shuffle type, wrong plan!") //something got wrong here!
   576  	}
   577  	return sels
   578  }
   579  
   580  func getShuffledSelsByRangeWithNull(ap *Argument, bat *batch.Batch) [][]int32 {
   581  	sels := ap.getSels()
   582  	lenRegs := uint64(ap.AliveRegCnt)
   583  	groupByVec := bat.Vecs[ap.ShuffleColIdx]
   584  	switch groupByVec.GetType().Oid {
   585  	case types.T_bit:
   586  		groupByCol := vector.MustFixedCol[uint64](groupByVec)
   587  		if ap.ShuffleRangeUint64 != nil {
   588  			for row, v := range groupByCol {
   589  				var regIndex uint64 = 0
   590  				if !groupByVec.IsNull(uint64(row)) {
   591  					regIndex = plan2.GetRangeShuffleIndexUnsignedSlice(ap.ShuffleRangeUint64, v)
   592  				}
   593  				sels[regIndex] = append(sels[regIndex], int32(row))
   594  			}
   595  		} else {
   596  			for row, v := range groupByCol {
   597  				var regIndex uint64 = 0
   598  				if !groupByVec.IsNull(uint64(row)) {
   599  					regIndex = plan2.GetRangeShuffleIndexSignedMinMax(ap.ShuffleColMin, ap.ShuffleColMax, int64(v), lenRegs)
   600  				}
   601  				sels[regIndex] = append(sels[regIndex], int32(row))
   602  			}
   603  		}
   604  	case types.T_int64:
   605  		groupByCol := vector.MustFixedCol[int64](groupByVec)
   606  		if ap.ShuffleRangeInt64 != nil {
   607  			for row, v := range groupByCol {
   608  				var regIndex uint64 = 0
   609  				if !groupByVec.IsNull(uint64(row)) {
   610  					regIndex = plan2.GetRangeShuffleIndexSignedSlice(ap.ShuffleRangeInt64, v)
   611  				}
   612  				sels[regIndex] = append(sels[regIndex], int32(row))
   613  			}
   614  		} else {
   615  			for row, v := range groupByCol {
   616  				var regIndex uint64 = 0
   617  				if !groupByVec.IsNull(uint64(row)) {
   618  					regIndex = plan2.GetRangeShuffleIndexSignedMinMax(ap.ShuffleColMin, ap.ShuffleColMax, v, lenRegs)
   619  				}
   620  				sels[regIndex] = append(sels[regIndex], int32(row))
   621  			}
   622  		}
   623  	case types.T_int32:
   624  		groupByCol := vector.MustFixedCol[int32](groupByVec)
   625  		if ap.ShuffleRangeInt64 != nil {
   626  			for row, v := range groupByCol {
   627  				var regIndex uint64 = 0
   628  				if !groupByVec.IsNull(uint64(row)) {
   629  					regIndex = plan2.GetRangeShuffleIndexSignedSlice(ap.ShuffleRangeInt64, int64(v))
   630  				}
   631  				sels[regIndex] = append(sels[regIndex], int32(row))
   632  			}
   633  		} else {
   634  			for row, v := range groupByCol {
   635  				var regIndex uint64 = 0
   636  				if !groupByVec.IsNull(uint64(row)) {
   637  					regIndex = plan2.GetRangeShuffleIndexSignedMinMax(ap.ShuffleColMin, ap.ShuffleColMax, int64(v), lenRegs)
   638  				}
   639  				sels[regIndex] = append(sels[regIndex], int32(row))
   640  			}
   641  		}
   642  	case types.T_int16:
   643  		groupByCol := vector.MustFixedCol[int16](groupByVec)
   644  		if ap.ShuffleRangeInt64 != nil {
   645  			for row, v := range groupByCol {
   646  				var regIndex uint64 = 0
   647  				if !groupByVec.IsNull(uint64(row)) {
   648  					regIndex = plan2.GetRangeShuffleIndexSignedSlice(ap.ShuffleRangeInt64, int64(v))
   649  				}
   650  				sels[regIndex] = append(sels[regIndex], int32(row))
   651  			}
   652  		} else {
   653  			for row, v := range groupByCol {
   654  				var regIndex uint64 = 0
   655  				if !groupByVec.IsNull(uint64(row)) {
   656  					regIndex = plan2.GetRangeShuffleIndexSignedMinMax(ap.ShuffleColMin, ap.ShuffleColMax, int64(v), lenRegs)
   657  				}
   658  				sels[regIndex] = append(sels[regIndex], int32(row))
   659  			}
   660  		}
   661  	case types.T_uint64:
   662  		groupByCol := vector.MustFixedCol[uint64](groupByVec)
   663  		if ap.ShuffleRangeUint64 != nil {
   664  			for row, v := range groupByCol {
   665  				var regIndex uint64 = 0
   666  				if !groupByVec.IsNull(uint64(row)) {
   667  					regIndex = plan2.GetRangeShuffleIndexUnsignedSlice(ap.ShuffleRangeUint64, v)
   668  				}
   669  				sels[regIndex] = append(sels[regIndex], int32(row))
   670  			}
   671  		} else {
   672  			for row, v := range groupByCol {
   673  				var regIndex uint64 = 0
   674  				if !groupByVec.IsNull(uint64(row)) {
   675  					regIndex = plan2.GetRangeShuffleIndexSignedMinMax(ap.ShuffleColMin, ap.ShuffleColMax, int64(v), lenRegs)
   676  				}
   677  				sels[regIndex] = append(sels[regIndex], int32(row))
   678  			}
   679  		}
   680  	case types.T_uint32:
   681  		groupByCol := vector.MustFixedCol[uint32](groupByVec)
   682  		if ap.ShuffleRangeUint64 != nil {
   683  			for row, v := range groupByCol {
   684  				var regIndex uint64 = 0
   685  				if !groupByVec.IsNull(uint64(row)) {
   686  					regIndex = plan2.GetRangeShuffleIndexUnsignedSlice(ap.ShuffleRangeUint64, uint64(v))
   687  				}
   688  				sels[regIndex] = append(sels[regIndex], int32(row))
   689  			}
   690  		} else {
   691  			for row, v := range groupByCol {
   692  				var regIndex uint64 = 0
   693  				if !groupByVec.IsNull(uint64(row)) {
   694  					regIndex = plan2.GetRangeShuffleIndexSignedMinMax(ap.ShuffleColMin, ap.ShuffleColMax, int64(v), lenRegs)
   695  				}
   696  				sels[regIndex] = append(sels[regIndex], int32(row))
   697  			}
   698  		}
   699  	case types.T_uint16:
   700  		groupByCol := vector.MustFixedCol[uint16](groupByVec)
   701  		if ap.ShuffleRangeUint64 != nil {
   702  			for row, v := range groupByCol {
   703  				var regIndex uint64 = 0
   704  				if !groupByVec.IsNull(uint64(row)) {
   705  					regIndex = plan2.GetRangeShuffleIndexUnsignedSlice(ap.ShuffleRangeUint64, uint64(v))
   706  				}
   707  				sels[regIndex] = append(sels[regIndex], int32(row))
   708  			}
   709  		} else {
   710  			for row, v := range groupByCol {
   711  				var regIndex uint64 = 0
   712  				if !groupByVec.IsNull(uint64(row)) {
   713  					regIndex = plan2.GetRangeShuffleIndexSignedMinMax(ap.ShuffleColMin, ap.ShuffleColMax, int64(v), lenRegs)
   714  				}
   715  				sels[regIndex] = append(sels[regIndex], int32(row))
   716  			}
   717  		}
   718  	case types.T_char, types.T_varchar, types.T_text:
   719  		groupByCol, area := vector.MustVarlenaRawData(groupByVec)
   720  		if area == nil {
   721  			if ap.ShuffleRangeUint64 != nil {
   722  				for row := range groupByCol {
   723  					var regIndex uint64 = 0
   724  					if !groupByVec.IsNull(uint64(row)) {
   725  						v := plan2.VarlenaToUint64Inline(&groupByCol[row])
   726  						regIndex = plan2.GetRangeShuffleIndexUnsignedSlice(ap.ShuffleRangeUint64, v)
   727  					}
   728  					sels[regIndex] = append(sels[regIndex], int32(row))
   729  				}
   730  			} else {
   731  				for row := range groupByCol {
   732  					var regIndex uint64 = 0
   733  					if !groupByVec.IsNull(uint64(row)) {
   734  						v := plan2.VarlenaToUint64Inline(&groupByCol[row])
   735  						regIndex = plan2.GetRangeShuffleIndexUnsignedMinMax(uint64(ap.ShuffleColMin), uint64(ap.ShuffleColMax), v, lenRegs)
   736  					}
   737  					sels[regIndex] = append(sels[regIndex], int32(row))
   738  				}
   739  			}
   740  		} else {
   741  			if ap.ShuffleRangeUint64 != nil {
   742  				for row := range groupByCol {
   743  					var regIndex uint64 = 0
   744  					if !groupByVec.IsNull(uint64(row)) {
   745  						v := plan2.VarlenaToUint64(&groupByCol[row], area)
   746  						regIndex = plan2.GetRangeShuffleIndexUnsignedSlice(ap.ShuffleRangeUint64, v)
   747  					}
   748  					sels[regIndex] = append(sels[regIndex], int32(row))
   749  				}
   750  			} else {
   751  				for row := range groupByCol {
   752  					var regIndex uint64 = 0
   753  					if !groupByVec.IsNull(uint64(row)) {
   754  						v := plan2.VarlenaToUint64(&groupByCol[row], area)
   755  						regIndex = plan2.GetRangeShuffleIndexUnsignedMinMax(uint64(ap.ShuffleColMin), uint64(ap.ShuffleColMax), v, lenRegs)
   756  					}
   757  					sels[regIndex] = append(sels[regIndex], int32(row))
   758  				}
   759  			}
   760  		}
   761  	default:
   762  		panic("unsupported shuffle type, wrong plan!") //something got wrong here!
   763  	}
   764  	return sels
   765  }
   766  
   767  func putBatchIntoShuffledPoolsBySels(ap *Argument, srcBatch *batch.Batch, sels [][]int32, proc *process.Process) error {
   768  	shuffledPool := ap.ctr.shufflePool
   769  	var err error
   770  	for regIndex := range shuffledPool {
   771  		newSels := sels[regIndex]
   772  		for len(newSels) > 0 {
   773  			bat := shuffledPool[regIndex]
   774  			if bat == nil {
   775  				bat, err = proc.NewBatchFromSrc(srcBatch, colexec.DefaultBatchSize)
   776  				if err != nil {
   777  					return err
   778  				}
   779  				bat.ShuffleIDX = regIndex
   780  				ap.ctr.shufflePool[regIndex] = bat
   781  			}
   782  			length := len(newSels)
   783  			if length+bat.RowCount() > colexec.DefaultBatchSize {
   784  				length = colexec.DefaultBatchSize - bat.RowCount()
   785  			}
   786  			for vecIndex := range bat.Vecs {
   787  				v := bat.Vecs[vecIndex]
   788  				v.SetSorted(false)
   789  				err = v.Union(srcBatch.Vecs[vecIndex], newSels[:length], proc.Mp())
   790  				if err != nil {
   791  					return err
   792  				}
   793  			}
   794  			bat.AddRowCount(length)
   795  			newSels = newSels[length:]
   796  			if bat.RowCount() == colexec.DefaultBatchSize {
   797  				ap.ctr.sendPool = append(ap.ctr.sendPool, bat)
   798  				shuffledPool[regIndex] = nil
   799  			}
   800  		}
   801  	}
   802  	return nil
   803  }
   804  
   805  func rangeShuffle(ap *Argument, bat *batch.Batch, proc *process.Process) (*batch.Batch, error) {
   806  	groupByVec := bat.Vecs[ap.ShuffleColIdx]
   807  	if groupByVec.GetSorted() || groupByVec.IsConst() {
   808  		ok, regIndex := allBatchInOneRange(ap, bat)
   809  		if ok {
   810  			bat.ShuffleIDX = int(regIndex)
   811  			return bat, nil
   812  		}
   813  	}
   814  	var sels [][]int32
   815  	if groupByVec.HasNull() {
   816  		sels = getShuffledSelsByRangeWithNull(ap, bat)
   817  	} else {
   818  		sels = getShuffledSelsByRangeWithoutNull(ap, bat)
   819  	}
   820  	for i := range sels {
   821  		if len(sels[i]) > 0 && len(sels[i]) != bat.RowCount() {
   822  			break
   823  		}
   824  		if len(sels[i]) == bat.RowCount() {
   825  			bat.ShuffleIDX = i
   826  			return bat, nil
   827  		}
   828  	}
   829  	err := putBatchIntoShuffledPoolsBySels(ap, bat, sels, proc)
   830  	return nil, err
   831  }