
     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
//      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    15  package plan
    17  import (
    18  	"math/bits"
    19  	"unsafe"
    21  	""
    23  	""
    24  	""
    25  	""
    26  	""
    27  	pb ""
    28  	""
    29  )
// Thresholds consulted by the shuffle-planning functions in this file.
//
//   - HashMapSizeForShuffle: joins and group-bys whose estimated hashmap size
//     is below this value are not shuffled.
//   - threshHoldForHybirdShuffle: in the second planning pass, a hybrid-shuffle
//     join whose hashmap size is at most this value has its shuffle turned off.
//   - MAXShuffleDOP: the value returned by GetShuffleDop.
//   - ShuffleThreshHoldOfNDV: columns whose NDV is below this value are not
//     used as a join/group-by shuffle key, and do not enable range shuffle
//     for scans.
//   - ShuffleTypeThreshHoldLowerLimit / ShuffleTypeThreshHoldUpperLimit:
//     multipliers applied to the right child's output count and compared with
//     the left child's output count in determinShuffleType.
const (
	HashMapSizeForShuffle           = 160000
	threshHoldForHybirdShuffle      = 4000000
	MAXShuffleDOP                   = 64
	ShuffleThreshHoldOfNDV          = 50000
	ShuffleTypeThreshHoldLowerLimit = 16
	ShuffleTypeThreshHoldUpperLimit = 1024
)
// Shuffle destination codes. They are not referenced by the functions visible
// in this file.
// NOTE(review): presumably these tell the executor where a shuffled batch is
// routed (a register index, a locally matched register, or several matched
// registers) — confirm against the code that consumes them.
const (
	ShuffleToRegIndex        int32 = 0
	ShuffleToLocalMatchedReg int32 = 1
	ShuffleToMultiMatchedReg int32 = 2
)
    46  // convert first 8 bytes to uint64, slice might be less than 8 bytes
    47  func ByteSliceToUint64(bytes []byte) uint64 {
    48  	var result uint64 = 0
    49  	i := 0
    50  	length := len(bytes)
    51  	for ; i < 8; i++ {
    52  		result = result * 256
    53  		if i < length {
    54  			result += uint64(bytes[i])
    55  		}
    56  	}
    57  	return result
    58  }
    60  // convert first 8 bytes to uint64. vec.area must be nil
    61  // if varlena length less than 8 bytes, should have filled zero in varlena
    62  func VarlenaToUint64Inline(v *types.Varlena) uint64 {
    63  	return bits.ReverseBytes64(*(*uint64)(unsafe.Add(unsafe.Pointer(&v[0]), 1)))
    64  }
    66  // convert first 8 bytes to uint64
    67  func VarlenaToUint64(v *types.Varlena, area []byte) uint64 {
    68  	svlen := (*v)[0]
    69  	if svlen <= types.VarlenaInlineSize {
    70  		return VarlenaToUint64Inline(v)
    71  	} else {
    72  		voff, _ := v.OffsetLen()
    73  		return bits.ReverseBytes64(*(*uint64)(unsafe.Pointer(&area[voff])))
    74  	}
    75  }
    77  func SimpleCharHashToRange(bytes []byte, upperLimit uint64) uint64 {
    78  	lenBytes := len(bytes)
    79  	if lenBytes == 0 {
    80  		// always hash empty string to first bucket
    81  		return 0
    82  	}
    83  	//sample five bytes
    84  	h := (uint64(bytes[0])*(uint64(bytes[lenBytes/4])+uint64(bytes[lenBytes/2])+uint64(bytes[lenBytes*3/4])) + uint64(bytes[lenBytes-1]))
    85  	return hashtable.Int64HashWithFixedSeed(h) % upperLimit
    86  }
    88  func SimpleInt64HashToRange(i uint64, upperLimit uint64) uint64 {
    89  	return hashtable.Int64HashWithFixedSeed(i) % upperLimit
    90  }
    92  func GetCenterValueForZMSigned(zm objectio.ZoneMap) int64 {
    93  	switch zm.GetType() {
    94  	case types.T_int64:
    95  		return types.DecodeInt64(zm.GetMinBuf())/2 + types.DecodeInt64(zm.GetMaxBuf())/2
    96  	case types.T_int32:
    97  		return int64(types.DecodeInt32(zm.GetMinBuf()))/2 + int64(types.DecodeInt32(zm.GetMaxBuf()))/2
    98  	case types.T_int16:
    99  		return int64(types.DecodeInt16(zm.GetMinBuf()))/2 + int64(types.DecodeInt16(zm.GetMaxBuf()))/2
   100  	default:
   101  		panic("wrong type!")
   102  	}
   103  }
   105  func GetCenterValueForZMUnsigned(zm objectio.ZoneMap) uint64 {
   106  	switch zm.GetType() {
   107  	case types.T_bit:
   108  		return types.DecodeUint64(zm.GetMinBuf())/2 + types.DecodeUint64(zm.GetMaxBuf())/2
   109  	case types.T_uint64:
   110  		return types.DecodeUint64(zm.GetMinBuf())/2 + types.DecodeUint64(zm.GetMaxBuf())/2
   111  	case types.T_uint32:
   112  		return uint64(types.DecodeUint32(zm.GetMinBuf()))/2 + uint64(types.DecodeUint32(zm.GetMaxBuf()))/2
   113  	case types.T_uint16:
   114  		return uint64(types.DecodeUint16(zm.GetMinBuf()))/2 + uint64(types.DecodeUint16(zm.GetMaxBuf()))/2
   115  	case types.T_varchar, types.T_char, types.T_text:
   116  		return ByteSliceToUint64(zm.GetMinBuf())/2 + ByteSliceToUint64(zm.GetMaxBuf())/2
   117  	default:
   118  		panic("wrong type!")
   119  	}
   120  }
   122  func GetRangeShuffleIndexForZM(minVal, maxVal int64, zm objectio.ZoneMap, upplerLimit uint64) uint64 {
   123  	switch zm.GetType() {
   124  	case types.T_int64, types.T_int32, types.T_int16:
   125  		return GetRangeShuffleIndexSignedMinMax(minVal, maxVal, GetCenterValueForZMSigned(zm), upplerLimit)
   126  	case types.T_uint64, types.T_uint32, types.T_uint16, types.T_varchar, types.T_char, types.T_text, types.T_bit:
   127  		return GetRangeShuffleIndexUnsignedMinMax(uint64(minVal), uint64(maxVal), GetCenterValueForZMUnsigned(zm), upplerLimit)
   128  	}
   129  	panic("unsupported shuffle type!")
   130  }
   132  func GetRangeShuffleIndexForZMSignedSlice(val []int64, zm objectio.ZoneMap) uint64 {
   133  	switch zm.GetType() {
   134  	case types.T_int64, types.T_int32, types.T_int16:
   135  		return GetRangeShuffleIndexSignedSlice(val, GetCenterValueForZMSigned(zm))
   136  	}
   137  	panic("wrong type!")
   138  }
   140  func GetRangeShuffleIndexForZMUnsignedSlice(val []uint64, zm objectio.ZoneMap) uint64 {
   141  	switch zm.GetType() {
   142  	case types.T_uint64, types.T_uint32, types.T_uint16, types.T_varchar, types.T_char, types.T_text, types.T_bit:
   143  		return GetRangeShuffleIndexUnsignedSlice(val, GetCenterValueForZMUnsigned(zm))
   144  	}
   145  	panic("wrong type!")
   146  }
   148  func GetRangeShuffleIndexSignedMinMax(minVal, maxVal, currentVal int64, upplerLimit uint64) uint64 {
   149  	if currentVal <= minVal {
   150  		return 0
   151  	} else if currentVal >= maxVal {
   152  		return upplerLimit - 1
   153  	} else {
   154  		step := uint64(maxVal-minVal) / upplerLimit
   155  		ret := uint64(currentVal-minVal) / step
   156  		if ret >= upplerLimit {
   157  			return upplerLimit - 1
   158  		}
   159  		return ret
   160  	}
   161  }
   163  func GetRangeShuffleIndexUnsignedMinMax(minVal, maxVal, currentVal uint64, upplerLimit uint64) uint64 {
   164  	if currentVal <= minVal {
   165  		return 0
   166  	} else if currentVal >= maxVal {
   167  		return upplerLimit - 1
   168  	} else {
   169  		step := (maxVal - minVal) / upplerLimit
   170  		ret := (currentVal - minVal) / step
   171  		if ret >= upplerLimit {
   172  			return upplerLimit - 1
   173  		}
   174  		return ret
   175  	}
   176  }
   178  func GetRangeShuffleIndexSignedSlice(val []int64, currentVal int64) uint64 {
   179  	if currentVal <= val[0] {
   180  		return 0
   181  	}
   182  	left := 0
   183  	right := len(val) - 1
   184  	for left < right {
   185  		mid := (left + right) >> 1
   186  		if currentVal > val[mid] {
   187  			left = mid + 1
   188  		} else {
   189  			right = mid
   190  		}
   191  	}
   192  	if currentVal > val[right] {
   193  		right += 1
   194  	}
   195  	return uint64(right)
   196  }
   198  func GetRangeShuffleIndexUnsignedSlice(val []uint64, currentVal uint64) uint64 {
   199  	if currentVal <= val[0] {
   200  		return 0
   201  	}
   202  	left := 0
   203  	right := len(val) - 1
   204  	for left < right {
   205  		mid := (left + right) >> 1
   206  		if currentVal > val[mid] {
   207  			left = mid + 1
   208  		} else {
   209  			right = mid
   210  		}
   211  	}
   212  	if currentVal > val[right] {
   213  		right += 1
   214  	}
   215  	return uint64(right)
   216  }
   218  func GetHashColumn(expr *plan.Expr) (*plan.ColRef, int32) {
   219  	switch exprImpl := expr.Expr.(type) {
   220  	case *plan.Expr_F:
   221  		//do not support shuffle on expr for now. will improve this in the future
   222  		return nil, -1
   223  	case *plan.Expr_Col:
   224  		return exprImpl.Col, expr.Typ.Id
   225  	}
   226  	return nil, -1
   227  }
   229  func maybeSorted(n *plan.Node, builder *QueryBuilder, tag int32) bool {
   230  	// for scan node, primary key and cluster by may be sorted
   231  	if n.NodeType == plan.Node_TABLE_SCAN {
   232  		return n.BindingTags[0] == tag
   233  	}
   234  	// for inner join, if left child may be sorted, then inner join may be sorted
   235  	if n.NodeType == plan.Node_JOIN && n.JoinType == plan.Node_INNER {
   236  		leftChild := builder.qry.Nodes[n.Children[0]]
   237  		return maybeSorted(leftChild, builder, tag)
   238  	}
   239  	return false
   240  }
   242  func determinShuffleType(col *plan.ColRef, n *plan.Node, builder *QueryBuilder) {
   243  	// hash by default
   244  	n.Stats.HashmapStats.ShuffleType = plan.ShuffleType_Hash
   246  	if builder == nil {
   247  		return
   248  	}
   249  	tableDef, ok := builder.tag2Table[col.RelPos]
   250  	if !ok {
   251  		return
   252  	}
   253  	colName := tableDef.Cols[col.ColPos].Name
   255  	// for shuffle join, if left child is not sorted, the cost will be very high
   256  	// should use complex shuffle type
   257  	if n.NodeType == plan.Node_JOIN {
   258  		leftSorted := true
   259  		if GetSortOrder(tableDef, col.ColPos) != 0 {
   260  			leftSorted = false
   261  		}
   262  		if !maybeSorted(builder.qry.Nodes[n.Children[0]], builder, col.RelPos) {
   263  			leftSorted = false
   264  		}
   265  		if !leftSorted {
   266  			leftCost := builder.qry.Nodes[n.Children[0]].Stats.Outcnt
   267  			rightCost := builder.qry.Nodes[n.Children[1]].Stats.Outcnt
   268  			if n.BuildOnLeft {
   269  				// its better for right join to go shuffle, but can not go complex shuffle
   270  				if n.BuildOnLeft && leftCost > ShuffleTypeThreshHoldUpperLimit*rightCost {
   271  					return
   272  				}
   273  			} else if leftCost > ShuffleTypeThreshHoldLowerLimit*rightCost {
   274  				n.Stats.HashmapStats.ShuffleTypeForMultiCN = plan.ShuffleTypeForMultiCN_Hybrid
   275  			}
   276  		}
   277  	}
   279  	s := builder.getStatsInfoByTableID(tableDef.TblId)
   280  	if s == nil {
   281  		return
   282  	}
   283  	if shouldUseHashShuffle(s.ShuffleRangeMap[colName]) {
   284  		return
   285  	}
   286  	n.Stats.HashmapStats.ShuffleType = plan.ShuffleType_Range
   287  	n.Stats.HashmapStats.ShuffleColMin = int64(s.MinValMap[colName])
   288  	n.Stats.HashmapStats.ShuffleColMax = int64(s.MaxValMap[colName])
   289  	n.Stats.HashmapStats.Ranges = shouldUseShuffleRanges(s.ShuffleRangeMap[colName])
   290  	n.Stats.HashmapStats.Nullcnt = int64(s.NullCntMap[colName])
   291  }
   293  // to determine if join need to go shuffle
   294  func determinShuffleForJoin(n *plan.Node, builder *QueryBuilder) {
   295  	// do not shuffle by default
   296  	n.Stats.HashmapStats.ShuffleColIdx = -1
   297  	if n.NodeType != plan.Node_JOIN {
   298  		return
   299  	}
   300  	switch n.JoinType {
   301  	case plan.Node_INNER, plan.Node_ANTI, plan.Node_SEMI, plan.Node_LEFT, plan.Node_RIGHT:
   302  	default:
   303  		return
   304  	}
   306  	// for now, if join children is agg or filter, do not allow shuffle
   307  	if isAggOrFilter(builder.qry.Nodes[n.Children[0]], builder) || isAggOrFilter(builder.qry.Nodes[n.Children[1]], builder) {
   308  		return
   309  	}
   311  	if n.Stats.HashmapStats.HashmapSize < HashMapSizeForShuffle {
   312  		return
   313  	}
   314  	idx := 0
   315  	if !builder.IsEquiJoin(n) {
   316  		return
   317  	}
   318  	leftTags := make(map[int32]bool)
   319  	for _, tag := range builder.enumerateTags(n.Children[0]) {
   320  		leftTags[tag] = true
   321  	}
   322  	rightTags := make(map[int32]bool)
   323  	for _, tag := range builder.enumerateTags(n.Children[1]) {
   324  		rightTags[tag] = true
   325  	}
   326  	// for now ,only support the first join condition
   327  	for i := range n.OnList {
   328  		if isEquiCond(n.OnList[i], leftTags, rightTags) {
   329  			idx = i
   330  			break
   331  		}
   332  	}
   334  	//find the highest ndv
   335  	highestNDV := n.OnList[idx].Ndv
   336  	if highestNDV < ShuffleThreshHoldOfNDV {
   337  		return
   338  	}
   340  	// get the column of left child
   341  	var expr *plan.Expr
   342  	cond := n.OnList[idx]
   343  	switch condImpl := cond.Expr.(type) {
   344  	case *plan.Expr_F:
   345  		expr = condImpl.F.Args[0]
   346  	}
   348  	hashCol, typ := GetHashColumn(expr)
   349  	if hashCol == nil {
   350  		return
   351  	}
   352  	//for now ,only support integer and string type
   353  	switch types.T(typ) {
   354  	case types.T_int64, types.T_int32, types.T_int16, types.T_uint64, types.T_uint32, types.T_uint16, types.T_varchar, types.T_char, types.T_text:
   355  		n.Stats.HashmapStats.ShuffleColIdx = int32(idx)
   356  		n.Stats.HashmapStats.Shuffle = true
   357  		determinShuffleType(hashCol, n, builder)
   358  	}
   359  }
   361  // find agg or agg->filter node
   362  func isAggOrFilter(n *plan.Node, builder *QueryBuilder) bool {
   363  	if n.NodeType == plan.Node_AGG {
   364  		return true
   365  	} else if n.NodeType == plan.Node_FILTER {
   366  		if builder.qry.Nodes[n.Children[0]].NodeType == plan.Node_AGG {
   367  			return true
   368  		}
   369  	}
   370  	return false
   371  }
   373  // to determine if groupby need to go shuffle
   374  func determinShuffleForGroupBy(n *plan.Node, builder *QueryBuilder) {
   375  	// do not shuffle by default
   376  	n.Stats.HashmapStats.ShuffleColIdx = -1
   378  	if n.NodeType != plan.Node_AGG {
   379  		return
   380  	}
   381  	if len(n.GroupBy) == 0 {
   382  		return
   383  	}
   385  	child := builder.qry.Nodes[n.Children[0]]
   387  	// for now, if agg children is agg or filter, do not allow shuffle
   388  	if isAggOrFilter(child, builder) {
   389  		return
   390  	}
   392  	if n.Stats.HashmapStats.HashmapSize < HashMapSizeForShuffle {
   393  		return
   394  	}
   395  	//find the highest ndv
   396  	highestNDV := n.GroupBy[0].Ndv
   397  	idx := 0
   398  	for i := range n.GroupBy {
   399  		if n.GroupBy[i].Ndv > highestNDV {
   400  			highestNDV = n.GroupBy[i].Ndv
   401  			idx = i
   402  		}
   403  	}
   404  	if highestNDV < ShuffleThreshHoldOfNDV {
   405  		return
   406  	}
   408  	hashCol, typ := GetHashColumn(n.GroupBy[idx])
   409  	if hashCol == nil {
   410  		return
   411  	}
   412  	//for now ,only support integer and string type
   413  	switch types.T(typ) {
   414  	case types.T_int64, types.T_int32, types.T_int16, types.T_uint64, types.T_uint32, types.T_uint16, types.T_varchar, types.T_char, types.T_text:
   415  		n.Stats.HashmapStats.ShuffleColIdx = int32(idx)
   416  		n.Stats.HashmapStats.Shuffle = true
   417  		determinShuffleType(hashCol, n, builder)
   418  	}
   420  	//shuffle join-> shuffle group ,if they use the same hask key, the group can reuse the shuffle method
   421  	if child.NodeType == plan.Node_JOIN {
   422  		if n.Stats.HashmapStats.Shuffle && child.Stats.HashmapStats.Shuffle {
   423  			// shuffle group can reuse shuffle join
   424  			if n.Stats.HashmapStats.ShuffleType == child.Stats.HashmapStats.ShuffleType && n.Stats.HashmapStats.ShuffleTypeForMultiCN == child.Stats.HashmapStats.ShuffleTypeForMultiCN {
   425  				groupHashCol, _ := GetHashColumn(n.GroupBy[n.Stats.HashmapStats.ShuffleColIdx])
   426  				switch exprImpl := child.OnList[child.Stats.HashmapStats.ShuffleColIdx].Expr.(type) {
   427  				case *plan.Expr_F:
   428  					for _, arg := range exprImpl.F.Args {
   429  						joinHashCol, _ := GetHashColumn(arg)
   430  						if groupHashCol.RelPos == joinHashCol.RelPos && groupHashCol.ColPos == joinHashCol.ColPos {
   431  							n.Stats.HashmapStats.ShuffleMethod = plan.ShuffleMethod_Reuse
   432  							return
   433  						}
   434  					}
   435  				}
   436  			}
   437  			// shuffle group can not follow shuffle join, need to reshuffle
   438  			n.Stats.HashmapStats.ShuffleMethod = plan.ShuffleMethod_Reshuffle
   439  		}
   440  	}
   442  }
   444  func GetShuffleDop() (dop int) {
   445  	return MAXShuffleDOP
   446  }
   448  // default shuffle type for scan is hash
   449  // for table with primary key, and ndv of first column in primary key is high enough, use range shuffle
   450  // only support integer type
   451  func determinShuffleForScan(n *plan.Node, builder *QueryBuilder) {
   452  	n.Stats.HashmapStats.Shuffle = true
   453  	n.Stats.HashmapStats.ShuffleType = plan.ShuffleType_Hash
   454  	if builder.optimizerHints != nil && builder.optimizerHints.determineShuffle == 2 { // always go hashshuffle for scan
   455  		return
   456  	}
   457  	s := builder.getStatsInfoByTableID(n.TableDef.TblId)
   458  	if s == nil {
   459  		return
   460  	}
   462  	var firstSortColName string
   463  	if n.TableDef.ClusterBy != nil {
   464  		firstSortColName = util.GetClusterByFirstColumn(n.TableDef.ClusterBy.Name)
   465  	} else if n.TableDef.Pkey.PkeyColName == catalog.FakePrimaryKeyColName {
   466  		return
   467  	} else {
   468  		firstSortColName = n.TableDef.Pkey.Names[0]
   469  	}
   471  	if s.NdvMap[firstSortColName] < ShuffleThreshHoldOfNDV {
   472  		return
   473  	}
   474  	firstSortColID, ok := n.TableDef.Name2ColIndex[firstSortColName]
   475  	if !ok {
   476  		return
   477  	}
   478  	switch types.T(n.TableDef.Cols[firstSortColID].Typ.Id) {
   479  	case types.T_int64, types.T_int32, types.T_int16, types.T_uint64, types.T_uint32, types.T_uint16, types.T_char, types.T_varchar, types.T_text:
   480  		n.Stats.HashmapStats.ShuffleType = plan.ShuffleType_Range
   481  		n.Stats.HashmapStats.ShuffleColIdx = int32(n.TableDef.Cols[firstSortColID].Seqnum)
   482  		n.Stats.HashmapStats.ShuffleColMin = int64(s.MinValMap[firstSortColName])
   483  		n.Stats.HashmapStats.ShuffleColMax = int64(s.MaxValMap[firstSortColName])
   484  		n.Stats.HashmapStats.Ranges = shouldUseShuffleRanges(s.ShuffleRangeMap[firstSortColName])
   485  		n.Stats.HashmapStats.Nullcnt = int64(s.NullCntMap[firstSortColName])
   486  	}
   487  }
   489  func determineShuffleMethod(nodeID int32, builder *QueryBuilder) {
   490  	if builder.optimizerHints != nil && builder.optimizerHints.determineShuffle == 1 {
   491  		return
   492  	}
   493  	node := builder.qry.Nodes[nodeID]
   494  	if len(node.Children) > 0 {
   495  		for _, child := range node.Children {
   496  			determineShuffleMethod(child, builder)
   497  		}
   498  	}
   499  	switch node.NodeType {
   500  	case plan.Node_AGG:
   501  		determinShuffleForGroupBy(node, builder)
   502  	case plan.Node_TABLE_SCAN:
   503  		determinShuffleForScan(node, builder)
   504  	case plan.Node_JOIN:
   505  		determinShuffleForJoin(node, builder)
   506  	default:
   507  	}
   508  }
   510  // second pass of determine shuffle
   511  func determineShuffleMethod2(nodeID, parentID int32, builder *QueryBuilder) {
   512  	if builder.optimizerHints != nil && builder.optimizerHints.determineShuffle == 1 {
   513  		return
   514  	}
   515  	node := builder.qry.Nodes[nodeID]
   516  	if len(node.Children) > 0 {
   517  		for _, child := range node.Children {
   518  			determineShuffleMethod2(child, nodeID, builder)
   519  		}
   520  	}
   521  	if parentID == -1 {
   522  		return
   523  	}
   524  	parent := builder.qry.Nodes[parentID]
   526  	if node.NodeType == plan.Node_JOIN && node.Stats.HashmapStats.ShuffleTypeForMultiCN == plan.ShuffleTypeForMultiCN_Hybrid {
   527  		if parent.NodeType == plan.Node_AGG && parent.Stats.HashmapStats.ShuffleMethod == plan.ShuffleMethod_Reuse {
   528  			return
   529  		}
   530  		if node.Stats.HashmapStats.HashmapSize <= threshHoldForHybirdShuffle {
   531  			node.Stats.HashmapStats.Shuffle = false
   532  			if parent.NodeType == plan.Node_AGG && parent.Stats.HashmapStats.ShuffleMethod == plan.ShuffleMethod_Reshuffle {
   533  				parent.Stats.HashmapStats.ShuffleMethod = plan.ShuffleMethod_Normal
   534  			}
   535  		}
   536  	}
   537  }
   539  func shouldUseHashShuffle(s *pb.ShuffleRange) bool {
   540  	if s == nil {
   541  		return true
   542  	}
   543  	if s.Uniform > 0.3 {
   544  		return false
   545  	}
   546  	if s.Overlap > 0.5 {
   547  		return true
   548  	}
   549  	return true
   550  }
   552  func shouldUseShuffleRanges(s *pb.ShuffleRange) []float64 {
   553  	if s == nil {
   554  		return nil
   555  	}
   556  	if s.Uniform > 0.3 {
   557  		return nil
   558  	}
   559  	return s.Result
   560  }