github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/plan/shuffle.go (about) 1 // Copyright 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package plan 16 17 import ( 18 "math/bits" 19 "unsafe" 20 21 "github.com/matrixorigin/matrixone/pkg/catalog" 22 23 "github.com/matrixorigin/matrixone/pkg/container/hashtable" 24 "github.com/matrixorigin/matrixone/pkg/container/types" 25 "github.com/matrixorigin/matrixone/pkg/objectio" 26 "github.com/matrixorigin/matrixone/pkg/pb/plan" 27 pb "github.com/matrixorigin/matrixone/pkg/pb/statsinfo" 28 "github.com/matrixorigin/matrixone/pkg/sql/util" 29 ) 30 31 const ( 32 HashMapSizeForShuffle = 160000 33 threshHoldForHybirdShuffle = 4000000 34 MAXShuffleDOP = 64 35 ShuffleThreshHoldOfNDV = 50000 36 ShuffleTypeThreshHoldLowerLimit = 16 37 ShuffleTypeThreshHoldUpperLimit = 1024 38 ) 39 40 const ( 41 ShuffleToRegIndex int32 = 0 42 ShuffleToLocalMatchedReg int32 = 1 43 ShuffleToMultiMatchedReg int32 = 2 44 ) 45 46 // convert first 8 bytes to uint64, slice might be less than 8 bytes 47 func ByteSliceToUint64(bytes []byte) uint64 { 48 var result uint64 = 0 49 i := 0 50 length := len(bytes) 51 for ; i < 8; i++ { 52 result = result * 256 53 if i < length { 54 result += uint64(bytes[i]) 55 } 56 } 57 return result 58 } 59 60 // convert first 8 bytes to uint64. vec.area must be nil 61 // if varlena length less than 8 bytes, should have filled zero in varlena 62 func VarlenaToUint64Inline(v *types.Varlena) uint64 { 63 return bits.ReverseBytes64(*(*uint64)(unsafe.Add(unsafe.Pointer(&v[0]), 1))) 64 } 65 66 // convert first 8 bytes to uint64 67 func VarlenaToUint64(v *types.Varlena, area []byte) uint64 { 68 svlen := (*v)[0] 69 if svlen <= types.VarlenaInlineSize { 70 return VarlenaToUint64Inline(v) 71 } else { 72 voff, _ := v.OffsetLen() 73 return bits.ReverseBytes64(*(*uint64)(unsafe.Pointer(&area[voff]))) 74 } 75 } 76 77 func SimpleCharHashToRange(bytes []byte, upperLimit uint64) uint64 { 78 lenBytes := len(bytes) 79 if lenBytes == 0 { 80 // always hash empty string to first bucket 81 return 0 82 } 83 //sample five bytes 84 h := (uint64(bytes[0])*(uint64(bytes[lenBytes/4])+uint64(bytes[lenBytes/2])+uint64(bytes[lenBytes*3/4])) + uint64(bytes[lenBytes-1])) 85 return hashtable.Int64HashWithFixedSeed(h) % upperLimit 86 } 87 88 func SimpleInt64HashToRange(i uint64, upperLimit uint64) uint64 { 89 return hashtable.Int64HashWithFixedSeed(i) % upperLimit 90 } 91 92 func GetCenterValueForZMSigned(zm objectio.ZoneMap) int64 { 93 switch zm.GetType() { 94 case types.T_int64: 95 return types.DecodeInt64(zm.GetMinBuf())/2 + types.DecodeInt64(zm.GetMaxBuf())/2 96 case types.T_int32: 97 return int64(types.DecodeInt32(zm.GetMinBuf()))/2 + int64(types.DecodeInt32(zm.GetMaxBuf()))/2 98 case types.T_int16: 99 return int64(types.DecodeInt16(zm.GetMinBuf()))/2 + int64(types.DecodeInt16(zm.GetMaxBuf()))/2 100 default: 101 panic("wrong type!") 102 } 103 } 104 105 func GetCenterValueForZMUnsigned(zm objectio.ZoneMap) uint64 { 106 switch zm.GetType() { 107 case types.T_bit: 108 return types.DecodeUint64(zm.GetMinBuf())/2 + types.DecodeUint64(zm.GetMaxBuf())/2 109 case types.T_uint64: 110 return types.DecodeUint64(zm.GetMinBuf())/2 + types.DecodeUint64(zm.GetMaxBuf())/2 111 case types.T_uint32: 112 return uint64(types.DecodeUint32(zm.GetMinBuf()))/2 + uint64(types.DecodeUint32(zm.GetMaxBuf()))/2 113 case types.T_uint16: 114 return uint64(types.DecodeUint16(zm.GetMinBuf()))/2 + uint64(types.DecodeUint16(zm.GetMaxBuf()))/2 115 case types.T_varchar, types.T_char, types.T_text: 116 return ByteSliceToUint64(zm.GetMinBuf())/2 + ByteSliceToUint64(zm.GetMaxBuf())/2 117 default: 118 panic("wrong type!") 119 } 120 } 121 122 func GetRangeShuffleIndexForZM(minVal, maxVal int64, zm objectio.ZoneMap, upplerLimit uint64) uint64 { 123 switch zm.GetType() { 124 case types.T_int64, types.T_int32, types.T_int16: 125 return GetRangeShuffleIndexSignedMinMax(minVal, maxVal, GetCenterValueForZMSigned(zm), upplerLimit) 126 case types.T_uint64, types.T_uint32, types.T_uint16, types.T_varchar, types.T_char, types.T_text, types.T_bit: 127 return GetRangeShuffleIndexUnsignedMinMax(uint64(minVal), uint64(maxVal), GetCenterValueForZMUnsigned(zm), upplerLimit) 128 } 129 panic("unsupported shuffle type!") 130 } 131 132 func GetRangeShuffleIndexForZMSignedSlice(val []int64, zm objectio.ZoneMap) uint64 { 133 switch zm.GetType() { 134 case types.T_int64, types.T_int32, types.T_int16: 135 return GetRangeShuffleIndexSignedSlice(val, GetCenterValueForZMSigned(zm)) 136 } 137 panic("wrong type!") 138 } 139 140 func GetRangeShuffleIndexForZMUnsignedSlice(val []uint64, zm objectio.ZoneMap) uint64 { 141 switch zm.GetType() { 142 case types.T_uint64, types.T_uint32, types.T_uint16, types.T_varchar, types.T_char, types.T_text, types.T_bit: 143 return GetRangeShuffleIndexUnsignedSlice(val, GetCenterValueForZMUnsigned(zm)) 144 } 145 panic("wrong type!") 146 } 147 148 func GetRangeShuffleIndexSignedMinMax(minVal, maxVal, currentVal int64, upplerLimit uint64) uint64 { 149 if currentVal <= minVal { 150 return 0 151 } else if currentVal >= maxVal { 152 return upplerLimit - 1 153 } else { 154 step := uint64(maxVal-minVal) / upplerLimit 155 ret := uint64(currentVal-minVal) / step 156 if ret >= upplerLimit { 157 return upplerLimit - 1 158 } 159 return ret 160 } 161 } 162 163 func GetRangeShuffleIndexUnsignedMinMax(minVal, maxVal, currentVal uint64, upplerLimit uint64) uint64 { 164 if currentVal <= minVal { 165 return 0 166 } else if currentVal >= maxVal { 167 return upplerLimit - 1 168 } else { 169 step := (maxVal - minVal) / upplerLimit 170 ret := (currentVal - minVal) / step 171 if ret >= upplerLimit { 172 return upplerLimit - 1 173 } 174 return ret 175 } 176 } 177 178 func GetRangeShuffleIndexSignedSlice(val []int64, currentVal int64) uint64 { 179 if currentVal <= val[0] { 180 return 0 181 } 182 left := 0 183 right := len(val) - 1 184 for left < right { 185 mid := (left + right) >> 1 186 if currentVal > val[mid] { 187 left = mid + 1 188 } else { 189 right = mid 190 } 191 } 192 if currentVal > val[right] { 193 right += 1 194 } 195 return uint64(right) 196 } 197 198 func GetRangeShuffleIndexUnsignedSlice(val []uint64, currentVal uint64) uint64 { 199 if currentVal <= val[0] { 200 return 0 201 } 202 left := 0 203 right := len(val) - 1 204 for left < right { 205 mid := (left + right) >> 1 206 if currentVal > val[mid] { 207 left = mid + 1 208 } else { 209 right = mid 210 } 211 } 212 if currentVal > val[right] { 213 right += 1 214 } 215 return uint64(right) 216 } 217 218 func GetHashColumn(expr *plan.Expr) (*plan.ColRef, int32) { 219 switch exprImpl := expr.Expr.(type) { 220 case *plan.Expr_F: 221 //do not support shuffle on expr for now. will improve this in the future 222 return nil, -1 223 case *plan.Expr_Col: 224 return exprImpl.Col, expr.Typ.Id 225 } 226 return nil, -1 227 } 228 229 func maybeSorted(n *plan.Node, builder *QueryBuilder, tag int32) bool { 230 // for scan node, primary key and cluster by may be sorted 231 if n.NodeType == plan.Node_TABLE_SCAN { 232 return n.BindingTags[0] == tag 233 } 234 // for inner join, if left child may be sorted, then inner join may be sorted 235 if n.NodeType == plan.Node_JOIN && n.JoinType == plan.Node_INNER { 236 leftChild := builder.qry.Nodes[n.Children[0]] 237 return maybeSorted(leftChild, builder, tag) 238 } 239 return false 240 } 241 242 func determinShuffleType(col *plan.ColRef, n *plan.Node, builder *QueryBuilder) { 243 // hash by default 244 n.Stats.HashmapStats.ShuffleType = plan.ShuffleType_Hash 245 246 if builder == nil { 247 return 248 } 249 tableDef, ok := builder.tag2Table[col.RelPos] 250 if !ok { 251 return 252 } 253 colName := tableDef.Cols[col.ColPos].Name 254 255 // for shuffle join, if left child is not sorted, the cost will be very high 256 // should use complex shuffle type 257 if n.NodeType == plan.Node_JOIN { 258 leftSorted := true 259 if GetSortOrder(tableDef, col.ColPos) != 0 { 260 leftSorted = false 261 } 262 if !maybeSorted(builder.qry.Nodes[n.Children[0]], builder, col.RelPos) { 263 leftSorted = false 264 } 265 if !leftSorted { 266 leftCost := builder.qry.Nodes[n.Children[0]].Stats.Outcnt 267 rightCost := builder.qry.Nodes[n.Children[1]].Stats.Outcnt 268 if n.BuildOnLeft { 269 // its better for right join to go shuffle, but can not go complex shuffle 270 if n.BuildOnLeft && leftCost > ShuffleTypeThreshHoldUpperLimit*rightCost { 271 return 272 } 273 } else if leftCost > ShuffleTypeThreshHoldLowerLimit*rightCost { 274 n.Stats.HashmapStats.ShuffleTypeForMultiCN = plan.ShuffleTypeForMultiCN_Hybrid 275 } 276 } 277 } 278 279 s := builder.getStatsInfoByTableID(tableDef.TblId) 280 if s == nil { 281 return 282 } 283 if shouldUseHashShuffle(s.ShuffleRangeMap[colName]) { 284 return 285 } 286 n.Stats.HashmapStats.ShuffleType = plan.ShuffleType_Range 287 n.Stats.HashmapStats.ShuffleColMin = int64(s.MinValMap[colName]) 288 n.Stats.HashmapStats.ShuffleColMax = int64(s.MaxValMap[colName]) 289 n.Stats.HashmapStats.Ranges = shouldUseShuffleRanges(s.ShuffleRangeMap[colName]) 290 n.Stats.HashmapStats.Nullcnt = int64(s.NullCntMap[colName]) 291 } 292 293 // to determine if join need to go shuffle 294 func determinShuffleForJoin(n *plan.Node, builder *QueryBuilder) { 295 // do not shuffle by default 296 n.Stats.HashmapStats.ShuffleColIdx = -1 297 if n.NodeType != plan.Node_JOIN { 298 return 299 } 300 switch n.JoinType { 301 case plan.Node_INNER, plan.Node_ANTI, plan.Node_SEMI, plan.Node_LEFT, plan.Node_RIGHT: 302 default: 303 return 304 } 305 306 // for now, if join children is agg or filter, do not allow shuffle 307 if isAggOrFilter(builder.qry.Nodes[n.Children[0]], builder) || isAggOrFilter(builder.qry.Nodes[n.Children[1]], builder) { 308 return 309 } 310 311 if n.Stats.HashmapStats.HashmapSize < HashMapSizeForShuffle { 312 return 313 } 314 idx := 0 315 if !builder.IsEquiJoin(n) { 316 return 317 } 318 leftTags := make(map[int32]bool) 319 for _, tag := range builder.enumerateTags(n.Children[0]) { 320 leftTags[tag] = true 321 } 322 rightTags := make(map[int32]bool) 323 for _, tag := range builder.enumerateTags(n.Children[1]) { 324 rightTags[tag] = true 325 } 326 // for now ,only support the first join condition 327 for i := range n.OnList { 328 if isEquiCond(n.OnList[i], leftTags, rightTags) { 329 idx = i 330 break 331 } 332 } 333 334 //find the highest ndv 335 highestNDV := n.OnList[idx].Ndv 336 if highestNDV < ShuffleThreshHoldOfNDV { 337 return 338 } 339 340 // get the column of left child 341 var expr *plan.Expr 342 cond := n.OnList[idx] 343 switch condImpl := cond.Expr.(type) { 344 case *plan.Expr_F: 345 expr = condImpl.F.Args[0] 346 } 347 348 hashCol, typ := GetHashColumn(expr) 349 if hashCol == nil { 350 return 351 } 352 //for now ,only support integer and string type 353 switch types.T(typ) { 354 case types.T_int64, types.T_int32, types.T_int16, types.T_uint64, types.T_uint32, types.T_uint16, types.T_varchar, types.T_char, types.T_text: 355 n.Stats.HashmapStats.ShuffleColIdx = int32(idx) 356 n.Stats.HashmapStats.Shuffle = true 357 determinShuffleType(hashCol, n, builder) 358 } 359 } 360 361 // find agg or agg->filter node 362 func isAggOrFilter(n *plan.Node, builder *QueryBuilder) bool { 363 if n.NodeType == plan.Node_AGG { 364 return true 365 } else if n.NodeType == plan.Node_FILTER { 366 if builder.qry.Nodes[n.Children[0]].NodeType == plan.Node_AGG { 367 return true 368 } 369 } 370 return false 371 } 372 373 // to determine if groupby need to go shuffle 374 func determinShuffleForGroupBy(n *plan.Node, builder *QueryBuilder) { 375 // do not shuffle by default 376 n.Stats.HashmapStats.ShuffleColIdx = -1 377 378 if n.NodeType != plan.Node_AGG { 379 return 380 } 381 if len(n.GroupBy) == 0 { 382 return 383 } 384 385 child := builder.qry.Nodes[n.Children[0]] 386 387 // for now, if agg children is agg or filter, do not allow shuffle 388 if isAggOrFilter(child, builder) { 389 return 390 } 391 392 if n.Stats.HashmapStats.HashmapSize < HashMapSizeForShuffle { 393 return 394 } 395 //find the highest ndv 396 highestNDV := n.GroupBy[0].Ndv 397 idx := 0 398 for i := range n.GroupBy { 399 if n.GroupBy[i].Ndv > highestNDV { 400 highestNDV = n.GroupBy[i].Ndv 401 idx = i 402 } 403 } 404 if highestNDV < ShuffleThreshHoldOfNDV { 405 return 406 } 407 408 hashCol, typ := GetHashColumn(n.GroupBy[idx]) 409 if hashCol == nil { 410 return 411 } 412 //for now ,only support integer and string type 413 switch types.T(typ) { 414 case types.T_int64, types.T_int32, types.T_int16, types.T_uint64, types.T_uint32, types.T_uint16, types.T_varchar, types.T_char, types.T_text: 415 n.Stats.HashmapStats.ShuffleColIdx = int32(idx) 416 n.Stats.HashmapStats.Shuffle = true 417 determinShuffleType(hashCol, n, builder) 418 } 419 420 //shuffle join-> shuffle group ,if they use the same hask key, the group can reuse the shuffle method 421 if child.NodeType == plan.Node_JOIN { 422 if n.Stats.HashmapStats.Shuffle && child.Stats.HashmapStats.Shuffle { 423 // shuffle group can reuse shuffle join 424 if n.Stats.HashmapStats.ShuffleType == child.Stats.HashmapStats.ShuffleType && n.Stats.HashmapStats.ShuffleTypeForMultiCN == child.Stats.HashmapStats.ShuffleTypeForMultiCN { 425 groupHashCol, _ := GetHashColumn(n.GroupBy[n.Stats.HashmapStats.ShuffleColIdx]) 426 switch exprImpl := child.OnList[child.Stats.HashmapStats.ShuffleColIdx].Expr.(type) { 427 case *plan.Expr_F: 428 for _, arg := range exprImpl.F.Args { 429 joinHashCol, _ := GetHashColumn(arg) 430 if groupHashCol.RelPos == joinHashCol.RelPos && groupHashCol.ColPos == joinHashCol.ColPos { 431 n.Stats.HashmapStats.ShuffleMethod = plan.ShuffleMethod_Reuse 432 return 433 } 434 } 435 } 436 } 437 // shuffle group can not follow shuffle join, need to reshuffle 438 n.Stats.HashmapStats.ShuffleMethod = plan.ShuffleMethod_Reshuffle 439 } 440 } 441 442 } 443 444 func GetShuffleDop() (dop int) { 445 return MAXShuffleDOP 446 } 447 448 // default shuffle type for scan is hash 449 // for table with primary key, and ndv of first column in primary key is high enough, use range shuffle 450 // only support integer type 451 func determinShuffleForScan(n *plan.Node, builder *QueryBuilder) { 452 n.Stats.HashmapStats.Shuffle = true 453 n.Stats.HashmapStats.ShuffleType = plan.ShuffleType_Hash 454 if builder.optimizerHints != nil && builder.optimizerHints.determineShuffle == 2 { // always go hashshuffle for scan 455 return 456 } 457 s := builder.getStatsInfoByTableID(n.TableDef.TblId) 458 if s == nil { 459 return 460 } 461 462 var firstSortColName string 463 if n.TableDef.ClusterBy != nil { 464 firstSortColName = util.GetClusterByFirstColumn(n.TableDef.ClusterBy.Name) 465 } else if n.TableDef.Pkey.PkeyColName == catalog.FakePrimaryKeyColName { 466 return 467 } else { 468 firstSortColName = n.TableDef.Pkey.Names[0] 469 } 470 471 if s.NdvMap[firstSortColName] < ShuffleThreshHoldOfNDV { 472 return 473 } 474 firstSortColID, ok := n.TableDef.Name2ColIndex[firstSortColName] 475 if !ok { 476 return 477 } 478 switch types.T(n.TableDef.Cols[firstSortColID].Typ.Id) { 479 case types.T_int64, types.T_int32, types.T_int16, types.T_uint64, types.T_uint32, types.T_uint16, types.T_char, types.T_varchar, types.T_text: 480 n.Stats.HashmapStats.ShuffleType = plan.ShuffleType_Range 481 n.Stats.HashmapStats.ShuffleColIdx = int32(n.TableDef.Cols[firstSortColID].Seqnum) 482 n.Stats.HashmapStats.ShuffleColMin = int64(s.MinValMap[firstSortColName]) 483 n.Stats.HashmapStats.ShuffleColMax = int64(s.MaxValMap[firstSortColName]) 484 n.Stats.HashmapStats.Ranges = shouldUseShuffleRanges(s.ShuffleRangeMap[firstSortColName]) 485 n.Stats.HashmapStats.Nullcnt = int64(s.NullCntMap[firstSortColName]) 486 } 487 } 488 489 func determineShuffleMethod(nodeID int32, builder *QueryBuilder) { 490 if builder.optimizerHints != nil && builder.optimizerHints.determineShuffle == 1 { 491 return 492 } 493 node := builder.qry.Nodes[nodeID] 494 if len(node.Children) > 0 { 495 for _, child := range node.Children { 496 determineShuffleMethod(child, builder) 497 } 498 } 499 switch node.NodeType { 500 case plan.Node_AGG: 501 determinShuffleForGroupBy(node, builder) 502 case plan.Node_TABLE_SCAN: 503 determinShuffleForScan(node, builder) 504 case plan.Node_JOIN: 505 determinShuffleForJoin(node, builder) 506 default: 507 } 508 } 509 510 // second pass of determine shuffle 511 func determineShuffleMethod2(nodeID, parentID int32, builder *QueryBuilder) { 512 if builder.optimizerHints != nil && builder.optimizerHints.determineShuffle == 1 { 513 return 514 } 515 node := builder.qry.Nodes[nodeID] 516 if len(node.Children) > 0 { 517 for _, child := range node.Children { 518 determineShuffleMethod2(child, nodeID, builder) 519 } 520 } 521 if parentID == -1 { 522 return 523 } 524 parent := builder.qry.Nodes[parentID] 525 526 if node.NodeType == plan.Node_JOIN && node.Stats.HashmapStats.ShuffleTypeForMultiCN == plan.ShuffleTypeForMultiCN_Hybrid { 527 if parent.NodeType == plan.Node_AGG && parent.Stats.HashmapStats.ShuffleMethod == plan.ShuffleMethod_Reuse { 528 return 529 } 530 if node.Stats.HashmapStats.HashmapSize <= threshHoldForHybirdShuffle { 531 node.Stats.HashmapStats.Shuffle = false 532 if parent.NodeType == plan.Node_AGG && parent.Stats.HashmapStats.ShuffleMethod == plan.ShuffleMethod_Reshuffle { 533 parent.Stats.HashmapStats.ShuffleMethod = plan.ShuffleMethod_Normal 534 } 535 } 536 } 537 } 538 539 func shouldUseHashShuffle(s *pb.ShuffleRange) bool { 540 if s == nil { 541 return true 542 } 543 if s.Uniform > 0.3 { 544 return false 545 } 546 if s.Overlap > 0.5 { 547 return true 548 } 549 return true 550 } 551 552 func shouldUseShuffleRanges(s *pb.ShuffleRange) []float64 { 553 if s == nil { 554 return nil 555 } 556 if s.Uniform > 0.3 { 557 return nil 558 } 559 return s.Result 560 }