github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/plan/stats.go (about) 1 // Copyright 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package plan 16 17 import ( 18 "bytes" 19 "context" 20 "fmt" 21 "math" 22 "sort" 23 "strings" 24 "time" 25 26 "github.com/matrixorigin/matrixone/pkg/sql/colexec" 27 28 "github.com/matrixorigin/matrixone/pkg/catalog" 29 "github.com/matrixorigin/matrixone/pkg/container/batch" 30 "github.com/matrixorigin/matrixone/pkg/container/types" 31 "github.com/matrixorigin/matrixone/pkg/logutil" 32 "github.com/matrixorigin/matrixone/pkg/objectio" 33 "github.com/matrixorigin/matrixone/pkg/pb/plan" 34 pb "github.com/matrixorigin/matrixone/pkg/pb/statsinfo" 35 "github.com/matrixorigin/matrixone/pkg/sql/util" 36 v2 "github.com/matrixorigin/matrixone/pkg/util/metric/v2" 37 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/options" 38 "github.com/matrixorigin/matrixone/pkg/vm/process" 39 ) 40 41 const DefaultBlockMaxRows = 8192 42 const BlockNumForceOneCN = 200 43 const highNDVcolumnThreshHold = 0.95 44 const statsCacheInitSize = 128 45 const statsCacheMaxSize = 8192 46 47 type StatsCache struct { 48 cache map[uint64]*pb.StatsInfo 49 } 50 51 func NewStatsCache() *StatsCache { 52 return &StatsCache{ 53 cache: make(map[uint64]*pb.StatsInfo, statsCacheInitSize), 54 } 55 } 56 57 // GetStatsInfo returns the stats info and if the info in the cache needs to be updated. 
58 func (sc *StatsCache) GetStatsInfo(tableID uint64, create bool) *pb.StatsInfo { 59 if sc == nil { 60 return nil 61 } 62 if s, ok := sc.cache[tableID]; ok { 63 return s 64 } 65 if create { 66 if len(sc.cache) > statsCacheMaxSize { 67 sc.cache = make(map[uint64]*pb.StatsInfo, statsCacheInitSize) 68 logutil.Infof("statscache entries more than %v in long session, release memory and create new cachepool", statsCacheMaxSize) 69 } 70 s := NewStatsInfo() 71 sc.cache[tableID] = s 72 return s 73 } else { 74 return nil 75 } 76 } 77 78 // SetStatsInfo updates the stats info in the cache. 79 func (sc *StatsCache) SetStatsInfo(tableID uint64, s *pb.StatsInfo) { 80 if sc == nil { 81 return 82 } 83 sc.cache[tableID] = s 84 } 85 86 func NewStatsInfo() *pb.StatsInfo { 87 return &pb.StatsInfo{ 88 NdvMap: make(map[string]float64), 89 MinValMap: make(map[string]float64), 90 MaxValMap: make(map[string]float64), 91 DataTypeMap: make(map[string]uint64), 92 NullCntMap: make(map[string]uint64), 93 SizeMap: make(map[string]uint64), 94 ShuffleRangeMap: make(map[string]*pb.ShuffleRange), 95 BlockNumber: 0, 96 ApproxObjectNumber: 0, 97 TableCnt: 0, 98 } 99 } 100 101 type InfoFromZoneMap struct { 102 ColumnZMs []objectio.ZoneMap 103 DataTypes []types.Type 104 ColumnNDVs []float64 105 NullCnts []int64 106 ShuffleRanges []*pb.ShuffleRange 107 ColumnSize []int64 108 BlockNumber int64 109 AccurateObjectNumber int64 110 ApproxObjectNumber int64 111 TableCnt float64 112 } 113 114 func NewInfoFromZoneMap(lenCols int) *InfoFromZoneMap { 115 info := &InfoFromZoneMap{ 116 ColumnZMs: make([]objectio.ZoneMap, lenCols), 117 DataTypes: make([]types.Type, lenCols), 118 ColumnNDVs: make([]float64, lenCols), 119 NullCnts: make([]int64, lenCols), 120 ColumnSize: make([]int64, lenCols), 121 ShuffleRanges: make([]*pb.ShuffleRange, lenCols), 122 } 123 return info 124 } 125 126 func UpdateStatsInfo(info *InfoFromZoneMap, tableDef *plan.TableDef, s *pb.StatsInfo) { 127 start := time.Now() 128 defer func() { 129 
v2.TxnStatementUpdateStatsInfoMapHistogram.Observe(time.Since(start).Seconds()) 130 }() 131 s.ApproxObjectNumber = info.ApproxObjectNumber 132 s.AccurateObjectNumber = info.AccurateObjectNumber 133 s.BlockNumber = info.BlockNumber 134 s.TableCnt = info.TableCnt 135 s.TableName = tableDef.Name 136 //calc ndv with min,max,distinct value in zonemap, blocknumer and column type 137 //set info in statsInfo 138 for i, coldef := range tableDef.Cols[:len(tableDef.Cols)-1] { 139 colName := coldef.Name 140 s.NdvMap[colName] = info.ColumnNDVs[i] 141 s.DataTypeMap[colName] = uint64(info.DataTypes[i].Oid) 142 s.NullCntMap[colName] = uint64(info.NullCnts[i]) 143 s.SizeMap[colName] = uint64(info.ColumnSize[i]) 144 145 if !info.ColumnZMs[i].IsInited() { 146 s.MinValMap[colName] = 0 147 s.MaxValMap[colName] = 0 148 continue 149 } 150 switch info.DataTypes[i].Oid { 151 case types.T_bit: 152 s.MinValMap[colName] = float64(types.DecodeUint64(info.ColumnZMs[i].GetMinBuf())) 153 s.MaxValMap[colName] = float64(types.DecodeUint64(info.ColumnZMs[i].GetMaxBuf())) 154 case types.T_int8: 155 s.MinValMap[colName] = float64(types.DecodeInt8(info.ColumnZMs[i].GetMinBuf())) 156 s.MaxValMap[colName] = float64(types.DecodeInt8(info.ColumnZMs[i].GetMaxBuf())) 157 case types.T_int16: 158 s.MinValMap[colName] = float64(types.DecodeInt16(info.ColumnZMs[i].GetMinBuf())) 159 s.MaxValMap[colName] = float64(types.DecodeInt16(info.ColumnZMs[i].GetMaxBuf())) 160 case types.T_int32: 161 s.MinValMap[colName] = float64(types.DecodeInt32(info.ColumnZMs[i].GetMinBuf())) 162 s.MaxValMap[colName] = float64(types.DecodeInt32(info.ColumnZMs[i].GetMaxBuf())) 163 case types.T_int64: 164 s.MinValMap[colName] = float64(types.DecodeInt64(info.ColumnZMs[i].GetMinBuf())) 165 s.MaxValMap[colName] = float64(types.DecodeInt64(info.ColumnZMs[i].GetMaxBuf())) 166 case types.T_uint8: 167 s.MinValMap[colName] = float64(types.DecodeUint8(info.ColumnZMs[i].GetMinBuf())) 168 s.MaxValMap[colName] = 
float64(types.DecodeUint8(info.ColumnZMs[i].GetMaxBuf())) 169 case types.T_uint16: 170 s.MinValMap[colName] = float64(types.DecodeUint16(info.ColumnZMs[i].GetMinBuf())) 171 s.MaxValMap[colName] = float64(types.DecodeUint16(info.ColumnZMs[i].GetMaxBuf())) 172 case types.T_uint32: 173 s.MinValMap[colName] = float64(types.DecodeUint32(info.ColumnZMs[i].GetMinBuf())) 174 s.MaxValMap[colName] = float64(types.DecodeUint32(info.ColumnZMs[i].GetMaxBuf())) 175 case types.T_uint64: 176 s.MinValMap[colName] = float64(types.DecodeUint64(info.ColumnZMs[i].GetMinBuf())) 177 s.MaxValMap[colName] = float64(types.DecodeUint64(info.ColumnZMs[i].GetMaxBuf())) 178 case types.T_date: 179 s.MinValMap[colName] = float64(types.DecodeDate(info.ColumnZMs[i].GetMinBuf())) 180 s.MaxValMap[colName] = float64(types.DecodeDate(info.ColumnZMs[i].GetMaxBuf())) 181 case types.T_time: 182 s.MinValMap[colName] = float64(types.DecodeTime(info.ColumnZMs[i].GetMinBuf())) 183 s.MaxValMap[colName] = float64(types.DecodeTime(info.ColumnZMs[i].GetMaxBuf())) 184 case types.T_timestamp: 185 s.MinValMap[colName] = float64(types.DecodeTimestamp(info.ColumnZMs[i].GetMinBuf())) 186 s.MaxValMap[colName] = float64(types.DecodeTimestamp(info.ColumnZMs[i].GetMaxBuf())) 187 case types.T_datetime: 188 s.MinValMap[colName] = float64(types.DecodeDatetime(info.ColumnZMs[i].GetMinBuf())) 189 s.MaxValMap[colName] = float64(types.DecodeDatetime(info.ColumnZMs[i].GetMaxBuf())) 190 case types.T_char, types.T_varchar, types.T_text: 191 s.MinValMap[colName] = float64(ByteSliceToUint64(info.ColumnZMs[i].GetMinBuf())) 192 s.MaxValMap[colName] = float64(ByteSliceToUint64(info.ColumnZMs[i].GetMaxBuf())) 193 } 194 195 if info.ShuffleRanges[i] != nil { 196 if s.MinValMap[colName] != s.MaxValMap[colName] && 197 s.TableCnt > HashMapSizeForShuffle && 198 info.ColumnNDVs[i] >= ShuffleThreshHoldOfNDV && 199 !util.JudgeIsCompositeClusterByColumn(colName) && 200 colName != catalog.CPrimaryKeyColName { 201 info.ShuffleRanges[i].Eval() 202 
s.ShuffleRangeMap[colName] = info.ShuffleRanges[i] 203 } 204 info.ShuffleRanges[i] = nil 205 } 206 } 207 } 208 209 // cols in one table, return if ndv of multi column is high enough 210 func isHighNdvCols(cols []int32, tableDef *TableDef, builder *QueryBuilder) bool { 211 if tableDef == nil { 212 return false 213 } 214 // first to check if it is primary key. 215 if containsAllPKs(cols, tableDef) { 216 return true 217 } 218 219 s := builder.getStatsInfoByTableID(tableDef.TblId) 220 if s == nil { 221 return false 222 } 223 var totalNDV float64 = 1 224 for i := range cols { 225 totalNDV *= s.NdvMap[tableDef.Cols[cols[i]].Name] 226 } 227 return totalNDV > s.TableCnt*highNDVcolumnThreshHold 228 } 229 230 func (builder *QueryBuilder) getColNDVRatio(cols []int32, tableDef *TableDef) float64 { 231 if tableDef == nil { 232 return 0 233 } 234 // first to check if it is primary key. 235 if containsAllPKs(cols, tableDef) { 236 return 1 237 } 238 239 s := builder.getStatsInfoByTableID(tableDef.TblId) 240 if s == nil { 241 return 0 242 } 243 var totalNDV float64 = 1 244 for i := range cols { 245 totalNDV *= s.NdvMap[tableDef.Cols[cols[i]].Name] 246 } 247 result := totalNDV / s.TableCnt 248 if result > 1 { 249 result = 1 250 } 251 return result 252 } 253 254 func (builder *QueryBuilder) getStatsInfoByTableID(tableID uint64) *pb.StatsInfo { 255 if builder == nil { 256 return nil 257 } 258 sc := builder.compCtx.GetStatsCache() 259 if sc == nil { 260 return nil 261 } 262 return sc.GetStatsInfo(tableID, false) 263 } 264 265 func (builder *QueryBuilder) getStatsInfoByCol(col *plan.ColRef) *pb.StatsInfo { 266 if builder == nil { 267 return nil 268 } 269 sc := builder.compCtx.GetStatsCache() 270 if sc == nil { 271 return nil 272 } 273 tableDef, ok := builder.tag2Table[col.RelPos] 274 if !ok { 275 return nil 276 } 277 //fix column name 278 if len(col.Name) == 0 { 279 col.Name = tableDef.Cols[col.ColPos].Name 280 } 281 return sc.GetStatsInfo(tableDef.TblId, false) 282 } 283 284 func 
(builder *QueryBuilder) getColNdv(col *plan.ColRef) float64 { 285 s := builder.getStatsInfoByCol(col) 286 if s == nil { 287 return -1 288 } 289 return s.NdvMap[col.Name] 290 } 291 292 func getNullSelectivity(arg *plan.Expr, builder *QueryBuilder, isnull bool) float64 { 293 switch exprImpl := arg.Expr.(type) { 294 case *plan.Expr_Col: 295 col := exprImpl.Col 296 s := builder.getStatsInfoByCol(col) 297 if s == nil { 298 break 299 } 300 nullCnt := float64(s.NullCntMap[col.Name]) 301 if isnull { 302 return nullCnt / s.TableCnt 303 } else { 304 return 1 - (nullCnt / s.TableCnt) 305 } 306 } 307 308 if isnull { 309 return 0.1 310 } else { 311 return 0.9 312 } 313 } 314 315 // this function is used to calculate the ndv of expressions, 316 // like year(l_orderdate), substring(phone_number), and assume col is the first argument 317 // if only the ndv of column is needed, please call getColNDV 318 // if this function fail, it will return -1 319 func getExprNdv(expr *plan.Expr, builder *QueryBuilder) float64 { 320 switch exprImpl := expr.Expr.(type) { 321 case *plan.Expr_F: 322 funcName := exprImpl.F.Func.ObjName 323 switch funcName { 324 case "year": 325 return getExprNdv(exprImpl.F.Args[0], builder) / 365 326 case "substring": 327 // no good way to calc ndv for substring 328 return math.Min(getExprNdv(exprImpl.F.Args[0], builder), 25) 329 default: 330 return getExprNdv(exprImpl.F.Args[0], builder) 331 } 332 case *plan.Expr_Col: 333 return builder.getColNdv(exprImpl.Col) 334 } 335 return -1 336 } 337 338 func estimateEqualitySelectivity(expr *plan.Expr, builder *QueryBuilder) float64 { 339 // only filter like func(col)=1 or col=? can estimate outcnt 340 // and only 1 colRef is allowd in the filter. 
otherwise, no good method to calculate 341 col := extractColRefInFilter(expr) 342 if col == nil { 343 return 0.01 344 } 345 ndv := getExprNdv(expr, builder) 346 if ndv > 0 { 347 return 1 / ndv 348 } 349 return 0.01 350 } 351 352 func calcSelectivityByMinMax(funcName string, min, max float64, typ types.T, vals []*plan.Literal) (ret float64) { 353 switch funcName { 354 case ">", ">=": 355 if val, ok := getFloat64Value(typ, vals[0]); ok { 356 ret = (max - val + 1) / (max - min) 357 } 358 case "<", "<=": 359 if val, ok := getFloat64Value(typ, vals[0]); ok { 360 ret = (val - min + 1) / (max - min) 361 } 362 case "between": 363 if lb, ok := getFloat64Value(typ, vals[0]); ok { 364 if ub, ok := getFloat64Value(typ, vals[1]); ok { 365 ret = (ub - lb + 1) / (max - min) 366 } 367 } 368 default: 369 ret = 0.3 370 } 371 if ret < 0 { 372 ret = 0 373 } 374 if ret > 1 { 375 ret = 1 376 } 377 return ret 378 } 379 380 func getFloat64Value(typ types.T, lit *plan.Literal) (float64, bool) { 381 switch typ { 382 case types.T_float32: 383 if val, valOk := lit.Value.(*plan.Literal_Fval); valOk { 384 return float64(val.Fval), true 385 } 386 case types.T_float64: 387 if val, valOk := lit.Value.(*plan.Literal_Dval); valOk { 388 return val.Dval, true 389 } 390 case types.T_int8: 391 if val, valOk := lit.Value.(*plan.Literal_I8Val); valOk { 392 return float64(val.I8Val), true 393 } 394 case types.T_int16: 395 if val, valOk := lit.Value.(*plan.Literal_I16Val); valOk { 396 return float64(val.I16Val), true 397 } 398 case types.T_int32: 399 if val, valOk := lit.Value.(*plan.Literal_I32Val); valOk { 400 return float64(val.I32Val), true 401 } 402 case types.T_int64: 403 if val, valOk := lit.Value.(*plan.Literal_I64Val); valOk { 404 return float64(val.I64Val), true 405 } 406 case types.T_uint8: 407 if val, valOk := lit.Value.(*plan.Literal_U8Val); valOk { 408 return float64(val.U8Val), true 409 } 410 case types.T_uint16: 411 if val, valOk := lit.Value.(*plan.Literal_U16Val); valOk { 412 return 
float64(val.U16Val), true 413 } 414 case types.T_uint32: 415 if val, valOk := lit.Value.(*plan.Literal_U32Val); valOk { 416 return float64(val.U32Val), true 417 } 418 case types.T_uint64: 419 if val, valOk := lit.Value.(*plan.Literal_U64Val); valOk { 420 return float64(val.U64Val), true 421 } 422 case types.T_date: 423 if val, valOk := lit.Value.(*plan.Literal_Dateval); valOk { 424 return float64(val.Dateval), true 425 } 426 case types.T_datetime: 427 if val, valOk := lit.Value.(*plan.Literal_Datetimeval); valOk { 428 return float64(val.Datetimeval), true 429 } 430 } 431 432 return 0, false 433 } 434 435 func estimateNonEqualitySelectivity(expr *plan.Expr, funcName string, builder *QueryBuilder) float64 { 436 // only filter like func(col)>1 , or (col=1) or (col=2) can estimate outcnt 437 // and only 1 colRef is allowd in the filter. otherwise, no good method to calculate 438 col := extractColRefInFilter(expr) 439 if col == nil { 440 return 0.1 441 } 442 s := builder.getStatsInfoByCol(col) 443 if s == nil { 444 return 0.1 445 } 446 //check strict filter, otherwise can not estimate outcnt by min/max val 447 col, litType, literals, colFnName := extractColRefAndLiteralsInFilter(expr) 448 if col != nil && len(literals) > 0 { 449 typ := types.T(s.DataTypeMap[col.Name]) 450 if !(typ.IsInteger() || typ.IsDateRelate()) { 451 return 0.1 452 } 453 454 switch colFnName { 455 case "": 456 return calcSelectivityByMinMax(funcName, s.MinValMap[col.Name], s.MaxValMap[col.Name], typ, literals) 457 case "year": 458 switch typ { 459 case types.T_date: 460 minVal := types.Date(s.MinValMap[col.Name]) 461 maxVal := types.Date(s.MaxValMap[col.Name]) 462 return calcSelectivityByMinMax(funcName, float64(minVal.Year()), float64(maxVal.Year()), litType, literals) 463 case types.T_datetime: 464 // TODO 465 } 466 } 467 } 468 469 return 0.1 470 } 471 472 func estimateExprSelectivity(expr *plan.Expr, builder *QueryBuilder) float64 { 473 if expr == nil { 474 return 1 475 } 476 477 switch exprImpl 
:= expr.Expr.(type) { 478 case *plan.Expr_F: 479 funcName := exprImpl.F.Func.ObjName 480 switch funcName { 481 case "=": 482 return estimateEqualitySelectivity(expr, builder) 483 case "!=", "<>": 484 return 0.9 485 case ">", "<", ">=", "<=", "between": 486 return estimateNonEqualitySelectivity(expr, funcName, builder) 487 case "and": 488 sel1 := estimateExprSelectivity(exprImpl.F.Args[0], builder) 489 sel2 := estimateExprSelectivity(exprImpl.F.Args[1], builder) 490 if canMergeToBetweenAnd(exprImpl.F.Args[0], exprImpl.F.Args[1]) && (sel1+sel2) > 1 { 491 return sel1 + sel2 - 1 492 } else { 493 return andSelectivity(sel1, sel2) 494 } 495 case "or": 496 sel1 := estimateExprSelectivity(exprImpl.F.Args[0], builder) 497 sel2 := estimateExprSelectivity(exprImpl.F.Args[1], builder) 498 return orSelectivity(sel1, sel2) 499 case "not": 500 return 1 - estimateExprSelectivity(exprImpl.F.Args[0], builder) 501 case "like": 502 return 0.2 503 case "prefix_eq": 504 ndv := getExprNdv(expr, builder) 505 if ndv > 10 { 506 return 10 / ndv 507 } 508 return 0.5 509 case "in": 510 card := float64(exprImpl.F.Args[1].GetVec().Len) 511 ndv := getExprNdv(expr, builder) 512 if ndv > card { 513 return card / ndv 514 } 515 return 1 516 case "prefix_in": 517 card := float64(exprImpl.F.Args[1].GetVec().Len) 518 ndv := getExprNdv(expr, builder) 519 if ndv > 10*card { 520 return 10 * card / ndv 521 } 522 return 0.5 523 case "prefix_between": 524 return 0.1 525 case "isnull", "is_null": 526 return getNullSelectivity(exprImpl.F.Args[0], builder, true) 527 case "isnotnull", "is_not_null": 528 return getNullSelectivity(exprImpl.F.Args[0], builder, false) 529 default: 530 return 0.15 531 } 532 case *plan.Expr_Lit: 533 return 1 534 } 535 return 1 536 } 537 538 func estimateFilterWeight(expr *plan.Expr, w float64) float64 { 539 switch expr.Typ.Id { 540 case int32(types.T_decimal64): 541 w += 64 542 case int32(types.T_decimal128): 543 w += 128 544 case int32(types.T_float32), int32(types.T_float64): 545 w 
+= 8 546 case int32(types.T_char), int32(types.T_varchar), int32(types.T_text), int32(types.T_json): 547 w += 4 548 } 549 switch exprImpl := expr.Expr.(type) { 550 case *plan.Expr_F: 551 funcImpl := exprImpl.F 552 switch funcImpl.Func.GetObjName() { 553 case "like": 554 w += 10 555 case "cast": 556 w += 3 557 case "in": 558 w += 2 559 case "<>", "!=": 560 w += 1.2 561 case "<", "<=": 562 w += 1.1 563 default: 564 w += 1 565 } 566 for _, child := range exprImpl.F.Args { 567 w += estimateFilterWeight(child, 0) 568 } 569 } 570 return w 571 } 572 573 // harsh estimate of block selectivity, will improve it in the future 574 func estimateFilterBlockSelectivity(ctx context.Context, expr *plan.Expr, tableDef *plan.TableDef, s *pb.StatsInfo) float64 { 575 if !ExprIsZonemappable(ctx, expr) { 576 return 1 577 } 578 col := extractColRefInFilter(expr) 579 if col != nil { 580 blocksel := calcBlockSelectivityUsingShuffleRange(s.ShuffleRangeMap[col.Name], expr.Selectivity) 581 switch GetSortOrder(tableDef, col.ColPos) { 582 case 0: 583 blocksel = math.Min(blocksel, 0.2) 584 case 1: 585 return math.Min(blocksel, 0.5) 586 case 2: 587 return math.Min(blocksel, 0.7) 588 } 589 return blocksel 590 } 591 return 1 592 } 593 594 func rewriteFilterListByStats(ctx context.Context, nodeID int32, builder *QueryBuilder) { 595 node := builder.qry.Nodes[nodeID] 596 if len(node.Children) > 0 { 597 for _, child := range node.Children { 598 rewriteFilterListByStats(ctx, child, builder) 599 } 600 } 601 switch node.NodeType { 602 case plan.Node_TABLE_SCAN: 603 if node.ObjRef != nil && len(node.FilterList) >= 1 { 604 sort.Slice(node.FilterList, func(i, j int) bool { 605 cost1 := estimateFilterWeight(node.FilterList[i], 0) * estimateExprSelectivity(node.FilterList[i], builder) 606 cost2 := estimateFilterWeight(node.FilterList[j], 0) * estimateExprSelectivity(node.FilterList[j], builder) 607 return cost1 <= cost2 608 }) 609 sort.Slice(node.BlockFilterList, func(i, j int) bool { 610 blockSel1 := 
node.BlockFilterList[i].Selectivity 611 blockSel2 := node.BlockFilterList[j].Selectivity 612 return blockSel1 <= blockSel2 613 }) 614 } 615 } 616 } 617 618 func ReCalcNodeStats(nodeID int32, builder *QueryBuilder, recursive bool, leafNode bool, needResetHashMapStats bool) { 619 node := builder.qry.Nodes[nodeID] 620 if recursive { 621 if len(node.Children) > 0 { 622 for _, child := range node.Children { 623 ReCalcNodeStats(child, builder, recursive, leafNode, needResetHashMapStats) 624 } 625 } 626 } 627 628 var leftStats, rightStats, childStats *Stats 629 if len(node.Children) == 1 { 630 childStats = builder.qry.Nodes[node.Children[0]].Stats 631 } else if len(node.Children) == 2 { 632 leftStats = builder.qry.Nodes[node.Children[0]].Stats 633 rightStats = builder.qry.Nodes[node.Children[1]].Stats 634 } 635 636 if node.Stats == nil { 637 if node.NodeType != plan.Node_EXTERNAL_SCAN && node.NodeType != plan.Node_TABLE_SCAN { 638 node.Stats = DefaultStats() 639 } 640 } 641 642 switch node.NodeType { 643 case plan.Node_JOIN: 644 if needResetHashMapStats { 645 resetHashMapStats(node.Stats) 646 } 647 648 ndv := math.Min(leftStats.Outcnt, rightStats.Outcnt) 649 if ndv < 1 { 650 ndv = 1 651 } 652 //assume all join is not cross join 653 //will fix this in the future 654 //isCrossJoin := (len(node.OnList) == 0) 655 isCrossJoin := false 656 selectivity := math.Pow(rightStats.Selectivity, math.Pow(leftStats.Selectivity, 0.2)) 657 selectivity_out := andSelectivity(leftStats.Selectivity, rightStats.Selectivity) 658 659 for _, pred := range node.OnList { 660 if pred.Ndv <= 0 { 661 pred.Ndv = getExprNdv(pred, builder) 662 } 663 } 664 665 switch node.JoinType { 666 case plan.Node_INNER: 667 outcnt := leftStats.Outcnt * rightStats.Outcnt / ndv 668 if !isCrossJoin { 669 outcnt *= selectivity 670 } 671 if outcnt < rightStats.Outcnt && leftStats.Selectivity > 0.95 { 672 outcnt = rightStats.Outcnt 673 } 674 node.Stats.Outcnt = outcnt 675 node.Stats.Cost = leftStats.Cost + rightStats.Cost 
676 node.Stats.HashmapStats.HashmapSize = rightStats.Outcnt 677 node.Stats.Selectivity = selectivity_out 678 679 case plan.Node_LEFT: 680 node.Stats.Outcnt = leftStats.Outcnt 681 node.Stats.Cost = leftStats.Cost + rightStats.Cost 682 node.Stats.HashmapStats.HashmapSize = rightStats.Outcnt 683 node.Stats.Selectivity = selectivity_out 684 685 case plan.Node_RIGHT: 686 node.Stats.Outcnt = rightStats.Outcnt 687 node.Stats.Cost = leftStats.Cost + rightStats.Cost 688 node.Stats.HashmapStats.HashmapSize = rightStats.Outcnt 689 node.Stats.Selectivity = selectivity_out 690 691 case plan.Node_OUTER: 692 node.Stats.Outcnt = leftStats.Outcnt + rightStats.Outcnt 693 node.Stats.Cost = leftStats.Cost + rightStats.Cost 694 node.Stats.HashmapStats.HashmapSize = rightStats.Outcnt 695 node.Stats.Selectivity = selectivity_out 696 697 case plan.Node_SEMI, plan.Node_INDEX: 698 node.Stats.Outcnt = leftStats.Outcnt * selectivity 699 node.Stats.Cost = leftStats.Cost + rightStats.Cost 700 node.Stats.HashmapStats.HashmapSize = rightStats.Outcnt 701 node.Stats.Selectivity = selectivity_out 702 703 case plan.Node_ANTI: 704 node.Stats.Outcnt = leftStats.Outcnt * (1 - rightStats.Selectivity) * 0.5 705 node.Stats.Cost = leftStats.Cost + rightStats.Cost 706 node.Stats.HashmapStats.HashmapSize = rightStats.Outcnt 707 node.Stats.Selectivity = selectivity_out 708 709 case plan.Node_SINGLE, plan.Node_MARK: 710 node.Stats.Outcnt = leftStats.Outcnt 711 node.Stats.Cost = leftStats.Cost + rightStats.Cost 712 node.Stats.HashmapStats.HashmapSize = rightStats.Outcnt 713 node.Stats.Selectivity = selectivity_out 714 } 715 716 case plan.Node_AGG: 717 if needResetHashMapStats { 718 resetHashMapStats(node.Stats) 719 } 720 if len(node.GroupBy) > 0 { 721 incnt := childStats.Outcnt 722 outcnt := 1.0 723 for _, groupby := range node.GroupBy { 724 ndv := getExprNdv(groupby, builder) 725 if ndv > 1 { 726 groupby.Ndv = ndv 727 outcnt *= ndv 728 } 729 } 730 if outcnt > incnt { 731 outcnt = math.Min(incnt, 
outcnt*math.Pow(childStats.Selectivity, 0.8)) 732 } 733 node.Stats.Outcnt = outcnt 734 node.Stats.Cost = incnt + outcnt 735 node.Stats.HashmapStats.HashmapSize = outcnt 736 node.Stats.Selectivity = 1 737 if len(node.FilterList) > 0 { 738 node.Stats.Outcnt *= 0.0001 739 node.Stats.Selectivity *= 0.0001 740 } 741 } else { 742 node.Stats.Outcnt = 1 743 node.Stats.Cost = childStats.Cost 744 node.Stats.HashmapStats.HashmapSize = 1 745 node.Stats.Selectivity = 1 746 } 747 748 case plan.Node_UNION: 749 if needResetHashMapStats { 750 resetHashMapStats(node.Stats) 751 } 752 node.Stats.Outcnt = (leftStats.Outcnt + rightStats.Outcnt) * 0.7 753 node.Stats.Cost = leftStats.Outcnt + rightStats.Outcnt 754 node.Stats.Selectivity = 1 755 node.Stats.HashmapStats.HashmapSize = rightStats.Outcnt 756 757 case plan.Node_UNION_ALL: 758 node.Stats.Outcnt = leftStats.Outcnt + rightStats.Outcnt 759 node.Stats.Cost = leftStats.Outcnt + rightStats.Outcnt 760 node.Stats.Selectivity = 1 761 762 case plan.Node_INTERSECT: 763 if needResetHashMapStats { 764 resetHashMapStats(node.Stats) 765 } 766 node.Stats.Outcnt = math.Min(leftStats.Outcnt, rightStats.Outcnt) * 0.5 767 node.Stats.Cost = leftStats.Outcnt + rightStats.Outcnt 768 node.Stats.Selectivity = 1 769 node.Stats.HashmapStats.HashmapSize = rightStats.Outcnt 770 771 case plan.Node_INTERSECT_ALL: 772 if needResetHashMapStats { 773 resetHashMapStats(node.Stats) 774 } 775 node.Stats.Outcnt = math.Min(leftStats.Outcnt, rightStats.Outcnt) * 0.7 776 node.Stats.Cost = leftStats.Outcnt + rightStats.Outcnt 777 node.Stats.Selectivity = 1 778 node.Stats.HashmapStats.HashmapSize = rightStats.Outcnt 779 780 case plan.Node_MINUS: 781 if needResetHashMapStats { 782 resetHashMapStats(node.Stats) 783 } 784 minus := math.Max(leftStats.Outcnt, rightStats.Outcnt) - math.Min(leftStats.Outcnt, rightStats.Outcnt) 785 node.Stats.Outcnt = minus * 0.5 786 node.Stats.Cost = leftStats.Outcnt + rightStats.Outcnt 787 node.Stats.Selectivity = 1 788 
node.Stats.HashmapStats.HashmapSize = rightStats.Outcnt 789 790 case plan.Node_MINUS_ALL: 791 if needResetHashMapStats { 792 resetHashMapStats(node.Stats) 793 } 794 minus := math.Max(leftStats.Outcnt, rightStats.Outcnt) - math.Min(leftStats.Outcnt, rightStats.Outcnt) 795 node.Stats.Outcnt = minus * 0.7 796 node.Stats.Cost = leftStats.Outcnt + rightStats.Outcnt 797 node.Stats.Selectivity = 1 798 node.Stats.HashmapStats.HashmapSize = rightStats.Outcnt 799 800 case plan.Node_VALUE_SCAN: 801 if node.RowsetData != nil { 802 rowCount := float64(node.RowsetData.RowCount) 803 node.Stats.TableCnt = rowCount 804 node.Stats.BlockNum = int32(rowCount/float64(options.DefaultBlockMaxRows) + 1) 805 node.Stats.Cost = rowCount 806 node.Stats.Outcnt = rowCount 807 node.Stats.Selectivity = 1 808 } 809 810 case plan.Node_SINK_SCAN: 811 sourceNode := builder.qry.Steps[node.GetSourceStep()[0]] 812 node.Stats = builder.qry.Nodes[sourceNode].Stats 813 814 case plan.Node_RECURSIVE_SCAN: 815 sourceNode := builder.qry.Steps[node.GetSourceStep()[0]] 816 node.Stats = builder.qry.Nodes[sourceNode].Stats 817 818 case plan.Node_EXTERNAL_SCAN: 819 //calc for external scan is heavy, avoid recalc of this 820 if node.Stats == nil || node.Stats.TableCnt == 0 { 821 node.Stats = getExternalStats(node, builder) 822 } 823 824 case plan.Node_TABLE_SCAN: 825 //calc for scan is heavy. use leafNode to judge if scan need to recalculate 826 if node.ObjRef != nil && leafNode { 827 if len(node.BindingTags) > 0 { 828 builder.tag2Table[node.BindingTags[0]] = node.TableDef 829 } 830 newStats := calcScanStats(node, builder) 831 if needResetHashMapStats { 832 resetHashMapStats(newStats) 833 } 834 node.Stats = newStats 835 } 836 837 case plan.Node_FILTER: 838 //filters which can not push down to scan nodes. 
hard to estimate selectivity 839 node.Stats.Outcnt = childStats.Outcnt * 0.05 840 if node.Stats.Outcnt < 1 { 841 node.Stats.Outcnt = 1 842 } 843 node.Stats.Cost = childStats.Cost 844 node.Stats.Selectivity = 0.05 845 846 case plan.Node_FUNCTION_SCAN: 847 if !computeFunctionScan(node.TableDef.TblFunc.Name, node.TblFuncExprList, node.Stats) { 848 if len(node.Children) > 0 && childStats != nil { 849 node.Stats.Outcnt = childStats.Outcnt 850 node.Stats.Cost = childStats.Outcnt 851 node.Stats.Selectivity = childStats.Selectivity 852 } 853 } 854 855 case plan.Node_INSERT: 856 if len(node.Children) > 0 && childStats != nil { 857 node.Stats.Outcnt = childStats.Outcnt 858 node.Stats.Cost = childStats.Outcnt 859 node.Stats.Selectivity = childStats.Selectivity 860 node.Stats.Rowsize = GetRowSizeFromTableDef(node.TableDef, true) * 0.8 861 } 862 863 default: 864 if len(node.Children) > 0 && childStats != nil { 865 node.Stats.Outcnt = childStats.Outcnt 866 node.Stats.Cost = childStats.Outcnt 867 node.Stats.Selectivity = childStats.Selectivity 868 } 869 } 870 871 // if there is a limit, outcnt is limit number 872 if node.Limit != nil { 873 limitExpr := DeepCopyExpr(node.Limit) 874 if _, ok := limitExpr.Expr.(*plan.Expr_F); ok { 875 if !hasParam(limitExpr) { 876 limitExpr, _ = ConstantFold(batch.EmptyForConstFoldBatch, limitExpr, builder.compCtx.GetProcess(), true) 877 } 878 } 879 if cExpr, ok := limitExpr.Expr.(*plan.Expr_Lit); ok { 880 if c, ok := cExpr.Lit.Value.(*plan.Literal_I64Val); ok { 881 node.Stats.Outcnt = float64(c.I64Val) 882 node.Stats.Selectivity = node.Stats.Outcnt / node.Stats.Cost 883 } 884 } 885 } 886 } 887 888 func computeFunctionScan(name string, exprs []*Expr, nodeStat *Stats) bool { 889 if name != "generate_series" { 890 return false 891 } 892 var cost float64 893 var canGetCost bool 894 if len(exprs) == 2 { 895 if exprs[0].Typ.Id != exprs[1].Typ.Id { 896 return false 897 } 898 cost, canGetCost = getCost(exprs[0], exprs[1], nil) 899 } else if len(exprs) == 3 
{
		if !(exprs[0].Typ.Id == exprs[1].Typ.Id && exprs[1].Typ.Id == exprs[2].Typ.Id) {
			return false
		}
		cost, canGetCost = getCost(exprs[0], exprs[1], exprs[2])
	} else {
		return false
	}
	if !canGetCost {
		return false
	}
	nodeStat.Outcnt = cost
	nodeStat.TableCnt = cost
	nodeStat.Cost = cost
	nodeStat.Selectivity = 1
	return true
}

// getCost estimates how many steps it takes to walk from start to end in
// increments of step, i.e. (end - start) / step.
//
// start and end must be non-null int32 or int64 literals whose literal kind
// matches start.Typ.Id. step may be nil, in which case it defaults to +1 or -1
// depending on the direction from start to end.
//
// The second result is false when the operands are not such literals, when
// start.Typ.Id is neither int32 nor int64, when step is zero, or when step
// points away from end (negative quotient).
func getCost(start *Expr, end *Expr, step *Expr) (float64, bool) {
	getInt32Val := func(e *Expr) (float64, bool) {
		if s, ok := e.Expr.(*plan.Expr_Lit); ok {
			if v, ok := s.Lit.Value.(*plan.Literal_I32Val); ok && !s.Lit.Isnull {
				return float64(v.I32Val), true
			}
		}
		return 0, false
	}
	getInt64Val := func(e *Expr) (float64, bool) {
		if s, ok := e.Expr.(*plan.Expr_Lit); ok {
			if v, ok := s.Lit.Value.(*plan.Literal_I64Val); ok && !s.Lit.Isnull {
				return float64(v.I64Val), true
			}
		}
		return 0, false
	}

	var readLit func(e *Expr) (float64, bool)
	switch start.Typ.Id {
	case int32(types.T_int32):
		readLit = getInt32Val
	case int32(types.T_int64):
		readLit = getInt64Val
	default:
		// Unsupported operand type: there is nothing to base an estimate on,
		// so do not report a (bogus) cost of zero.
		return 0, false
	}

	startNum, ok := readLit(start)
	if !ok {
		return 0, false
	}
	endNum, ok := readLit(end)
	if !ok {
		return 0, false
	}

	var stepNum float64
	switch {
	case step != nil:
		if stepNum, ok = readLit(step); !ok {
			return 0, false
		}
	case startNum > endNum:
		stepNum = -1
	default:
		stepNum = 1
	}
	if stepNum == 0 {
		// A zero step would make the quotient below NaN or +Inf.
		return 0, false
	}

	ret := (endNum - startNum) / stepNum
	if ret < 0 {
		return 0, false
	}
	return ret, true
}

// foldTableScanFilters walks the plan subtree rooted at nodeId and replaces
// every filter of each TABLE_SCAN node that has filters with the result of
// ConstantFold. The first folding error aborts the walk and is returned.
func foldTableScanFilters(proc *process.Process, qry *Query, nodeId int32) error {
	node := qry.Nodes[nodeId]
	if node.NodeType == plan.Node_TABLE_SCAN && len(node.FilterList) > 0 {
		for i, filter := range node.FilterList {
			folded, err := ConstantFold(batch.EmptyForConstFoldBatch, filter, proc, false)
			if err != nil {
				return err
			}
			node.FilterList[i] = folded
		}
	}
	for _, childID := range node.Children {
		if err := foldTableScanFilters(proc, qry, childID); err != nil {
			return err
		}
	}
	return nil
}

// recalcStatsByRuntimeFilter scales the Cost and Outcnt of a TABLE_SCAN node
// by runtimeFilterSel, keeps Cost at 1 or above, and recomputes BlockNum from
// the scaled Outcnt. Nodes of any other type are left untouched.
//
// joinNode is currently not used by this function.
func recalcStatsByRuntimeFilter(node *plan.Node, joinNode *plan.Node, runtimeFilterSel float64) {
	if node.NodeType != plan.Node_TABLE_SCAN {
		return
	}
	node.Stats.Cost *= runtimeFilterSel
	node.Stats.Outcnt *= runtimeFilterSel
	if node.Stats.Cost < 1 {
		node.Stats.Cost = 1
	}
	node.Stats.BlockNum = int32(node.Stats.Outcnt/2) + 1
}

// calcScanStats computes the statistics of a table scan node.
//
// It returns DefaultStats when stats are skipped for this builder, when the
// table is internal, or when table statistics cannot be obtained from the
// compile context. Otherwise it
//   - stores a per-filter selectivity on every entry of node.FilterList,
//   - rebuilds node.BlockFilterList with copies of the filters selected for
//     block-level push-down (controlled by builder.optimizerHints.blockFilter:
//     1 = push down every zonemappable filter, 2 = never push down, anything
//     else = push down filters whose block selectivity is below 1, or all
//     filters when the table name carries the index-table prefix),
//   - derives Selectivity, Outcnt, Cost and BlockNum from the table row
//     count, the combined filter selectivity and the combined block
//     selectivity,
//   - and, when the node has a literal int64 LIMIT, overrides Outcnt, BlockNum
//     and Cost with limit-based estimates.
func calcScanStats(node *plan.Node, builder *QueryBuilder) *plan.Stats {
	if builder.skipStats {
		return DefaultStats()
	}
	if InternalTable(node.TableDef) {
		return DefaultStats()
	}
	if shouldReturnMinimalStats(node) {
		return DefaultMinimalStats()
	}

	//ts := timestamp.Timestamp{}
	//if node.ScanTS != nil {
	//	ts = *node.ScanTS
	//}

	scanSnapshot := node.ScanSnapshot
	if scanSnapshot == nil {
		scanSnapshot = &Snapshot{}
	}

	s, err := builder.compCtx.Stats(node.ObjRef, *scanSnapshot)
	if err != nil || s == nil {
		return DefaultStats()
	}

	stats := new(plan.Stats)
	stats.TableCnt = s.TableCnt
	var blockSel float64 = 1

	hints := builder.optimizerHints
	var blockExprList []*plan.Expr
	for i := range node.FilterList {
		node.FilterList[i].Selectivity = estimateExprSelectivity(node.FilterList[i], builder)
		currentBlockSel := estimateFilterBlockSelectivity(builder.GetContext(), node.FilterList[i], node.TableDef, s)

		var pushDown bool
		switch {
		case hints != nil && hints.blockFilter == 1:
			// always try to push down block filters if zonemappable
			pushDown = ExprIsZonemappable(builder.GetContext(), node.FilterList[i])
		case hints != nil && hints.blockFilter == 2:
			// never push down block filters
			pushDown = false
		default:
			pushDown = currentBlockSel < 1 || strings.HasPrefix(node.TableDef.Name, catalog.IndexTableNamePrefix)
		}
		if pushDown {
			copyOfExpr := DeepCopyExpr(node.FilterList[i])
			copyOfExpr.Selectivity = currentBlockSel
			blockExprList = append(blockExprList, copyOfExpr)
		}
		blockSel = andSelectivity(blockSel, currentBlockSel)
	}
	node.BlockFilterList = blockExprList
	stats.Selectivity = estimateExprSelectivity(colexec.RewriteFilterExprList(node.FilterList), builder)
	stats.Outcnt = stats.Selectivity * stats.TableCnt
	stats.Cost = stats.TableCnt * blockSel
	stats.BlockNum = int32(float64(s.BlockNumber)*blockSel) + 1

	// if there is a limit, outcnt is limit number
	if node.Limit != nil {
		if cExpr, ok := node.Limit.Expr.(*plan.Expr_Lit); ok {
			if c, ok := cExpr.Lit.Value.(*plan.Literal_I64Val); ok {
				stats.Outcnt = float64(c.I64Val)
				// With a zero selectivity the limit-based estimate is
				// undefined (division by zero), so the full-scan BlockNum and
				// Cost computed above are kept in that case.
				if stats.Selectivity > 0 {
					// Clamp before converting: an out-of-range float64 to
					// int32 conversion yields an implementation-dependent
					// value.
					blocks := math.Min(stats.Outcnt/stats.Selectivity/DefaultBlockMaxRows+1, math.MaxInt32)
					stats.BlockNum = int32(blocks)
					// Multiply in float64 so a large block count cannot
					// overflow int32.
					stats.Cost = float64(stats.BlockNum) * DefaultBlockMaxRows
				}
			}
		}
	}

	return stats
}

// shouldReturnMinimalStats reports whether calcScanStats should short-circuit
// to DefaultMinimalStats for node. It currently always returns false.
func shouldReturnMinimalStats(node *plan.Node) bool {
	return false
}

// InternalTable reports whether tableDef is treated as an internal table: its
// id is one of the mo_database, mo_tables or mo_columns catalog ids, or its
// name starts with "sys_" or "mo_".
func InternalTable(tableDef *TableDef) bool {
	switch tableDef.TblId {
	case catalog.MO_DATABASE_ID, catalog.MO_TABLES_ID, catalog.MO_COLUMNS_ID:
		return true
	}
	return strings.HasPrefix(tableDef.Name, "sys_") || strings.HasPrefix(tableDef.Name, "mo_")
}

// DefaultHugeStats returns a fresh placeholder Stats with ten million rows
// for TableCnt, Cost and Outcnt, a selectivity of 1 and 1000 blocks.
func DefaultHugeStats() *plan.Stats {
	return &plan.Stats{
		TableCnt:    10000000,
		Cost:        10000000,
		Outcnt:      10000000,
		Selectivity: 1,
		BlockNum:    1000,
	}
}

// DefaultStats returns a fresh placeholder Stats with 1000 rows for TableCnt,
// Cost and Outcnt, a selectivity of 1 and a single block.
func DefaultStats() *plan.Stats {
	return &plan.Stats{
		TableCnt:    1000,
		Cost:        1000,
		Outcnt:      1000,
		Selectivity: 1,
		BlockNum:    1,
	}
}

// DefaultMinimalStats returns a fresh placeholder Stats with a table count of
// 100000, a selectivity of 0.0001, Cost and Outcnt of 10 and a single block.
func DefaultMinimalStats() *plan.Stats {
	return &plan.Stats{
		TableCnt:    100000,
		Cost:        10,
		Outcnt:      10,
		Selectivity: 0.0001,
		BlockNum:    1,
	}
}

// resetHashMapStats makes sure stats.HashmapStats is non-nil and clears its
// HashmapSize, HashOnPK and Shuffle fields. Other fields of an existing
// HashmapStats are left as they are.
func resetHashMapStats(stats *plan.Stats) {
	if stats.HashmapStats == nil {
		stats.HashmapStats = &plan.HashMapStats{}
		return
	}
	stats.HashmapStats.HashmapSize = 0
	stats.HashmapStats.HashOnPK = false
	stats.HashmapStats.Shuffle = false
}

// determineBuildAndProbeSide decides, for the JOIN node nodeID, which child
// is placed on the right side or whether BuildOnLeft is set, based on the
// children's Outcnt. When recursive is true, the children are processed
// first.
//
// Nothing is changed when a join-ordering optimizer hint is set, when the
// node is not a JOIN, or when the right child is a FUNCTION_SCAN.
func (builder *QueryBuilder) determineBuildAndProbeSide(nodeID int32, recursive bool) {
	if builder.optimizerHints != nil && builder.optimizerHints.joinOrdering != 0 {
		return
	}

	node := builder.qry.Nodes[nodeID]
	if recursive {
		for _, child := range node.Children {
			builder.determineBuildAndProbeSide(child, recursive)
		}
	}
	if node.NodeType != plan.Node_JOIN {
		return
	}

	leftChild := builder.qry.Nodes[node.Children[0]]
	rightChild := builder.qry.Nodes[node.Children[1]]
	if rightChild.NodeType == plan.Node_FUNCTION_SCAN {
		return
	}

	switch node.JoinType {
	case plan.Node_INNER, plan.Node_OUTER:
		// Put the child with the smaller output on the right.
		if leftChild.Stats.Outcnt < rightChild.Stats.Outcnt {
			node.Children[0], node.Children[1] = node.Children[1], node.Children[0]
		}

	case plan.Node_LEFT, plan.Node_SEMI, plan.Node_ANTI:
		// right joins do not support non-equal joins for now
		if builder.IsEquiJoin(node) && leftChild.Stats.Outcnt*1.2 < rightChild.Stats.Outcnt && !builder.haveOnDuplicateKey {
			node.BuildOnLeft = true
		}
	}

	// Evaluated after the possible swap above: a subtree containing a
	// recursive scan is always moved to the left.
	if builder.hasRecursiveScan(builder.qry.Nodes[node.Children[1]]) {
		node.Children[0], node.Children[1] = node.Children[1], node.Children[0]
	}
}

// hasRecursiveScan reports whether node or any of its descendants is a
// RECURSIVE_SCAN node.
func (builder *QueryBuilder) hasRecursiveScan(node *plan.Node) bool {
	if node.NodeType == plan.Node_RECURSIVE_SCAN {
		return true
	}
	for _, nodeID := range node.Children {
		if builder.hasRecursiveScan(builder.qry.Nodes[nodeID]) {
			return true
		}
	}
	return false
}

// compareStats reports whether stats1 orders before stats2.
//
// Selectivity is considered first to reduce data; when the selectivities are
// within 0.01 of each other, the smaller Outcnt wins instead.
func compareStats(stats1, stats2 *Stats) bool {
	if math.Abs(stats1.Selectivity-stats2.Selectivity) > 0.01 {
		return stats1.Selectivity < stats2.Selectivity
	}
	// todo we need to calculate ndv of outcnt here
	return stats1.Outcnt < stats2.Outcnt
}

// andSelectivity combines the selectivities of two conjunctive predicates.
// It returns the plain product s1*s2 when either input exceeds 0.15 or the
// product exceeds 0.1; otherwise it returns
// min(s1, s2) * max(s1^s2, s2^s1).
func andSelectivity(s1, s2 float64) float64 {
	if s1 > 0.15 || s2 > 0.15 || s1*s2 > 0.1 {
		return s1 * s2
	}
	return math.Min(s1, s2) * math.Max(math.Pow(s1, s2), math.Pow(s2, s1))
}

// orSelectivity combines the selectivities of two disjunctive predicates.
// When the inputs differ by less than 0.001 and s1 is below 0.2 the result is
// their sum; otherwise it is 1.5 times the larger one. Results above 1 are
// capped at 1.
func orSelectivity(s1, s2 float64) float64 {
	var s float64
	if math.Abs(s1-s2) < 0.001 && s1 < 0.2 {
		s = s1 + s2
	} else {
		s = math.Max(s1, s2) * 1.5
	}
	if s > 1 {
		return 1
	}
	return s
}

// blockThresholdForTpQuery is the largest per-node BlockNum for which
// IsTpQuery still returns true.
const blockThresholdForTpQuery = 16

// IsTpQuery reports whether every node of qry has stats and a BlockNum of at
// most blockThresholdForTpQuery.
func IsTpQuery(qry *plan.Query) bool {
	for _, node := range qry.GetNodes() {
		stats := node.Stats
		if stats == nil || stats.BlockNum > blockThresholdForTpQuery {
			return false
		}
	}
	return true
}

// ReCalcQueryStats recalculates node stats for every step root of
// builder.qry by calling ReCalcNodeStats on it.
//
// NOTE(review): the query parameter is not used; the steps are taken from
// builder.qry.
func ReCalcQueryStats(builder *QueryBuilder, query *plan.Query) {
	for _, rootID := range builder.qry.Steps {
		ReCalcNodeStats(rootID, builder, true, false, true)
	}
}

// PrintStats renders a human-readable listing with one line per node of qry,
// showing the node id and type followed by either its block number and
// output count or "Stats: nil".
func PrintStats(qry *plan.Query) string {
	buf := bytes.NewBuffer(make([]byte, 0, 1024*64))
	buf.WriteString("Print Stats: \n")
	for _, node := range qry.GetNodes() {
		stats := node.Stats
		fmt.Fprintf(buf, "Node ID: %v, Node Type %v, ", node.NodeId, node.NodeType)
		if stats == nil {
			buf.WriteString("Stats: nil\n")
		} else {
			fmt.Fprintf(buf, "blocknum %v, outcnt %v \n", stats.BlockNum, stats.Outcnt)
		}
	}
	return buf.String()
}

// DeepCopyStats returns a copy of stats, including a separate copy of its
// HashmapStats when present. A nil input yields nil.
func DeepCopyStats(stats *plan.Stats) *plan.Stats {
	if stats == nil {
		return nil
	}
	var hashmapStats *plan.HashMapStats
	if stats.HashmapStats != nil {
		hashmapStats = &plan.HashMapStats{
			HashmapSize:   stats.HashmapStats.HashmapSize,
			HashOnPK:      stats.HashmapStats.HashOnPK,
			Shuffle:       stats.HashmapStats.Shuffle,
			ShuffleColIdx: stats.HashmapStats.ShuffleColIdx,
			ShuffleType:   stats.HashmapStats.ShuffleType,
			ShuffleColMin: stats.HashmapStats.ShuffleColMin,
			ShuffleColMax: stats.HashmapStats.ShuffleColMax,
			ShuffleMethod: stats.HashmapStats.ShuffleMethod,
		}
	}
	return &plan.Stats{
		BlockNum:     stats.BlockNum,
		Rowsize:      stats.Rowsize,
		Cost:         stats.Cost,
		Outcnt:       stats.Outcnt,
		TableCnt:     stats.TableCnt,
		Selectivity:  stats.Selectivity,
		HashmapStats: hashmapStats,
		ForceOneCN:   stats.ForceOneCN,
	}
}

// calcBlockSelectivityUsingShuffleRange turns a row selectivity sel into a
// block selectivity.
//
// Without a shuffle range the result is sel*100 for sel <= 0.01 and 1
// otherwise. With a shuffle range the result is sel * 500^(Overlap^2), capped
// at 1.
func calcBlockSelectivityUsingShuffleRange(s *pb.ShuffleRange, sel float64) float64 {
	if s == nil {
		if sel <= 0.01 {
			return sel * 100
		}
		return 1
	}
	ret := sel * math.Pow(500, math.Pow(s.Overlap, 2))
	if ret > 1 {
		ret = 1
	}
	return ret
}

// canSkipStats reports whether the query has exactly the shape
// PROJECT -> AGG -> TABLE_SCAN with a single step, three nodes, no GROUP BY
// and a single aggregate whose function object name is "starcount".
func (builder *QueryBuilder) canSkipStats() bool {
	// for now, only skip stats for select count(*) from xx
	if len(builder.qry.Steps) != 1 || len(builder.qry.Nodes) != 3 {
		return false
	}
	project := builder.qry.Nodes[builder.qry.Steps[0]]
	if project.NodeType != plan.Node_PROJECT || len(project.Children) == 0 {
		return false
	}
	agg := builder.qry.Nodes[project.Children[0]]
	if agg.NodeType != plan.Node_AGG || len(agg.Children) == 0 {
		return false
	}
	if len(agg.AggList) != 1 || len(agg.GroupBy) != 0 {
		return false
	}
	// The generated getters are nil-safe, so a non-function aggregate
	// expression or a function without an object reference yields "" here.
	if agg.AggList[0].GetF().GetFunc().GetObjName() != "starcount" {
		return false
	}
	scan := builder.qry.Nodes[agg.Children[0]]
	return scan.NodeType == plan.Node_TABLE_SCAN
}