github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/plan/partition_prune.go (about) 1 // Copyright 2023 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package plan 16 17 import ( 18 "strings" 19 20 "github.com/matrixorigin/matrixone/pkg/container/batch" 21 "github.com/matrixorigin/matrixone/pkg/container/types" 22 "github.com/matrixorigin/matrixone/pkg/container/vector" 23 "github.com/matrixorigin/matrixone/pkg/pb/plan" 24 "github.com/matrixorigin/matrixone/pkg/sql/colexec" 25 "github.com/matrixorigin/matrixone/pkg/vm/process" 26 ) 27 28 func (builder *QueryBuilder) partitionPrune(nodeID int32) { 29 node := builder.qry.Nodes[nodeID] 30 for _, childID := range node.Children { 31 builder.partitionPrune(childID) 32 } 33 34 switch node.NodeType { 35 case plan.Node_TABLE_SCAN, plan.Node_MATERIAL_SCAN, plan.Node_EXTERNAL_SCAN: 36 if node.TableDef.GetPartition() != nil && len(node.FilterList) != 0 { 37 partitionByDef := node.TableDef.Partition 38 switch partitionByDef.Type { 39 case plan.PartitionType_KEY, plan.PartitionType_LINEAR_KEY, plan.PartitionType_HASH, plan.PartitionType_LINEAR_HASH: 40 pruner := &KeyHashPartitionPruner{ 41 node: node, 42 process: builder.compCtx.GetProcess(), 43 } 44 pruner.init() 45 pruner.prune() 46 case plan.PartitionType_LIST: 47 // XXX unimplement 48 case plan.PartitionType_LIST_COLUMNS: 49 // XXX unimplement 50 case plan.PartitionType_RANGE: 51 // XXX unimplement 52 case plan.PartitionType_RANGE_COLUMNS: 53 // XXX unimplement 54 } 55 } 56 } 57 } 58 59 // KEY and HASH Partition Pruner 60 type KeyHashPartitionPruner struct { 61 partitionKeysMap map[string]int 62 partitionByDef *plan.PartitionByDef 63 node *Node 64 process *process.Process 65 } 66 67 type PartitionPruneResult struct { 68 usedPartitions map[int32]bool 69 isUnablePrune bool 70 needPushUp bool 71 } 72 73 func (p *KeyHashPartitionPruner) init() { 74 partitionByDef := p.node.TableDef.Partition 75 p.partitionByDef = partitionByDef 76 p.partitionKeysMap = make(map[string]int) 77 78 switch partitionByDef.Type { 79 case plan.PartitionType_KEY, plan.PartitionType_LINEAR_KEY: 80 for _, partitionCol := range partitionByDef.PartitionColumns.PartitionColumns { 81 if _, ok := p.partitionKeysMap[partitionCol]; !ok { 82 p.partitionKeysMap[partitionCol] = 1 83 } 84 } 85 case plan.PartitionType_HASH, plan.PartitionType_LINEAR_HASH: 86 extractColumnsFromExpression(partitionByDef.PartitionExpr.Expr, p.partitionKeysMap) 87 } 88 } 89 90 // detachAndPrune Detach of filter conditions and partition prune 91 func (p *KeyHashPartitionPruner) detachAndPrune() *PartitionPruneResult { 92 if len(p.node.FilterList) == 1 { 93 if exprF, ok := p.node.FilterList[0].Expr.(*plan.Expr_F); ok && exprF.F.Func.ObjName == "or" { 94 return p.detachDNFCondAndBuildPrune(p.node.FilterList[0]) 95 } 96 } 97 return p.detachCNFCondAndBuildPrune(p.node.FilterList) 98 } 99 100 func (p *KeyHashPartitionPruner) prune() bool { 101 pruneResult := p.detachAndPrune() 102 if pruneResult.isUnablePrune { 103 return false 104 } 105 106 p.node.PartitionPrune = &plan.PartitionPrune{ 107 IsPruned: true, 108 SelectedPartitions: make([]*plan.PartitionItem, 0, len(pruneResult.usedPartitions)), 109 } 110 111 for pid := range pruneResult.usedPartitions { 112 partitionItem := p.partitionByDef.Partitions[pid] 113 partition := &plan.PartitionItem{ 114 PartitionName: partitionItem.PartitionName, 115 OrdinalPosition: partitionItem.OrdinalPosition, 116 Description: partitionItem.Description, 117 Comment: partitionItem.Comment, 118 LessThan: DeepCopyExprList(partitionItem.LessThan), 119 InValues: DeepCopyExprList(partitionItem.InValues), 120 PartitionTableName: partitionItem.PartitionTableName, 121 } 122 p.node.PartitionPrune.SelectedPartitions = append(p.node.PartitionPrune.SelectedPartitions, partition) 123 } 124 return true 125 } 126 127 func (p *KeyHashPartitionPruner) detachDNFCondAndBuildPrune(orExpr *plan.Expr) *PartitionPruneResult { 128 unablePruneResult := &PartitionPruneResult{ 129 isUnablePrune: true, 130 } 131 132 // split disjunctive expression 133 dnfItems := SplitDNFItems(orExpr) 134 if isAllColEqualConstExpr(dnfItems) { 135 if len(p.partitionKeysMap) == 1 { 136 usedPartitions := make(map[int32]bool) 137 for _, expr := range dnfItems { 138 // 1. extract all ColRef equals const value from expression 139 colEqValMap := make(map[string]*plan.Expr) 140 extractColEqValFromEqualExpr(expr, colEqValMap) 141 142 // 2. Check if all column equivalence expressions contain all partition keys 143 if !exprColsIncludePartitionKeys(p.partitionKeysMap, colEqValMap) { 144 return unablePruneResult 145 } 146 147 if ok, pidx := p.getUsedPartition(colEqValMap); ok { 148 // if pidx=-1, it means that no existing partitions can be selected 149 if pidx != -1 { 150 usedPartitions[pidx] = true 151 } 152 } else { 153 return unablePruneResult 154 } 155 } 156 return &PartitionPruneResult{ 157 isUnablePrune: false, 158 usedPartitions: usedPartitions, 159 } 160 } else { 161 return unablePruneResult 162 } 163 } else { 164 hitPartitions := make(map[int32]bool) 165 for i := range dnfItems { 166 if isLogicExpr(dnfItems[i], "and") { 167 exprs := SplitCNFItems(dnfItems[i]) 168 tmp := p.detachCNFCondAndBuildPrune(exprs) 169 if tmp.needPushUp || tmp.isUnablePrune { 170 return unablePruneResult 171 } else { 172 hitPartitions = union(hitPartitions, tmp.usedPartitions) 173 } 174 } else if isExprColRefEqualConst(dnfItems[i]) { 175 // 2. extract all colRef to const value from filters 176 colEqValMap := make(map[string]*plan.Expr) 177 extractColEqValFromEqualExpr(dnfItems[i], colEqValMap) 178 if !exprColsIncludePartitionKeys(p.partitionKeysMap, colEqValMap) { 179 return unablePruneResult 180 } 181 182 if ok, pidx := p.getUsedPartition(colEqValMap); ok { 183 // if pidx=-1, it means that no existing partitions can be selected 184 if pidx != -1 { 185 hitPartitions[pidx] = true 186 } 187 } else { 188 return unablePruneResult 189 } 190 } else { 191 return unablePruneResult 192 } 193 } 194 return &PartitionPruneResult{ 195 usedPartitions: hitPartitions, 196 isUnablePrune: false, 197 } 198 } 199 } 200 201 func (p *KeyHashPartitionPruner) detachCNFCondAndBuildPrune(conditions []*Expr) *PartitionPruneResult { 202 if isAllSimpleExpr(conditions) { 203 // 1. Collect equivalent expressions 204 if ok, colEqValMap := extractColEqValFromExprs(conditions, p.partitionKeysMap); ok { 205 return p.buildPruneResult(colEqValMap) 206 } else { 207 return &PartitionPruneResult{ 208 isUnablePrune: true, 209 } 210 } 211 } else if isAllLogicExpr(conditions, "or") { 212 return p.buildPruneResultForOrConditions(conditions) 213 } else { 214 return &PartitionPruneResult{ 215 isUnablePrune: true, 216 } 217 } 218 } 219 220 // buildPruneResult Get hit partitions based on the set of equivalent expressions 221 func (p *KeyHashPartitionPruner) buildPruneResult(colEqValMap map[string]*plan.Expr) *PartitionPruneResult { 222 // Check if the conditions meet the partitioning key 223 if len(colEqValMap) != len(p.partitionKeysMap) { 224 return &PartitionPruneResult{ 225 isUnablePrune: true, 226 needPushUp: true, 227 } 228 } 229 230 if ok, pid := p.getUsedPartition(colEqValMap); ok { 231 hitPartitions := make(map[int32]bool) 232 if pid != -1 { 233 hitPartitions[pid] = true 234 } 235 result := &PartitionPruneResult{ 236 isUnablePrune: false, 237 usedPartitions: hitPartitions, 238 } 239 return result 240 } else { 241 return &PartitionPruneResult{ 242 isUnablePrune: true, 243 } 244 } 245 } 246 247 // buildPruneResultForOrConditions Get hit partitions based on the set of disjunction expressions 248 func (p *KeyHashPartitionPruner) buildPruneResultForOrConditions(conditions []*Expr) *PartitionPruneResult { 249 hitPartitions := make(map[int32]bool) 250 for i, cond := range conditions { 251 tmp := p.detachDNFCondAndBuildPrune(cond) 252 if tmp.isUnablePrune { 253 return &PartitionPruneResult{ 254 isUnablePrune: true, 255 } 256 } 257 if i == 0 { 258 hitPartitions = tmp.usedPartitions 259 } else { 260 hitPartitions = intersection(hitPartitions, tmp.usedPartitions) 261 } 262 } 263 return &PartitionPruneResult{ 264 usedPartitions: hitPartitions, 265 isUnablePrune: false, 266 } 267 } 268 269 // getUsedPartition Calculate the partition based on the constant expression of the partition key column 270 func (p *KeyHashPartitionPruner) getUsedPartition(cnfColEqVal map[string]*plan.Expr) (bool, int32) { 271 // 1.evaluate the partition expr where the colRef assigned with const 272 inputBat := batch.NewWithSize(len(p.node.TableDef.GetCols())) 273 inputBat.SetRowCount(1) 274 defer inputBat.Clean(p.process.Mp()) 275 276 for i, colDef := range p.node.TableDef.GetCols() { 277 if valueExpr, ok := cnfColEqVal[colDef.GetName()]; ok { 278 colVec, err := colexec.EvalExpressionOnce(p.process, valueExpr, []*batch.Batch{batch.EmptyForConstFoldBatch}) 279 if err != nil { 280 return false, -1 281 } 282 inputBat.SetVector(int32(i), colVec) 283 } else { 284 typ := types.New(types.T(colDef.Typ.Id), colDef.Typ.Width, colDef.Typ.Scale) 285 colVec := vector.NewConstNull(typ, 1, p.process.Mp()) 286 inputBat.SetVector(int32(i), colVec) 287 } 288 } 289 290 // 2. calculate partition expression 291 resVec, err := colexec.EvalExpressionOnce(p.process, p.partitionByDef.PartitionExpression, []*batch.Batch{inputBat}) 292 if err != nil { 293 return false, -1 294 } 295 defer resVec.Free(p.process.Mp()) 296 297 // 3. prune the partition 298 if resVec.IsConstNull() { 299 return false, -1 300 } else { 301 return true, vector.MustFixedCol[int32](resVec)[0] 302 } 303 } 304 305 // intersection Finding the Intersection of Two Map[int32]bool Sets 306 func intersection(left, right map[int32]bool) map[int32]bool { 307 result := make(map[int32]bool) 308 for key, value := range left { 309 if _, ok := right[key]; ok { 310 result[key] = value 311 } 312 } 313 return result 314 } 315 316 // union Finding the Union of Two Map[int32]bool Sets 317 func union(left, right map[int32]bool) map[int32]bool { 318 result := make(map[int32]bool) 319 for key, value := range left { 320 result[key] = value 321 } 322 for key, value := range right { 323 result[key] = value 324 } 325 return result 326 } 327 328 // SplitCNFItems splits CNF items. 329 // CNF means conjunctive normal form, such as: "a and b and c". 330 func SplitCNFItems(onExpr *Expr) []*Expr { 331 return splitNormalFormItems(onExpr, "and") 332 } 333 334 // SplitDNFItems splits DNF items. 335 // DNF means disjunctive normal form, such as: "a or b or c". 336 func SplitDNFItems(onExpr *Expr) []*Expr { 337 return splitNormalFormItems(onExpr, "or") 338 } 339 340 // splitNormalFormItems split CNF(conjunctive normal form) like "a and b and c", or DNF(disjunctive normal form) like "a or b or c" 341 func splitNormalFormItems(onExpr *Expr, funcName string) []*Expr { 342 // nolint: revive 343 switch v := onExpr.Expr.(type) { 344 case *plan.Expr_F: 345 if v.F.Func.ObjName == funcName { 346 var ret []*Expr 347 for _, arg := range v.F.GetArgs() { 348 ret = append(ret, splitNormalFormItems(arg, funcName)...) 349 } 350 return ret 351 } 352 } 353 return []*Expr{onExpr} 354 } 355 356 // ---------------------------------------------------------------------------------------------------------------------- 357 // extract column equivalent pairs from a equality comparison expression 358 func extractColEqValFromEqualExpr(expr *plan.Expr, colEqValMap map[string]*plan.Expr) { 359 switch exprImpl := expr.Expr.(type) { 360 case *plan.Expr_F: 361 if exprImpl.F.Func.ObjName == "=" { 362 if isColExpr(exprImpl.F.Args[0]) { 363 exprCol := exprImpl.F.Args[0].Expr.(*plan.Expr_Col) 364 colEqValMap[exprCol.Col.Name] = exprImpl.F.Args[1] 365 } else if isColExpr(exprImpl.F.Args[1]) { 366 exprCol := exprImpl.F.Args[1].Expr.(*plan.Expr_Col) 367 colEqValMap[exprCol.Col.Name] = exprImpl.F.Args[0] 368 } 369 } 370 } 371 } 372 373 // extract column equivalent pairs from some expressions 374 func extractColEqValFromExprs(cnfExprs []*Expr, partKeysMap map[string]int) (bool, map[string]*plan.Expr) { 375 colEqValMap := make(map[string]*plan.Expr) 376 for i := range cnfExprs { 377 switch exprImpl := cnfExprs[i].Expr.(type) { 378 case *plan.Expr_F: 379 if exprImpl.F.Func.ObjName == "=" { 380 if isColExpr(exprImpl.F.Args[0]) && isConstExpr(exprImpl.F.Args[1]) { 381 exprCol := exprImpl.F.Args[0].Expr.(*plan.Expr_Col) 382 if _, ok := partKeysMap[exprCol.Col.Name]; ok { 383 colEqValMap[exprCol.Col.Name] = exprImpl.F.Args[1] 384 } 385 } else if isConstExpr(exprImpl.F.Args[0]) && isColExpr(exprImpl.F.Args[1]) { 386 exprCol := exprImpl.F.Args[0].Expr.(*plan.Expr_Col) 387 if _, ok := partKeysMap[exprCol.Col.Name]; ok { 388 colEqValMap[exprCol.Col.Name] = exprImpl.F.Args[0] 389 } 390 } else { 391 continue 392 } 393 } else { 394 if checkExprContainPartitionKey(cnfExprs[i], partKeysMap) { 395 return false, nil 396 } 397 } 398 default: 399 if checkExprContainPartitionKey(cnfExprs[i], partKeysMap) { 400 return false, nil 401 } 402 } 403 } 404 return true, colEqValMap 405 } 406 407 // Extract columns used in partition expressions 408 func extractColumnsFromExpression(expr *plan.Expr, usedColumns map[string]int) { 409 switch e := expr.Expr.(type) { 410 case *plan.Expr_Col: 411 if v, ok := usedColumns[e.Col.Name]; ok { 412 usedColumns[e.Col.Name] = v + 1 413 } else { 414 usedColumns[e.Col.Name] = 1 415 } 416 case *plan.Expr_F: 417 for _, args := range e.F.Args { 418 extractColumnsFromExpression(args, usedColumns) 419 } 420 case *plan.Expr_List: 421 for _, exprl := range e.List.List { 422 extractColumnsFromExpression(exprl, usedColumns) 423 } 424 default: 425 return 426 } 427 } 428 429 // Check if all column equivalence expressions contain all partition keys 430 func exprColsIncludePartitionKeys(partitionKeys map[string]int, exprCols map[string]*plan.Expr) bool { 431 for key := range partitionKeys { 432 if !keyIsInExprCols(key, exprCols) { 433 return false 434 } 435 } 436 return true 437 } 438 439 func keyIsInExprCols(key string, exprCols map[string]*plan.Expr) bool { 440 for c1 := range exprCols { 441 if strings.EqualFold(key, c1) { 442 return true 443 } 444 } 445 return false 446 } 447 448 func isAllSimpleExpr(exprs []*Expr) bool { 449 for _, expr := range exprs { 450 if !isSimpleExpr(expr) { 451 return false 452 } 453 } 454 return true 455 } 456 457 // checkExprContainPartitionKey Check if the expression contains partitioning keys 458 func checkExprContainPartitionKey(expr *Expr, partitionKeys map[string]int) bool { 459 switch v := expr.Expr.(type) { 460 case *plan.Expr_Col: 461 if _, ok := partitionKeys[v.Col.Name]; ok { 462 return true 463 } 464 case *plan.Expr_F: 465 for _, arg := range v.F.GetArgs() { 466 if checkExprContainPartitionKey(arg, partitionKeys) { 467 return true 468 } 469 } 470 } 471 return false 472 } 473 474 func isAllColEqualConstExpr(exprs []*Expr) bool { 475 for _, expr := range exprs { 476 if !isExprColRefEqualConst(expr) { 477 return false 478 } 479 } 480 return true 481 } 482 483 func isAllLogicExpr(exprs []*Expr, funcName string) bool { 484 for _, expr := range exprs { 485 if !isLogicExpr(expr, funcName) { 486 return false 487 } 488 } 489 return true 490 } 491 492 func isSimpleExpr(expr *Expr) bool { 493 switch exprImpl := expr.Expr.(type) { 494 case *plan.Expr_F: 495 for _, arg := range exprImpl.F.Args { 496 if !isFactorExpr(arg) { 497 return false 498 } 499 } 500 } 501 return true 502 } 503 504 func isFactorExpr(expr *Expr) bool { 505 switch exprImpl := expr.Expr.(type) { 506 case *plan.Expr_Col, *plan.Expr_Lit, *plan.Expr_Max, *plan.Expr_T: 507 return true 508 case *plan.Expr_F: 509 if exprImpl.F.Func.ObjName == "cast" { 510 return isFactorExpr(exprImpl.F.Args[0]) 511 } else { 512 return false 513 } 514 default: 515 return false 516 } 517 } 518 519 func isLogicExpr(expr *Expr, funcName string) bool { 520 switch exprImpl := expr.Expr.(type) { 521 case *plan.Expr_F: 522 if exprImpl.F.Func.ObjName == funcName { 523 return true 524 } 525 } 526 return false 527 } 528 529 func isExprColRefEqualConst(expr *plan.Expr) bool { 530 switch exprImpl := expr.Expr.(type) { 531 case *plan.Expr_F: 532 if exprImpl.F.Func.ObjName == "=" { 533 if isColExpr(exprImpl.F.Args[0]) && isConstExpr(exprImpl.F.Args[1]) || 534 isConstExpr(exprImpl.F.Args[0]) && isColExpr(exprImpl.F.Args[1]) { 535 return true 536 } 537 } 538 } 539 return false 540 } 541 542 func isColExpr(expr *plan.Expr) bool { 543 switch expr.Expr.(type) { 544 case *plan.Expr_Col: 545 return true 546 } 547 return false 548 } 549 550 func isConstExpr(expr *plan.Expr) bool { 551 switch expr.Expr.(type) { 552 case *plan.Expr_Lit: 553 return true 554 } 555 return false 556 }