github.com/matrixorigin/matrixone@v0.7.0/pkg/sql/plan/utils.go (about) 1 // Copyright 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package plan 16 17 import ( 18 "container/list" 19 "context" 20 "encoding/csv" 21 "math" 22 "path" 23 "strings" 24 25 "github.com/matrixorigin/matrixone/pkg/vm/process" 26 27 "github.com/matrixorigin/matrixone/pkg/common/moerr" 28 "github.com/matrixorigin/matrixone/pkg/common/mpool" 29 "github.com/matrixorigin/matrixone/pkg/container/batch" 30 "github.com/matrixorigin/matrixone/pkg/container/types" 31 "github.com/matrixorigin/matrixone/pkg/container/vector" 32 "github.com/matrixorigin/matrixone/pkg/fileservice" 33 "github.com/matrixorigin/matrixone/pkg/pb/plan" 34 "github.com/matrixorigin/matrixone/pkg/sql/colexec" 35 "github.com/matrixorigin/matrixone/pkg/sql/parsers/dialect" 36 "github.com/matrixorigin/matrixone/pkg/sql/parsers/tree" 37 "github.com/matrixorigin/matrixone/pkg/sql/plan/function" 38 "github.com/matrixorigin/matrixone/pkg/sql/plan/rule" 39 ) 40 41 func GetBindings(expr *plan.Expr) []int32 { 42 bindingSet := doGetBindings(expr) 43 bindings := make([]int32, 0, len(bindingSet)) 44 for id := range bindingSet { 45 bindings = append(bindings, id) 46 } 47 return bindings 48 } 49 50 func doGetBindings(expr *plan.Expr) map[int32]any { 51 res := make(map[int32]any) 52 53 switch expr := expr.Expr.(type) { 54 case *plan.Expr_Col: 55 res[expr.Col.RelPos] = nil 56 57 case *plan.Expr_F: 58 for _, child := range expr.F.Args { 59 for id := range doGetBindings(child) { 60 res[id] = nil 61 } 62 } 63 } 64 65 return res 66 } 67 68 func hasCorrCol(expr *plan.Expr) bool { 69 switch exprImpl := expr.Expr.(type) { 70 case *plan.Expr_Corr: 71 return true 72 73 case *plan.Expr_F: 74 ret := false 75 for _, arg := range exprImpl.F.Args { 76 ret = ret || hasCorrCol(arg) 77 } 78 return ret 79 80 default: 81 return false 82 } 83 } 84 85 func decreaseDepthAndDispatch(preds []*plan.Expr) ([]*plan.Expr, []*plan.Expr) { 86 filterPreds := make([]*plan.Expr, 0, len(preds)) 87 joinPreds := make([]*plan.Expr, 0, len(preds)) 88 89 for _, pred := range preds { 90 newPred, correlated := decreaseDepth(pred) 91 if !correlated { 92 joinPreds = append(joinPreds, newPred) 93 continue 94 } 95 filterPreds = append(filterPreds, newPred) 96 } 97 98 return filterPreds, joinPreds 99 } 100 101 func decreaseDepth(expr *plan.Expr) (*plan.Expr, bool) { 102 var correlated bool 103 104 switch exprImpl := expr.Expr.(type) { 105 case *plan.Expr_Corr: 106 if exprImpl.Corr.Depth > 1 { 107 exprImpl.Corr.Depth-- 108 correlated = true 109 } else { 110 expr.Expr = &plan.Expr_Col{ 111 Col: &plan.ColRef{ 112 RelPos: exprImpl.Corr.RelPos, 113 ColPos: exprImpl.Corr.ColPos, 114 }, 115 } 116 } 117 118 case *plan.Expr_F: 119 var tmp bool 120 for i, arg := range exprImpl.F.Args { 121 exprImpl.F.Args[i], tmp = decreaseDepth(arg) 122 correlated = correlated || tmp 123 } 124 } 125 126 return expr, correlated 127 } 128 129 func getJoinSide(expr *plan.Expr, leftTags, rightTags map[int32]*Binding, markTag int32) (side int8) { 130 switch exprImpl := expr.Expr.(type) { 131 case *plan.Expr_F: 132 for _, arg := range exprImpl.F.Args { 133 side |= getJoinSide(arg, leftTags, rightTags, markTag) 134 } 135 136 case *plan.Expr_Col: 137 if _, ok := leftTags[exprImpl.Col.RelPos]; ok { 138 side = JoinSideLeft 139 } else if _, ok := rightTags[exprImpl.Col.RelPos]; ok { 140 side = JoinSideRight 141 } else if exprImpl.Col.RelPos == markTag { 142 side = JoinSideMark 143 } 144 145 case *plan.Expr_Corr: 146 side = JoinSideCorrelated 147 } 148 149 return 150 } 151 152 func containsTag(expr *plan.Expr, tag int32) bool { 153 var ret bool 154 155 switch exprImpl := expr.Expr.(type) { 156 case *plan.Expr_F: 157 for _, arg := range exprImpl.F.Args { 158 ret = ret || containsTag(arg, tag) 159 } 160 161 case *plan.Expr_Col: 162 return exprImpl.Col.RelPos == tag 163 } 164 165 return ret 166 } 167 168 func replaceColRefs(expr *plan.Expr, tag int32, projects []*plan.Expr) *plan.Expr { 169 switch exprImpl := expr.Expr.(type) { 170 case *plan.Expr_F: 171 for i, arg := range exprImpl.F.Args { 172 exprImpl.F.Args[i] = replaceColRefs(arg, tag, projects) 173 } 174 175 case *plan.Expr_Col: 176 colRef := exprImpl.Col 177 if colRef.RelPos == tag { 178 expr = DeepCopyExpr(projects[colRef.ColPos]) 179 } 180 } 181 182 return expr 183 } 184 185 func replaceColRefsForSet(expr *plan.Expr, projects []*plan.Expr) *plan.Expr { 186 switch exprImpl := expr.Expr.(type) { 187 case *plan.Expr_F: 188 for i, arg := range exprImpl.F.Args { 189 exprImpl.F.Args[i] = replaceColRefsForSet(arg, projects) 190 } 191 192 case *plan.Expr_Col: 193 expr = DeepCopyExpr(projects[exprImpl.Col.ColPos]) 194 } 195 196 return expr 197 } 198 199 func splitAndBindCondition(astExpr tree.Expr, ctx *BindContext) ([]*plan.Expr, error) { 200 conds := splitAstConjunction(astExpr) 201 exprs := make([]*plan.Expr, len(conds)) 202 203 for i, cond := range conds { 204 cond, err := ctx.qualifyColumnNames(cond, nil, false) 205 if err != nil { 206 return nil, err 207 } 208 209 expr, err := ctx.binder.BindExpr(cond, 0, true) 210 if err != nil { 211 return nil, err 212 } 213 // expr must be bool type, if not, try to do type convert 214 // but just ignore the subQuery. It will be solved at optimizer. 215 if expr.GetSub() == nil { 216 expr, err = makePlan2CastExpr(ctx.binder.GetContext(), expr, &plan.Type{Id: int32(types.T_bool)}) 217 if err != nil { 218 return nil, err 219 } 220 } 221 exprs[i] = expr 222 } 223 224 return exprs, nil 225 } 226 227 // splitAstConjunction split a expression to a list of AND conditions. 228 func splitAstConjunction(astExpr tree.Expr) []tree.Expr { 229 var astExprs []tree.Expr 230 switch typ := astExpr.(type) { 231 case nil: 232 case *tree.AndExpr: 233 astExprs = append(astExprs, splitAstConjunction(typ.Left)...) 234 astExprs = append(astExprs, splitAstConjunction(typ.Right)...) 235 case *tree.ParenExpr: 236 astExprs = append(astExprs, splitAstConjunction(typ.Expr)...) 237 default: 238 astExprs = append(astExprs, astExpr) 239 } 240 return astExprs 241 } 242 243 // applyDistributivity (X AND B) OR (X AND C) OR (X AND D) => X AND (B OR C OR D) 244 // TODO: move it into optimizer 245 func applyDistributivity(ctx context.Context, expr *plan.Expr) *plan.Expr { 246 switch exprImpl := expr.Expr.(type) { 247 case *plan.Expr_F: 248 for i, arg := range exprImpl.F.Args { 249 exprImpl.F.Args[i] = applyDistributivity(ctx, arg) 250 } 251 252 if exprImpl.F.Func.ObjName != "or" { 253 break 254 } 255 256 leftConds := splitPlanConjunction(exprImpl.F.Args[0]) 257 rightConds := splitPlanConjunction(exprImpl.F.Args[1]) 258 259 condMap := make(map[string]int) 260 261 for _, cond := range rightConds { 262 condMap[cond.String()] = JoinSideRight 263 } 264 265 var commonConds, leftOnlyConds, rightOnlyConds []*plan.Expr 266 267 for _, cond := range leftConds { 268 exprStr := cond.String() 269 270 if condMap[exprStr] == JoinSideRight { 271 commonConds = append(commonConds, cond) 272 condMap[exprStr] = JoinSideBoth 273 } else { 274 leftOnlyConds = append(leftOnlyConds, cond) 275 condMap[exprStr] = JoinSideLeft 276 } 277 } 278 279 for _, cond := range rightConds { 280 if condMap[cond.String()] == JoinSideRight { 281 rightOnlyConds = append(rightOnlyConds, cond) 282 } 283 } 284 285 if len(commonConds) == 0 { 286 return expr 287 } 288 289 expr, _ = combinePlanConjunction(ctx, commonConds) 290 291 if len(leftOnlyConds) == 0 || len(rightOnlyConds) == 0 { 292 return expr 293 } 294 295 leftExpr, _ := combinePlanConjunction(ctx, leftOnlyConds) 296 rightExpr, _ := combinePlanConjunction(ctx, rightOnlyConds) 297 298 leftExpr, _ = bindFuncExprImplByPlanExpr(ctx, "or", []*plan.Expr{leftExpr, rightExpr}) 299 300 expr, _ = bindFuncExprImplByPlanExpr(ctx, "and", []*plan.Expr{expr, leftExpr}) 301 } 302 303 return expr 304 } 305 306 func unionSlice(left, right []string) []string { 307 if len(left) < 1 { 308 return right 309 } 310 if len(right) < 1 { 311 return left 312 } 313 m := make(map[string]bool, len(left)+len(right)) 314 for _, s := range left { 315 m[s] = true 316 } 317 for _, s := range right { 318 m[s] = true 319 } 320 ret := make([]string, 0) 321 for s := range m { 322 ret = append(ret, s) 323 } 324 return ret 325 } 326 327 func intersectSlice(left, right []string) []string { 328 if len(left) < 1 || len(right) < 1 { 329 return left 330 } 331 m := make(map[string]bool, len(left)+len(right)) 332 for _, s := range left { 333 m[s] = true 334 } 335 ret := make([]string, 0) 336 for _, s := range right { 337 if _, ok := m[s]; ok { 338 ret = append(ret, s) 339 } 340 } 341 return ret 342 } 343 344 /* 345 DNF means disjunctive normal form, for example (a and b) or (c and d) or (e and f) 346 if we have a DNF filter, for example (c1=1 and c2=1) or (c1=2 and c2=2) 347 we can have extra filter: (c1=1 or c1=2) and (c2=1 or c2=2), which can be pushed down to optimize join 348 349 checkDNF scan the expr and return all groups of cond 350 for example (c1=1 and c2=1) or (c1=2 and c3=2), c1 is a group because it appears in all disjunctives 351 and c2,c3 is not a group 352 353 walkThroughDNF accept a keyword string, walk through the expr, 354 and extract all the conds which contains the keyword 355 */ 356 func checkDNF(expr *plan.Expr) []string { 357 var ret []string 358 switch exprImpl := expr.Expr.(type) { 359 case *plan.Expr_F: 360 if exprImpl.F.Func.ObjName == "or" { 361 left := checkDNF(exprImpl.F.Args[0]) 362 right := checkDNF(exprImpl.F.Args[1]) 363 return intersectSlice(left, right) 364 } 365 for _, arg := range exprImpl.F.Args { 366 ret = unionSlice(ret, checkDNF(arg)) 367 } 368 return ret 369 370 case *plan.Expr_Corr: 371 ret = append(ret, exprImpl.Corr.String()) 372 case *plan.Expr_Col: 373 ret = append(ret, exprImpl.Col.String()) 374 } 375 return ret 376 } 377 378 func walkThroughDNF(ctx context.Context, expr *plan.Expr, keywords string) *plan.Expr { 379 var retExpr *plan.Expr 380 switch exprImpl := expr.Expr.(type) { 381 case *plan.Expr_F: 382 if exprImpl.F.Func.ObjName == "or" { 383 left := walkThroughDNF(ctx, exprImpl.F.Args[0], keywords) 384 right := walkThroughDNF(ctx, exprImpl.F.Args[1], keywords) 385 if left != nil && right != nil { 386 retExpr, _ = bindFuncExprImplByPlanExpr(ctx, "or", []*plan.Expr{left, right}) 387 return retExpr 388 } 389 } else if exprImpl.F.Func.ObjName == "and" { 390 left := walkThroughDNF(ctx, exprImpl.F.Args[0], keywords) 391 right := walkThroughDNF(ctx, exprImpl.F.Args[1], keywords) 392 if left == nil { 393 return right 394 } else if right == nil { 395 return left 396 } else { 397 retExpr, _ = bindFuncExprImplByPlanExpr(ctx, "and", []*plan.Expr{left, right}) 398 return retExpr 399 } 400 } else { 401 for _, arg := range exprImpl.F.Args { 402 if walkThroughDNF(ctx, arg, keywords) == nil { 403 return nil 404 } 405 } 406 return expr 407 } 408 409 case *plan.Expr_Corr: 410 if exprImpl.Corr.String() == keywords { 411 return expr 412 } else { 413 return nil 414 } 415 case *plan.Expr_Col: 416 if exprImpl.Col.String() == keywords { 417 return expr 418 } else { 419 return nil 420 } 421 } 422 return expr 423 } 424 425 // deduction of new predicates. for example join on a=b where b=1, then a=1 can be deduced 426 func predsDeduction(filters, onList []*plan.Expr) []*plan.Expr { 427 var newFilters []*plan.Expr 428 for _, onPred := range onList { 429 ret, col1, col2 := checkOnPred(onPred) 430 if !ret { 431 continue 432 } 433 for _, filter := range filters { 434 ret, col := CheckFilter(filter) 435 if ret && col != nil { 436 newExpr := DeepCopyExpr(filter) 437 if substituteMatchColumn(newExpr, col1, col2) { 438 newFilters = append(newFilters, newExpr) 439 } 440 } 441 } 442 } 443 return newFilters 444 } 445 446 // for predicate deduction, filter must be like func(col)>1 , or (col=1) or (col=2) 447 // and only 1 colRef is allowd in the filter 448 func CheckFilter(expr *plan.Expr) (bool, *ColRef) { 449 switch exprImpl := expr.Expr.(type) { 450 case *plan.Expr_F: 451 switch exprImpl.F.Func.ObjName { 452 case "=", ">", "<", ">=", "<=": 453 switch exprImpl.F.Args[1].Expr.(type) { 454 case *plan.Expr_C: 455 return CheckFilter(exprImpl.F.Args[0]) 456 default: 457 return false, nil 458 } 459 default: 460 var col *ColRef 461 for _, arg := range exprImpl.F.Args { 462 ret, c := CheckFilter(arg) 463 if !ret { 464 return false, nil 465 } else if c != nil { 466 if col != nil { 467 if col.RelPos != c.RelPos || col.ColPos != c.ColPos { 468 return false, nil 469 } 470 } else { 471 col = c 472 } 473 } 474 } 475 return true, col 476 } 477 case *plan.Expr_Col: 478 return true, exprImpl.Col 479 } 480 return false, nil 481 } 482 483 func substituteMatchColumn(expr *plan.Expr, onPredCol1, onPredCol2 *ColRef) bool { 484 var ret bool 485 switch exprImpl := expr.Expr.(type) { 486 case *plan.Expr_Col: 487 colName := exprImpl.Col.String() 488 if colName == onPredCol1.String() { 489 exprImpl.Col.RelPos = onPredCol2.RelPos 490 exprImpl.Col.ColPos = onPredCol2.ColPos 491 exprImpl.Col.Name = onPredCol2.Name 492 return true 493 } else if colName == onPredCol2.String() { 494 exprImpl.Col.RelPos = onPredCol1.RelPos 495 exprImpl.Col.ColPos = onPredCol1.ColPos 496 exprImpl.Col.Name = onPredCol1.Name 497 return true 498 } 499 case *plan.Expr_F: 500 for _, arg := range exprImpl.F.Args { 501 if substituteMatchColumn(arg, onPredCol1, onPredCol2) { 502 ret = true 503 } 504 } 505 } 506 return ret 507 } 508 509 func checkOnPred(onPred *plan.Expr) (bool, *ColRef, *ColRef) { 510 //onPred must be equality, children must be column name 511 switch onPredImpl := onPred.Expr.(type) { 512 case *plan.Expr_F: 513 if onPredImpl.F.Func.ObjName != "=" { 514 return false, nil, nil 515 } 516 args := onPredImpl.F.Args 517 var col1, col2 *ColRef 518 switch child1 := args[0].Expr.(type) { 519 case *plan.Expr_Col: 520 col1 = child1.Col 521 } 522 switch child2 := args[1].Expr.(type) { 523 case *plan.Expr_Col: 524 col2 = child2.Col 525 } 526 if col1 != nil && col2 != nil { 527 return true, col1, col2 528 } 529 } 530 return false, nil, nil 531 } 532 533 func splitPlanConjunction(expr *plan.Expr) []*plan.Expr { 534 var exprs []*plan.Expr 535 switch exprImpl := expr.Expr.(type) { 536 case *plan.Expr_F: 537 if exprImpl.F.Func.ObjName == "and" { 538 exprs = append(exprs, splitPlanConjunction(exprImpl.F.Args[0])...) 539 exprs = append(exprs, splitPlanConjunction(exprImpl.F.Args[1])...) 540 } else { 541 exprs = append(exprs, expr) 542 } 543 544 default: 545 exprs = append(exprs, expr) 546 } 547 548 return exprs 549 } 550 551 func combinePlanConjunction(ctx context.Context, exprs []*plan.Expr) (expr *plan.Expr, err error) { 552 expr = exprs[0] 553 554 for i := 1; i < len(exprs); i++ { 555 expr, err = bindFuncExprImplByPlanExpr(ctx, "and", []*plan.Expr{expr, exprs[i]}) 556 557 if err != nil { 558 break 559 } 560 } 561 562 return 563 } 564 565 func rejectsNull(filter *plan.Expr, proc *process.Process) bool { 566 filter = replaceColRefWithNull(DeepCopyExpr(filter)) 567 568 bat := batch.NewWithSize(0) 569 bat.Zs = []int64{1} 570 filter, err := ConstantFold(bat, filter, proc) 571 if err != nil { 572 return false 573 } 574 575 if f, ok := filter.Expr.(*plan.Expr_C); ok { 576 if f.C.Isnull { 577 return true 578 } 579 580 if fbool, ok := f.C.Value.(*plan.Const_Bval); ok { 581 return !fbool.Bval 582 } 583 } 584 585 return false 586 } 587 588 func replaceColRefWithNull(expr *plan.Expr) *plan.Expr { 589 switch exprImpl := expr.Expr.(type) { 590 case *plan.Expr_Col: 591 expr = &plan.Expr{ 592 Typ: expr.Typ, 593 Expr: &plan.Expr_C{ 594 C: &plan.Const{ 595 Isnull: true, 596 }, 597 }, 598 } 599 600 case *plan.Expr_F: 601 for i, arg := range exprImpl.F.Args { 602 exprImpl.F.Args[i] = replaceColRefWithNull(arg) 603 } 604 } 605 606 return expr 607 } 608 609 func increaseRefCnt(expr *plan.Expr, colRefCnt map[[2]int32]int) { 610 switch exprImpl := expr.Expr.(type) { 611 case *plan.Expr_Col: 612 colRefCnt[[2]int32{exprImpl.Col.RelPos, exprImpl.Col.ColPos}]++ 613 614 case *plan.Expr_F: 615 for _, arg := range exprImpl.F.Args { 616 increaseRefCnt(arg, colRefCnt) 617 } 618 } 619 } 620 621 func decreaseRefCnt(expr *plan.Expr, colRefCnt map[[2]int32]int) { 622 switch exprImpl := expr.Expr.(type) { 623 case *plan.Expr_Col: 624 colRefCnt[[2]int32{exprImpl.Col.RelPos, exprImpl.Col.ColPos}]-- 625 626 case *plan.Expr_F: 627 for _, arg := range exprImpl.F.Args { 628 decreaseRefCnt(arg, colRefCnt) 629 } 630 } 631 } 632 633 func getHyperEdgeFromExpr(expr *plan.Expr, leafByTag map[int32]int32, hyperEdge map[int32]any) { 634 switch exprImpl := expr.Expr.(type) { 635 case *plan.Expr_Col: 636 hyperEdge[leafByTag[exprImpl.Col.RelPos]] = nil 637 638 case *plan.Expr_F: 639 for _, arg := range exprImpl.F.Args { 640 getHyperEdgeFromExpr(arg, leafByTag, hyperEdge) 641 } 642 } 643 } 644 645 func getNumOfCharacters(str string) int { 646 strRune := []rune(str) 647 return len(strRune) 648 } 649 650 func getUnionSelects(ctx context.Context, stmt *tree.UnionClause, selects *[]tree.Statement, unionTypes *[]plan.Node_NodeType) error { 651 switch leftStmt := stmt.Left.(type) { 652 case *tree.UnionClause: 653 err := getUnionSelects(ctx, leftStmt, selects, unionTypes) 654 if err != nil { 655 return err 656 } 657 case *tree.SelectClause: 658 *selects = append(*selects, leftStmt) 659 case *tree.ParenSelect: 660 *selects = append(*selects, leftStmt.Select) 661 default: 662 return moerr.NewParseError(ctx, "unexpected statement in union: '%v'", tree.String(leftStmt, dialect.MYSQL)) 663 } 664 665 // right is not UNION allways 666 switch rightStmt := stmt.Right.(type) { 667 case *tree.SelectClause: 668 if stmt.Type == tree.UNION && !stmt.All { 669 rightStr := tree.String(rightStmt, dialect.MYSQL) 670 if len(*selects) == 1 && tree.String((*selects)[0], dialect.MYSQL) == rightStr { 671 return nil 672 } 673 } 674 675 *selects = append(*selects, rightStmt) 676 case *tree.ParenSelect: 677 if stmt.Type == tree.UNION && !stmt.All { 678 rightStr := tree.String(rightStmt.Select, dialect.MYSQL) 679 if len(*selects) == 1 && tree.String((*selects)[0], dialect.MYSQL) == rightStr { 680 return nil 681 } 682 } 683 684 *selects = append(*selects, rightStmt.Select) 685 default: 686 return moerr.NewParseError(ctx, "unexpected statement in union2: '%v'", tree.String(rightStmt, dialect.MYSQL)) 687 } 688 689 switch stmt.Type { 690 case tree.UNION: 691 if stmt.All { 692 *unionTypes = append(*unionTypes, plan.Node_UNION_ALL) 693 } else { 694 *unionTypes = append(*unionTypes, plan.Node_UNION) 695 } 696 case tree.INTERSECT: 697 if stmt.All { 698 *unionTypes = append(*unionTypes, plan.Node_INTERSECT_ALL) 699 } else { 700 *unionTypes = append(*unionTypes, plan.Node_INTERSECT) 701 } 702 case tree.EXCEPT, tree.UT_MINUS: 703 if stmt.All { 704 return moerr.NewNYI(ctx, "EXCEPT/MINUS ALL clause") 705 } else { 706 *unionTypes = append(*unionTypes, plan.Node_MINUS) 707 } 708 } 709 return nil 710 } 711 712 func containsParamRef(expr *plan.Expr) bool { 713 var ret bool 714 switch exprImpl := expr.Expr.(type) { 715 case *plan.Expr_F: 716 for _, arg := range exprImpl.F.Args { 717 ret = ret || containsParamRef(arg) 718 } 719 case *plan.Expr_P: 720 return true 721 } 722 return ret 723 } 724 725 func getColumnMapByExpr(expr *plan.Expr, tableDef *plan.TableDef, columnMap *map[int]int) { 726 if expr == nil { 727 return 728 } 729 switch exprImpl := expr.Expr.(type) { 730 case *plan.Expr_F: 731 for _, arg := range exprImpl.F.Args { 732 getColumnMapByExpr(arg, tableDef, columnMap) 733 } 734 735 case *plan.Expr_Col: 736 idx := exprImpl.Col.ColPos 737 colName := exprImpl.Col.Name 738 dotIdx := strings.Index(colName, ".") 739 colName = colName[dotIdx+1:] 740 colIdx := tableDef.Name2ColIndex[colName] 741 (*columnMap)[int(idx)] = int(colIdx) 742 } 743 } 744 745 func GetColumnsByExpr(expr *plan.Expr, tableDef *plan.TableDef) (map[int]int, []int, int) { 746 columnMap := make(map[int]int) 747 // key = expr's ColPos, value = tableDef's ColPos 748 getColumnMapByExpr(expr, tableDef, &columnMap) 749 750 maxCol := 0 751 useColumn := len(columnMap) 752 columns := make([]int, useColumn) 753 i := 0 754 for k, v := range columnMap { 755 if k > maxCol { 756 maxCol = k 757 } 758 columns[i] = v //tableDef's ColPos 759 i = i + 1 760 } 761 return columnMap, columns, maxCol 762 } 763 764 func EvalFilterExpr(ctx context.Context, expr *plan.Expr, bat *batch.Batch, proc *process.Process) (bool, error) { 765 if len(bat.Vecs) == 0 { //that's constant expr 766 e, err := ConstantFold(bat, expr, proc) 767 if err != nil { 768 return false, err 769 } 770 771 if cExpr, ok := e.Expr.(*plan.Expr_C); ok { 772 if bVal, bOk := cExpr.C.Value.(*plan.Const_Bval); bOk { 773 return bVal.Bval, nil 774 } 775 } 776 return false, moerr.NewInternalError(ctx, "cannot eval filter expr") 777 } else { 778 vec, err := colexec.EvalExprByZonemapBat(ctx, bat, proc, expr) 779 if err != nil { 780 return false, err 781 } 782 if vec.Typ.Oid != types.T_bool { 783 return false, moerr.NewInternalError(ctx, "cannot eval filter expr") 784 } 785 cols := vector.MustTCols[bool](vec) 786 for _, isNeed := range cols { 787 if isNeed { 788 return true, nil 789 } 790 } 791 return false, nil 792 } 793 } 794 795 func exchangeVectors(datas [][2]any, depth int, tmpResult []any, result *[]*vector.Vector, mp *mpool.MPool) { 796 for i := 0; i < len(datas[depth]); i++ { 797 tmpResult[depth] = datas[depth][i] 798 if depth != len(datas)-1 { 799 exchangeVectors(datas, depth+1, tmpResult, result, mp) 800 } else { 801 for j, val := range tmpResult { 802 (*result)[j].Append(val, false, mp) 803 } 804 } 805 } 806 } 807 808 func BuildVectorsByData(datas [][2]any, dataTypes []uint8, mp *mpool.MPool) []*vector.Vector { 809 vectors := make([]*vector.Vector, len(dataTypes)) 810 for i, typ := range dataTypes { 811 vectors[i] = vector.New(types.T(typ).ToType()) 812 } 813 814 tmpResult := make([]any, len(datas)) 815 exchangeVectors(datas, 0, tmpResult, &vectors, mp) 816 817 return vectors 818 } 819 820 func CheckExprIsMonotonic(ctx context.Context, expr *plan.Expr) bool { 821 if expr == nil { 822 return false 823 } 824 switch exprImpl := expr.Expr.(type) { 825 case *plan.Expr_F: 826 for _, arg := range exprImpl.F.Args { 827 isMonotonic := CheckExprIsMonotonic(ctx, arg) 828 if !isMonotonic { 829 return false 830 } 831 } 832 833 isMonotonic, _ := function.GetFunctionIsMonotonicById(ctx, exprImpl.F.Func.GetObj()) 834 if !isMonotonic { 835 return false 836 } 837 838 return true 839 default: 840 return true 841 } 842 } 843 844 // handle the filter list for zonemap. rewrite and constFold 845 func HandleFiltersForZM(exprList []*plan.Expr, proc *process.Process) *plan.Expr { 846 if proc == nil || proc.Ctx == nil { 847 return nil 848 } 849 var newExprList []*plan.Expr 850 bat := batch.NewWithSize(0) 851 bat.Zs = []int64{1} 852 for _, expr := range exprList { 853 tmpexpr, _ := ConstantFold(bat, DeepCopyExpr(expr), proc) 854 if tmpexpr != nil { 855 expr = tmpexpr 856 } 857 if !containsParamRef(expr) && CheckExprIsMonotonic(proc.Ctx, expr) { 858 newExprList = append(newExprList, expr) 859 } 860 } 861 e := colexec.RewriteFilterExprList(newExprList) 862 return e 863 } 864 865 func ConstantFold(bat *batch.Batch, e *plan.Expr, proc *process.Process) (*plan.Expr, error) { 866 var err error 867 868 ef, ok := e.Expr.(*plan.Expr_F) 869 if !ok { 870 return e, nil 871 } 872 overloadID := ef.F.Func.GetObj() 873 f, err := function.GetFunctionByID(proc.Ctx, overloadID) 874 if err != nil { 875 return nil, err 876 } 877 if f.Volatile { // function cannot be fold 878 return e, nil 879 } 880 for i := range ef.F.Args { 881 ef.F.Args[i], err = ConstantFold(bat, ef.F.Args[i], proc) 882 if err != nil { 883 return nil, err 884 } 885 } 886 if !rule.IsConstant(e) { 887 return e, nil 888 } 889 vec, err := colexec.EvalExpr(bat, proc, e) 890 if err != nil { 891 return nil, err 892 } 893 c := rule.GetConstantValue(vec, false) 894 vec.Free(proc.Mp()) 895 if c == nil { 896 return e, nil 897 } 898 ec := &plan.Expr_C{ 899 C: c, 900 } 901 e.Expr = ec 902 return e, nil 903 } 904 905 func rewriteTableFunction(tblFunc *tree.TableFunction, leftCtx *BindContext) error { 906 //var err error 907 //newTableAliasMap := make(map[string]string) 908 //newColAliasMap := make(map[string]string) 909 //col2Table := make(map[string]string) 910 //for i := range tblFunc.SelectStmt.Select.(*tree.SelectClause).From.Tables { 911 // alias := string(tblFunc.SelectStmt.Select.(*tree.SelectClause).From.Tables[i].(*tree.AliasedTableExpr).As.Alias) 912 // if len(alias) == 0 { 913 // alias = string(tblFunc.SelectStmt.Select.(*tree.SelectClause).From.Tables[i].(*tree.AliasedTableExpr).Expr.(*tree.TableName).ObjectName) 914 // } 915 // newAlias := fmt.Sprintf("%s_tbl_%d", alias, i) 916 // tblFunc.SelectStmt.Select.(*tree.SelectClause).From.Tables[i].(*tree.AliasedTableExpr).As.Alias = tree.Identifier(newAlias) 917 // //newTableAliasMap[alias] = newAlias 918 //} 919 for i := range tblFunc.SelectStmt.Select.(*tree.SelectClause).Exprs { 920 selectExpr := tblFunc.SelectStmt.Select.(*tree.SelectClause).Exprs[i] //take care, this is not a pointer 921 expr := selectExpr.Expr.(*tree.UnresolvedName) 922 _, tableName, colName := expr.GetNames() 923 if len(tableName) == 0 { 924 if binding, ok := leftCtx.bindingByCol[colName]; ok { 925 tableName = binding.table 926 expr.Parts[1] = tableName 927 } else { 928 return moerr.NewInternalError(leftCtx.binder.GetContext(), "cannot find column '%s'", colName) 929 } 930 } 931 //newTableName = newTableAliasMap[tableName] 932 //newColAlias = fmt.Sprintf("%s_%d", colName, i) 933 //newColAliasMap[colName] = newColAlias 934 //col2Table[newColAlias] = newTableName 935 //newName, err := tree.NewUnresolvedName(newTableName, colName) 936 //if err != nil { 937 // return err 938 //} 939 //tblFunc.SelectStmt.Select.(*tree.SelectClause).Exprs[i].Expr = newName 940 //tblFunc.SelectStmt.Select.(*tree.SelectClause).Exprs[i].As = tree.UnrestrictedIdentifier(newColAlias) 941 } 942 943 //for i, _ := range tblFunc.Func.Exprs { 944 // tblFunc.Func.Exprs[i], err = rewriteTableFunctionExpr(tblFunc.Func.Exprs[i], newTableAliasMap, newColAliasMap, col2Table) 945 // if err != nil { 946 // return err 947 // } 948 //} 949 return nil 950 } 951 952 // 953 //func rewriteTableFunctionExpr(ast tree.Expr, tableAlias map[string]string, colAlias map[string]string, col2Table map[string]string) (tree.Expr, error) { 954 // var err error 955 // switch item := ast.(type) { 956 // case *tree.UnresolvedName: 957 // _, tblName, colName := item.GetNames() 958 // if len(tblName) > 0 { 959 // if alias, ok := tableAlias[tblName]; ok { 960 // item.Parts[1] = alias 961 // } 962 // } else { 963 // newColName := colAlias[colName] 964 // newTblName := col2Table[newColName] 965 // item.Parts[1] = newTblName 966 // } 967 // case *tree.FuncExpr: 968 // for i, _ := range item.Exprs { 969 // item.Exprs[i], err = rewriteTableFunctionExpr(item.Exprs[i], tableAlias, colAlias, col2Table) 970 // if err != nil { 971 // return nil, err 972 // } 973 // } 974 // case *tree.NumVal: 975 // break 976 // default: 977 // return nil, moerr.NewNotSupported("table function expr '%s' not supported", item) 978 // } 979 // return ast, nil 980 //} 981 982 // lookUpFnCols looks up the columns in the function expression 983 func lookUpFnCols(ret tree.SelectExprs, fn interface{}) tree.SelectExprs { 984 switch fnExpr := fn.(type) { //TODO add more cases 985 case *tree.UnresolvedName: 986 ret = append(ret, tree.SelectExpr{Expr: fnExpr}) 987 case *tree.FuncExpr: 988 for _, arg := range fnExpr.Exprs { 989 ret = lookUpFnCols(ret, arg) 990 } 991 case *tree.BinaryExpr: 992 ret = lookUpFnCols(ret, fnExpr.Left) 993 ret = lookUpFnCols(ret, fnExpr.Right) 994 case *tree.UnaryExpr: 995 ret = lookUpFnCols(ret, fnExpr.Expr) 996 } 997 return ret 998 } 999 func buildTableFunctionStmt(tbl *tree.TableFunction, left tree.TableExpr, leftCtx *BindContext) error { 1000 var selectExprs tree.SelectExprs 1001 selectExprs = lookUpFnCols(selectExprs, tbl.Func) 1002 tbl.SelectStmt = &tree.Select{ 1003 Select: &tree.SelectClause{ 1004 From: &tree.From{ 1005 Tables: []tree.TableExpr{left}, 1006 }, 1007 Exprs: selectExprs, 1008 }, 1009 } 1010 return rewriteTableFunction(tbl, leftCtx) 1011 } 1012 1013 func clearBinding(ctx *BindContext) { 1014 ctx.bindingByCol = make(map[string]*Binding) 1015 ctx.bindingByTable = make(map[string]*Binding) 1016 ctx.bindingByTag = make(map[int32]*Binding) 1017 ctx.bindingTree = &BindingTreeNode{} 1018 ctx.bindings = make([]*Binding, 0) 1019 } 1020 1021 func unwindTupleComparison(ctx context.Context, nonEqOp, op string, leftExprs, rightExprs []*plan.Expr, idx int) (*plan.Expr, error) { 1022 if idx == len(leftExprs)-1 { 1023 return bindFuncExprImplByPlanExpr(ctx, op, []*plan.Expr{ 1024 leftExprs[idx], 1025 rightExprs[idx], 1026 }) 1027 } 1028 1029 expr, err := bindFuncExprImplByPlanExpr(ctx, nonEqOp, []*plan.Expr{ 1030 DeepCopyExpr(leftExprs[idx]), 1031 DeepCopyExpr(rightExprs[idx]), 1032 }) 1033 if err != nil { 1034 return nil, err 1035 } 1036 1037 eqExpr, err := bindFuncExprImplByPlanExpr(ctx, "=", []*plan.Expr{ 1038 leftExprs[idx], 1039 rightExprs[idx], 1040 }) 1041 if err != nil { 1042 return nil, err 1043 } 1044 1045 tailExpr, err := unwindTupleComparison(ctx, nonEqOp, op, leftExprs, rightExprs, idx+1) 1046 if err != nil { 1047 return nil, err 1048 } 1049 1050 tailExpr, err = bindFuncExprImplByPlanExpr(ctx, "and", []*plan.Expr{eqExpr, tailExpr}) 1051 if err != nil { 1052 return nil, err 1053 } 1054 1055 return bindFuncExprImplByPlanExpr(ctx, "or", []*plan.Expr{expr, tailExpr}) 1056 } 1057 1058 // checkNoNeedCast 1059 // if constant's type higher than column's type 1060 // and constant's value in range of column's type, then no cast was needed 1061 func checkNoNeedCast(constT, columnT types.Type, constExpr *plan.Expr_C) bool { 1062 switch constT.Oid { 1063 case types.T_char, types.T_varchar, types.T_text: 1064 switch columnT.Oid { 1065 case types.T_char, types.T_varchar: 1066 if constT.Width <= columnT.Width { 1067 return true 1068 } else { 1069 return false 1070 } 1071 case types.T_text: 1072 return true 1073 default: 1074 return false 1075 } 1076 1077 case types.T_int8, types.T_int16, types.T_int32, types.T_int64: 1078 val, valOk := constExpr.C.Value.(*plan.Const_I64Val) 1079 if !valOk { 1080 return false 1081 } 1082 constVal := val.I64Val 1083 switch columnT.Oid { 1084 case types.T_int8: 1085 return constVal <= int64(math.MaxInt8) && constVal >= int64(math.MinInt8) 1086 case types.T_int16: 1087 return constVal <= int64(math.MaxInt16) && constVal >= int64(math.MinInt16) 1088 case types.T_int32: 1089 return constVal <= int64(math.MaxInt32) && constVal >= int64(math.MinInt32) 1090 case types.T_int64: 1091 return true 1092 case types.T_uint8: 1093 return constVal <= math.MaxUint8 && constVal >= 0 1094 case types.T_uint16: 1095 return constVal <= math.MaxUint16 && constVal >= 0 1096 case types.T_uint32: 1097 return constVal <= math.MaxUint32 && constVal >= 0 1098 case types.T_uint64: 1099 return constVal >= 0 1100 case types.T_varchar: 1101 return true 1102 case types.T_float32: 1103 //float32 has 6-7 significant digits. 1104 return constVal <= 100000 && constVal >= -100000 1105 default: 1106 return false 1107 } 1108 case types.T_uint8, types.T_uint16, types.T_uint32, types.T_uint64: 1109 val_u, valOk := constExpr.C.Value.(*plan.Const_U64Val) 1110 if !valOk { 1111 return false 1112 } 1113 constVal := val_u.U64Val 1114 switch columnT.Oid { 1115 case types.T_int8: 1116 return constVal <= math.MaxInt8 1117 case types.T_int16: 1118 return constVal <= math.MaxInt16 1119 case types.T_int32: 1120 return constVal <= math.MaxInt32 1121 case types.T_int64: 1122 return constVal <= math.MaxInt64 1123 case types.T_uint8: 1124 return constVal <= math.MaxUint8 1125 case types.T_uint16: 1126 return constVal <= math.MaxUint16 1127 case types.T_uint32: 1128 return constVal <= math.MaxUint32 1129 case types.T_uint64: 1130 return true 1131 case types.T_float32: 1132 //float32 has 6-7 significant digits. 1133 return constVal <= 100000 1134 default: 1135 return false 1136 } 1137 default: 1138 return false 1139 } 1140 1141 } 1142 1143 func InitInfileParam(param *tree.ExternParam) error { 1144 for i := 0; i < len(param.Option); i += 2 { 1145 switch strings.ToLower(param.Option[i]) { 1146 case "filepath": 1147 param.Filepath = param.Option[i+1] 1148 case "compression": 1149 param.CompressType = param.Option[i+1] 1150 case "format": 1151 format := strings.ToLower(param.Option[i+1]) 1152 if format != tree.CSV && format != tree.JSONLINE { 1153 return moerr.NewBadConfig(param.Ctx, "the format '%s' is not supported", format) 1154 } 1155 param.Format = format 1156 case "jsondata": 1157 jsondata := strings.ToLower(param.Option[i+1]) 1158 if jsondata != tree.OBJECT && jsondata != tree.ARRAY { 1159 return moerr.NewBadConfig(param.Ctx, "the jsondata '%s' is not supported", jsondata) 1160 } 1161 param.JsonData = jsondata 1162 param.Format = tree.JSONLINE 1163 default: 1164 return moerr.NewBadConfig(param.Ctx, "the keyword '%s' is not support", strings.ToLower(param.Option[i])) 1165 } 1166 } 1167 if len(param.Filepath) == 0 { 1168 return moerr.NewBadConfig(param.Ctx, "the filepath must be specified") 1169 } 1170 if param.Format == tree.JSONLINE && len(param.JsonData) == 0 { 1171 return moerr.NewBadConfig(param.Ctx, "the jsondata must be specified") 1172 } 1173 if len(param.Format) == 0 { 1174 param.Format = tree.CSV 1175 } 1176 return nil 1177 } 1178 1179 func InitS3Param(param *tree.ExternParam) error { 1180 param.S3Param = &tree.S3Parameter{} 1181 for i := 0; i < len(param.Option); i += 2 { 1182 switch strings.ToLower(param.Option[i]) { 1183 case "endpoint": 1184 param.S3Param.Endpoint = param.Option[i+1] 1185 case "region": 1186 param.S3Param.Region = param.Option[i+1] 1187 case "access_key_id": 1188 param.S3Param.APIKey = param.Option[i+1] 1189 case "secret_access_key": 1190 param.S3Param.APISecret = param.Option[i+1] 1191 case "bucket": 1192 param.S3Param.Bucket = param.Option[i+1] 1193 case "filepath": 1194 param.Filepath = param.Option[i+1] 1195 case "compression": 1196 param.CompressType = param.Option[i+1] 1197 case "provider": 1198 param.S3Param.Provider = param.Option[i+1] 1199 case "role_arn": 1200 param.S3Param.RoleArn = param.Option[i+1] 1201 case "external_id": 1202 param.S3Param.ExternalId = param.Option[i+1] 1203 case "format": 1204 format := strings.ToLower(param.Option[i+1]) 1205 if format != tree.CSV && format != tree.JSONLINE { 1206 return moerr.NewBadConfig(param.Ctx, "the format '%s' is not supported", format) 1207 } 1208 param.Format = format 1209 case "jsondata": 1210 jsondata := strings.ToLower(param.Option[i+1]) 1211 if jsondata != tree.OBJECT && jsondata != tree.ARRAY { 1212 return moerr.NewBadConfig(param.Ctx, "the jsondata '%s' is not supported", jsondata) 1213 } 1214 param.JsonData = jsondata 1215 param.Format = tree.JSONLINE 1216 1217 default: 1218 return moerr.NewBadConfig(param.Ctx, "the keyword '%s' is not support", strings.ToLower(param.Option[i])) 1219 } 1220 } 1221 if param.Format == tree.JSONLINE && len(param.JsonData) == 0 { 1222 return moerr.NewBadConfig(param.Ctx, "the jsondata must be specified") 1223 } 1224 if len(param.Format) == 0 { 1225 param.Format = tree.CSV 1226 } 1227 return nil 1228 } 1229 1230 func GetForETLWithType(param *tree.ExternParam, prefix string) (res fileservice.ETLFileService, readPath string, err error) { 1231 if param.ScanType == tree.S3 { 1232 buf := new(strings.Builder) 1233 w := csv.NewWriter(buf) 1234 opts := []string{"s3-opts", "endpoint=" + param.S3Param.Endpoint, "region=" + param.S3Param.Region, "key=" + param.S3Param.APIKey, "secret=" + param.S3Param.APISecret, 1235 "bucket=" + param.S3Param.Bucket, "role-arn=" + param.S3Param.RoleArn, "external-id=" + param.S3Param.ExternalId} 1236 if param.S3Param.Provider == "minio" { 1237 opts = append(opts, "is-minio=true") 1238 } 1239 if err = w.Write(opts); err != nil { 1240 return nil, "", err 1241 } 1242 w.Flush() 1243 return fileservice.GetForETL(nil, fileservice.JoinPath(buf.String(), prefix)) 1244 } 1245 return fileservice.GetForETL(param.FileService, prefix) 1246 } 1247 1248 // ReadDir support "etl:" and "/..." absolute path, NOT support relative path. 1249 func ReadDir(param *tree.ExternParam) (fileList []string, fileSize []int64, err error) { 1250 filePath := strings.TrimSpace(param.Filepath) 1251 if strings.HasPrefix(filePath, "etl:") { 1252 filePath = path.Clean(filePath) 1253 } else { 1254 filePath = path.Clean("/" + filePath) 1255 } 1256 1257 sep := "/" 1258 pathDir := strings.Split(filePath, sep) 1259 l := list.New() 1260 l2 := list.New() 1261 if pathDir[0] == "" { 1262 l.PushBack(sep) 1263 } else { 1264 l.PushBack(pathDir[0]) 1265 } 1266 1267 for i := 1; i < len(pathDir); i++ { 1268 length := l.Len() 1269 for j := 0; j < length; j++ { 1270 prefix := l.Front().Value.(string) 1271 fs, readPath, err := GetForETLWithType(param, prefix) 1272 if err != nil { 1273 return nil, nil, err 1274 } 1275 entries, err := fs.List(param.Ctx, readPath) 1276 if err != nil { 1277 return nil, nil, err 1278 } 1279 for _, entry := range entries { 1280 if !entry.IsDir && i+1 != len(pathDir) { 1281 continue 1282 } 1283 if entry.IsDir && i+1 == len(pathDir) { 1284 continue 1285 } 1286 matched, err := path.Match(pathDir[i], entry.Name) 1287 if err != nil { 1288 return nil, nil, err 1289 } 1290 if !matched { 1291 continue 1292 } 1293 l.PushBack(path.Join(l.Front().Value.(string), entry.Name)) 1294 if !entry.IsDir { 1295 l2.PushBack(entry.Size) 1296 } 1297 } 1298 l.Remove(l.Front()) 1299 } 1300 } 1301 len := l.Len() 1302 for j := 0; j < len; j++ { 1303 fileList = append(fileList, l.Front().Value.(string)) 1304 l.Remove(l.Front()) 1305 fileSize = append(fileSize, l2.Front().Value.(int64)) 1306 l2.Remove(l2.Front()) 1307 } 1308 return fileList, fileSize, err 1309 }