github.com/matrixorigin/matrixone@v0.7.0/pkg/sql/plan/utils.go (about)

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package plan
    16  
    17  import (
    18  	"container/list"
    19  	"context"
    20  	"encoding/csv"
    21  	"math"
    22  	"path"
    23  	"strings"
    24  
    25  	"github.com/matrixorigin/matrixone/pkg/vm/process"
    26  
    27  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    28  	"github.com/matrixorigin/matrixone/pkg/common/mpool"
    29  	"github.com/matrixorigin/matrixone/pkg/container/batch"
    30  	"github.com/matrixorigin/matrixone/pkg/container/types"
    31  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    32  	"github.com/matrixorigin/matrixone/pkg/fileservice"
    33  	"github.com/matrixorigin/matrixone/pkg/pb/plan"
    34  	"github.com/matrixorigin/matrixone/pkg/sql/colexec"
    35  	"github.com/matrixorigin/matrixone/pkg/sql/parsers/dialect"
    36  	"github.com/matrixorigin/matrixone/pkg/sql/parsers/tree"
    37  	"github.com/matrixorigin/matrixone/pkg/sql/plan/function"
    38  	"github.com/matrixorigin/matrixone/pkg/sql/plan/rule"
    39  )
    40  
    41  func GetBindings(expr *plan.Expr) []int32 {
    42  	bindingSet := doGetBindings(expr)
    43  	bindings := make([]int32, 0, len(bindingSet))
    44  	for id := range bindingSet {
    45  		bindings = append(bindings, id)
    46  	}
    47  	return bindings
    48  }
    49  
    50  func doGetBindings(expr *plan.Expr) map[int32]any {
    51  	res := make(map[int32]any)
    52  
    53  	switch expr := expr.Expr.(type) {
    54  	case *plan.Expr_Col:
    55  		res[expr.Col.RelPos] = nil
    56  
    57  	case *plan.Expr_F:
    58  		for _, child := range expr.F.Args {
    59  			for id := range doGetBindings(child) {
    60  				res[id] = nil
    61  			}
    62  		}
    63  	}
    64  
    65  	return res
    66  }
    67  
    68  func hasCorrCol(expr *plan.Expr) bool {
    69  	switch exprImpl := expr.Expr.(type) {
    70  	case *plan.Expr_Corr:
    71  		return true
    72  
    73  	case *plan.Expr_F:
    74  		ret := false
    75  		for _, arg := range exprImpl.F.Args {
    76  			ret = ret || hasCorrCol(arg)
    77  		}
    78  		return ret
    79  
    80  	default:
    81  		return false
    82  	}
    83  }
    84  
    85  func decreaseDepthAndDispatch(preds []*plan.Expr) ([]*plan.Expr, []*plan.Expr) {
    86  	filterPreds := make([]*plan.Expr, 0, len(preds))
    87  	joinPreds := make([]*plan.Expr, 0, len(preds))
    88  
    89  	for _, pred := range preds {
    90  		newPred, correlated := decreaseDepth(pred)
    91  		if !correlated {
    92  			joinPreds = append(joinPreds, newPred)
    93  			continue
    94  		}
    95  		filterPreds = append(filterPreds, newPred)
    96  	}
    97  
    98  	return filterPreds, joinPreds
    99  }
   100  
   101  func decreaseDepth(expr *plan.Expr) (*plan.Expr, bool) {
   102  	var correlated bool
   103  
   104  	switch exprImpl := expr.Expr.(type) {
   105  	case *plan.Expr_Corr:
   106  		if exprImpl.Corr.Depth > 1 {
   107  			exprImpl.Corr.Depth--
   108  			correlated = true
   109  		} else {
   110  			expr.Expr = &plan.Expr_Col{
   111  				Col: &plan.ColRef{
   112  					RelPos: exprImpl.Corr.RelPos,
   113  					ColPos: exprImpl.Corr.ColPos,
   114  				},
   115  			}
   116  		}
   117  
   118  	case *plan.Expr_F:
   119  		var tmp bool
   120  		for i, arg := range exprImpl.F.Args {
   121  			exprImpl.F.Args[i], tmp = decreaseDepth(arg)
   122  			correlated = correlated || tmp
   123  		}
   124  	}
   125  
   126  	return expr, correlated
   127  }
   128  
   129  func getJoinSide(expr *plan.Expr, leftTags, rightTags map[int32]*Binding, markTag int32) (side int8) {
   130  	switch exprImpl := expr.Expr.(type) {
   131  	case *plan.Expr_F:
   132  		for _, arg := range exprImpl.F.Args {
   133  			side |= getJoinSide(arg, leftTags, rightTags, markTag)
   134  		}
   135  
   136  	case *plan.Expr_Col:
   137  		if _, ok := leftTags[exprImpl.Col.RelPos]; ok {
   138  			side = JoinSideLeft
   139  		} else if _, ok := rightTags[exprImpl.Col.RelPos]; ok {
   140  			side = JoinSideRight
   141  		} else if exprImpl.Col.RelPos == markTag {
   142  			side = JoinSideMark
   143  		}
   144  
   145  	case *plan.Expr_Corr:
   146  		side = JoinSideCorrelated
   147  	}
   148  
   149  	return
   150  }
   151  
   152  func containsTag(expr *plan.Expr, tag int32) bool {
   153  	var ret bool
   154  
   155  	switch exprImpl := expr.Expr.(type) {
   156  	case *plan.Expr_F:
   157  		for _, arg := range exprImpl.F.Args {
   158  			ret = ret || containsTag(arg, tag)
   159  		}
   160  
   161  	case *plan.Expr_Col:
   162  		return exprImpl.Col.RelPos == tag
   163  	}
   164  
   165  	return ret
   166  }
   167  
   168  func replaceColRefs(expr *plan.Expr, tag int32, projects []*plan.Expr) *plan.Expr {
   169  	switch exprImpl := expr.Expr.(type) {
   170  	case *plan.Expr_F:
   171  		for i, arg := range exprImpl.F.Args {
   172  			exprImpl.F.Args[i] = replaceColRefs(arg, tag, projects)
   173  		}
   174  
   175  	case *plan.Expr_Col:
   176  		colRef := exprImpl.Col
   177  		if colRef.RelPos == tag {
   178  			expr = DeepCopyExpr(projects[colRef.ColPos])
   179  		}
   180  	}
   181  
   182  	return expr
   183  }
   184  
   185  func replaceColRefsForSet(expr *plan.Expr, projects []*plan.Expr) *plan.Expr {
   186  	switch exprImpl := expr.Expr.(type) {
   187  	case *plan.Expr_F:
   188  		for i, arg := range exprImpl.F.Args {
   189  			exprImpl.F.Args[i] = replaceColRefsForSet(arg, projects)
   190  		}
   191  
   192  	case *plan.Expr_Col:
   193  		expr = DeepCopyExpr(projects[exprImpl.Col.ColPos])
   194  	}
   195  
   196  	return expr
   197  }
   198  
   199  func splitAndBindCondition(astExpr tree.Expr, ctx *BindContext) ([]*plan.Expr, error) {
   200  	conds := splitAstConjunction(astExpr)
   201  	exprs := make([]*plan.Expr, len(conds))
   202  
   203  	for i, cond := range conds {
   204  		cond, err := ctx.qualifyColumnNames(cond, nil, false)
   205  		if err != nil {
   206  			return nil, err
   207  		}
   208  
   209  		expr, err := ctx.binder.BindExpr(cond, 0, true)
   210  		if err != nil {
   211  			return nil, err
   212  		}
   213  		// expr must be bool type, if not, try to do type convert
   214  		// but just ignore the subQuery. It will be solved at optimizer.
   215  		if expr.GetSub() == nil {
   216  			expr, err = makePlan2CastExpr(ctx.binder.GetContext(), expr, &plan.Type{Id: int32(types.T_bool)})
   217  			if err != nil {
   218  				return nil, err
   219  			}
   220  		}
   221  		exprs[i] = expr
   222  	}
   223  
   224  	return exprs, nil
   225  }
   226  
   227  // splitAstConjunction split a expression to a list of AND conditions.
   228  func splitAstConjunction(astExpr tree.Expr) []tree.Expr {
   229  	var astExprs []tree.Expr
   230  	switch typ := astExpr.(type) {
   231  	case nil:
   232  	case *tree.AndExpr:
   233  		astExprs = append(astExprs, splitAstConjunction(typ.Left)...)
   234  		astExprs = append(astExprs, splitAstConjunction(typ.Right)...)
   235  	case *tree.ParenExpr:
   236  		astExprs = append(astExprs, splitAstConjunction(typ.Expr)...)
   237  	default:
   238  		astExprs = append(astExprs, astExpr)
   239  	}
   240  	return astExprs
   241  }
   242  
   243  // applyDistributivity (X AND B) OR (X AND C) OR (X AND D) => X AND (B OR C OR D)
   244  // TODO: move it into optimizer
   245  func applyDistributivity(ctx context.Context, expr *plan.Expr) *plan.Expr {
   246  	switch exprImpl := expr.Expr.(type) {
   247  	case *plan.Expr_F:
   248  		for i, arg := range exprImpl.F.Args {
   249  			exprImpl.F.Args[i] = applyDistributivity(ctx, arg)
   250  		}
   251  
   252  		if exprImpl.F.Func.ObjName != "or" {
   253  			break
   254  		}
   255  
   256  		leftConds := splitPlanConjunction(exprImpl.F.Args[0])
   257  		rightConds := splitPlanConjunction(exprImpl.F.Args[1])
   258  
   259  		condMap := make(map[string]int)
   260  
   261  		for _, cond := range rightConds {
   262  			condMap[cond.String()] = JoinSideRight
   263  		}
   264  
   265  		var commonConds, leftOnlyConds, rightOnlyConds []*plan.Expr
   266  
   267  		for _, cond := range leftConds {
   268  			exprStr := cond.String()
   269  
   270  			if condMap[exprStr] == JoinSideRight {
   271  				commonConds = append(commonConds, cond)
   272  				condMap[exprStr] = JoinSideBoth
   273  			} else {
   274  				leftOnlyConds = append(leftOnlyConds, cond)
   275  				condMap[exprStr] = JoinSideLeft
   276  			}
   277  		}
   278  
   279  		for _, cond := range rightConds {
   280  			if condMap[cond.String()] == JoinSideRight {
   281  				rightOnlyConds = append(rightOnlyConds, cond)
   282  			}
   283  		}
   284  
   285  		if len(commonConds) == 0 {
   286  			return expr
   287  		}
   288  
   289  		expr, _ = combinePlanConjunction(ctx, commonConds)
   290  
   291  		if len(leftOnlyConds) == 0 || len(rightOnlyConds) == 0 {
   292  			return expr
   293  		}
   294  
   295  		leftExpr, _ := combinePlanConjunction(ctx, leftOnlyConds)
   296  		rightExpr, _ := combinePlanConjunction(ctx, rightOnlyConds)
   297  
   298  		leftExpr, _ = bindFuncExprImplByPlanExpr(ctx, "or", []*plan.Expr{leftExpr, rightExpr})
   299  
   300  		expr, _ = bindFuncExprImplByPlanExpr(ctx, "and", []*plan.Expr{expr, leftExpr})
   301  	}
   302  
   303  	return expr
   304  }
   305  
   306  func unionSlice(left, right []string) []string {
   307  	if len(left) < 1 {
   308  		return right
   309  	}
   310  	if len(right) < 1 {
   311  		return left
   312  	}
   313  	m := make(map[string]bool, len(left)+len(right))
   314  	for _, s := range left {
   315  		m[s] = true
   316  	}
   317  	for _, s := range right {
   318  		m[s] = true
   319  	}
   320  	ret := make([]string, 0)
   321  	for s := range m {
   322  		ret = append(ret, s)
   323  	}
   324  	return ret
   325  }
   326  
   327  func intersectSlice(left, right []string) []string {
   328  	if len(left) < 1 || len(right) < 1 {
   329  		return left
   330  	}
   331  	m := make(map[string]bool, len(left)+len(right))
   332  	for _, s := range left {
   333  		m[s] = true
   334  	}
   335  	ret := make([]string, 0)
   336  	for _, s := range right {
   337  		if _, ok := m[s]; ok {
   338  			ret = append(ret, s)
   339  		}
   340  	}
   341  	return ret
   342  }
   343  
   344  /*
   345  DNF means disjunctive normal form, for example (a and b) or (c and d) or (e and f)
   346  if we have a DNF filter, for example (c1=1 and c2=1) or (c1=2 and c2=2)
   347  we can have extra filter: (c1=1 or c1=2) and (c2=1 or c2=2), which can be pushed down to optimize join
   348  
   349  checkDNF scan the expr and return all groups of cond
   350  for example (c1=1 and c2=1) or (c1=2 and c3=2), c1 is a group because it appears in all disjunctives
   351  and c2,c3 is not a group
   352  
   353  walkThroughDNF accept a keyword string, walk through the expr,
   354  and extract all the conds which contains the keyword
   355  */
   356  func checkDNF(expr *plan.Expr) []string {
   357  	var ret []string
   358  	switch exprImpl := expr.Expr.(type) {
   359  	case *plan.Expr_F:
   360  		if exprImpl.F.Func.ObjName == "or" {
   361  			left := checkDNF(exprImpl.F.Args[0])
   362  			right := checkDNF(exprImpl.F.Args[1])
   363  			return intersectSlice(left, right)
   364  		}
   365  		for _, arg := range exprImpl.F.Args {
   366  			ret = unionSlice(ret, checkDNF(arg))
   367  		}
   368  		return ret
   369  
   370  	case *plan.Expr_Corr:
   371  		ret = append(ret, exprImpl.Corr.String())
   372  	case *plan.Expr_Col:
   373  		ret = append(ret, exprImpl.Col.String())
   374  	}
   375  	return ret
   376  }
   377  
   378  func walkThroughDNF(ctx context.Context, expr *plan.Expr, keywords string) *plan.Expr {
   379  	var retExpr *plan.Expr
   380  	switch exprImpl := expr.Expr.(type) {
   381  	case *plan.Expr_F:
   382  		if exprImpl.F.Func.ObjName == "or" {
   383  			left := walkThroughDNF(ctx, exprImpl.F.Args[0], keywords)
   384  			right := walkThroughDNF(ctx, exprImpl.F.Args[1], keywords)
   385  			if left != nil && right != nil {
   386  				retExpr, _ = bindFuncExprImplByPlanExpr(ctx, "or", []*plan.Expr{left, right})
   387  				return retExpr
   388  			}
   389  		} else if exprImpl.F.Func.ObjName == "and" {
   390  			left := walkThroughDNF(ctx, exprImpl.F.Args[0], keywords)
   391  			right := walkThroughDNF(ctx, exprImpl.F.Args[1], keywords)
   392  			if left == nil {
   393  				return right
   394  			} else if right == nil {
   395  				return left
   396  			} else {
   397  				retExpr, _ = bindFuncExprImplByPlanExpr(ctx, "and", []*plan.Expr{left, right})
   398  				return retExpr
   399  			}
   400  		} else {
   401  			for _, arg := range exprImpl.F.Args {
   402  				if walkThroughDNF(ctx, arg, keywords) == nil {
   403  					return nil
   404  				}
   405  			}
   406  			return expr
   407  		}
   408  
   409  	case *plan.Expr_Corr:
   410  		if exprImpl.Corr.String() == keywords {
   411  			return expr
   412  		} else {
   413  			return nil
   414  		}
   415  	case *plan.Expr_Col:
   416  		if exprImpl.Col.String() == keywords {
   417  			return expr
   418  		} else {
   419  			return nil
   420  		}
   421  	}
   422  	return expr
   423  }
   424  
   425  // deduction of new predicates. for example join on a=b where b=1, then a=1 can be deduced
   426  func predsDeduction(filters, onList []*plan.Expr) []*plan.Expr {
   427  	var newFilters []*plan.Expr
   428  	for _, onPred := range onList {
   429  		ret, col1, col2 := checkOnPred(onPred)
   430  		if !ret {
   431  			continue
   432  		}
   433  		for _, filter := range filters {
   434  			ret, col := CheckFilter(filter)
   435  			if ret && col != nil {
   436  				newExpr := DeepCopyExpr(filter)
   437  				if substituteMatchColumn(newExpr, col1, col2) {
   438  					newFilters = append(newFilters, newExpr)
   439  				}
   440  			}
   441  		}
   442  	}
   443  	return newFilters
   444  }
   445  
   446  // for predicate deduction, filter must be like func(col)>1 , or (col=1) or (col=2)
   447  // and only 1 colRef is allowd in the filter
   448  func CheckFilter(expr *plan.Expr) (bool, *ColRef) {
   449  	switch exprImpl := expr.Expr.(type) {
   450  	case *plan.Expr_F:
   451  		switch exprImpl.F.Func.ObjName {
   452  		case "=", ">", "<", ">=", "<=":
   453  			switch exprImpl.F.Args[1].Expr.(type) {
   454  			case *plan.Expr_C:
   455  				return CheckFilter(exprImpl.F.Args[0])
   456  			default:
   457  				return false, nil
   458  			}
   459  		default:
   460  			var col *ColRef
   461  			for _, arg := range exprImpl.F.Args {
   462  				ret, c := CheckFilter(arg)
   463  				if !ret {
   464  					return false, nil
   465  				} else if c != nil {
   466  					if col != nil {
   467  						if col.RelPos != c.RelPos || col.ColPos != c.ColPos {
   468  							return false, nil
   469  						}
   470  					} else {
   471  						col = c
   472  					}
   473  				}
   474  			}
   475  			return true, col
   476  		}
   477  	case *plan.Expr_Col:
   478  		return true, exprImpl.Col
   479  	}
   480  	return false, nil
   481  }
   482  
   483  func substituteMatchColumn(expr *plan.Expr, onPredCol1, onPredCol2 *ColRef) bool {
   484  	var ret bool
   485  	switch exprImpl := expr.Expr.(type) {
   486  	case *plan.Expr_Col:
   487  		colName := exprImpl.Col.String()
   488  		if colName == onPredCol1.String() {
   489  			exprImpl.Col.RelPos = onPredCol2.RelPos
   490  			exprImpl.Col.ColPos = onPredCol2.ColPos
   491  			exprImpl.Col.Name = onPredCol2.Name
   492  			return true
   493  		} else if colName == onPredCol2.String() {
   494  			exprImpl.Col.RelPos = onPredCol1.RelPos
   495  			exprImpl.Col.ColPos = onPredCol1.ColPos
   496  			exprImpl.Col.Name = onPredCol1.Name
   497  			return true
   498  		}
   499  	case *plan.Expr_F:
   500  		for _, arg := range exprImpl.F.Args {
   501  			if substituteMatchColumn(arg, onPredCol1, onPredCol2) {
   502  				ret = true
   503  			}
   504  		}
   505  	}
   506  	return ret
   507  }
   508  
   509  func checkOnPred(onPred *plan.Expr) (bool, *ColRef, *ColRef) {
   510  	//onPred must be equality, children must be column name
   511  	switch onPredImpl := onPred.Expr.(type) {
   512  	case *plan.Expr_F:
   513  		if onPredImpl.F.Func.ObjName != "=" {
   514  			return false, nil, nil
   515  		}
   516  		args := onPredImpl.F.Args
   517  		var col1, col2 *ColRef
   518  		switch child1 := args[0].Expr.(type) {
   519  		case *plan.Expr_Col:
   520  			col1 = child1.Col
   521  		}
   522  		switch child2 := args[1].Expr.(type) {
   523  		case *plan.Expr_Col:
   524  			col2 = child2.Col
   525  		}
   526  		if col1 != nil && col2 != nil {
   527  			return true, col1, col2
   528  		}
   529  	}
   530  	return false, nil, nil
   531  }
   532  
   533  func splitPlanConjunction(expr *plan.Expr) []*plan.Expr {
   534  	var exprs []*plan.Expr
   535  	switch exprImpl := expr.Expr.(type) {
   536  	case *plan.Expr_F:
   537  		if exprImpl.F.Func.ObjName == "and" {
   538  			exprs = append(exprs, splitPlanConjunction(exprImpl.F.Args[0])...)
   539  			exprs = append(exprs, splitPlanConjunction(exprImpl.F.Args[1])...)
   540  		} else {
   541  			exprs = append(exprs, expr)
   542  		}
   543  
   544  	default:
   545  		exprs = append(exprs, expr)
   546  	}
   547  
   548  	return exprs
   549  }
   550  
   551  func combinePlanConjunction(ctx context.Context, exprs []*plan.Expr) (expr *plan.Expr, err error) {
   552  	expr = exprs[0]
   553  
   554  	for i := 1; i < len(exprs); i++ {
   555  		expr, err = bindFuncExprImplByPlanExpr(ctx, "and", []*plan.Expr{expr, exprs[i]})
   556  
   557  		if err != nil {
   558  			break
   559  		}
   560  	}
   561  
   562  	return
   563  }
   564  
   565  func rejectsNull(filter *plan.Expr, proc *process.Process) bool {
   566  	filter = replaceColRefWithNull(DeepCopyExpr(filter))
   567  
   568  	bat := batch.NewWithSize(0)
   569  	bat.Zs = []int64{1}
   570  	filter, err := ConstantFold(bat, filter, proc)
   571  	if err != nil {
   572  		return false
   573  	}
   574  
   575  	if f, ok := filter.Expr.(*plan.Expr_C); ok {
   576  		if f.C.Isnull {
   577  			return true
   578  		}
   579  
   580  		if fbool, ok := f.C.Value.(*plan.Const_Bval); ok {
   581  			return !fbool.Bval
   582  		}
   583  	}
   584  
   585  	return false
   586  }
   587  
   588  func replaceColRefWithNull(expr *plan.Expr) *plan.Expr {
   589  	switch exprImpl := expr.Expr.(type) {
   590  	case *plan.Expr_Col:
   591  		expr = &plan.Expr{
   592  			Typ: expr.Typ,
   593  			Expr: &plan.Expr_C{
   594  				C: &plan.Const{
   595  					Isnull: true,
   596  				},
   597  			},
   598  		}
   599  
   600  	case *plan.Expr_F:
   601  		for i, arg := range exprImpl.F.Args {
   602  			exprImpl.F.Args[i] = replaceColRefWithNull(arg)
   603  		}
   604  	}
   605  
   606  	return expr
   607  }
   608  
   609  func increaseRefCnt(expr *plan.Expr, colRefCnt map[[2]int32]int) {
   610  	switch exprImpl := expr.Expr.(type) {
   611  	case *plan.Expr_Col:
   612  		colRefCnt[[2]int32{exprImpl.Col.RelPos, exprImpl.Col.ColPos}]++
   613  
   614  	case *plan.Expr_F:
   615  		for _, arg := range exprImpl.F.Args {
   616  			increaseRefCnt(arg, colRefCnt)
   617  		}
   618  	}
   619  }
   620  
   621  func decreaseRefCnt(expr *plan.Expr, colRefCnt map[[2]int32]int) {
   622  	switch exprImpl := expr.Expr.(type) {
   623  	case *plan.Expr_Col:
   624  		colRefCnt[[2]int32{exprImpl.Col.RelPos, exprImpl.Col.ColPos}]--
   625  
   626  	case *plan.Expr_F:
   627  		for _, arg := range exprImpl.F.Args {
   628  			decreaseRefCnt(arg, colRefCnt)
   629  		}
   630  	}
   631  }
   632  
   633  func getHyperEdgeFromExpr(expr *plan.Expr, leafByTag map[int32]int32, hyperEdge map[int32]any) {
   634  	switch exprImpl := expr.Expr.(type) {
   635  	case *plan.Expr_Col:
   636  		hyperEdge[leafByTag[exprImpl.Col.RelPos]] = nil
   637  
   638  	case *plan.Expr_F:
   639  		for _, arg := range exprImpl.F.Args {
   640  			getHyperEdgeFromExpr(arg, leafByTag, hyperEdge)
   641  		}
   642  	}
   643  }
   644  
   645  func getNumOfCharacters(str string) int {
   646  	strRune := []rune(str)
   647  	return len(strRune)
   648  }
   649  
   650  func getUnionSelects(ctx context.Context, stmt *tree.UnionClause, selects *[]tree.Statement, unionTypes *[]plan.Node_NodeType) error {
   651  	switch leftStmt := stmt.Left.(type) {
   652  	case *tree.UnionClause:
   653  		err := getUnionSelects(ctx, leftStmt, selects, unionTypes)
   654  		if err != nil {
   655  			return err
   656  		}
   657  	case *tree.SelectClause:
   658  		*selects = append(*selects, leftStmt)
   659  	case *tree.ParenSelect:
   660  		*selects = append(*selects, leftStmt.Select)
   661  	default:
   662  		return moerr.NewParseError(ctx, "unexpected statement in union: '%v'", tree.String(leftStmt, dialect.MYSQL))
   663  	}
   664  
   665  	// right is not UNION allways
   666  	switch rightStmt := stmt.Right.(type) {
   667  	case *tree.SelectClause:
   668  		if stmt.Type == tree.UNION && !stmt.All {
   669  			rightStr := tree.String(rightStmt, dialect.MYSQL)
   670  			if len(*selects) == 1 && tree.String((*selects)[0], dialect.MYSQL) == rightStr {
   671  				return nil
   672  			}
   673  		}
   674  
   675  		*selects = append(*selects, rightStmt)
   676  	case *tree.ParenSelect:
   677  		if stmt.Type == tree.UNION && !stmt.All {
   678  			rightStr := tree.String(rightStmt.Select, dialect.MYSQL)
   679  			if len(*selects) == 1 && tree.String((*selects)[0], dialect.MYSQL) == rightStr {
   680  				return nil
   681  			}
   682  		}
   683  
   684  		*selects = append(*selects, rightStmt.Select)
   685  	default:
   686  		return moerr.NewParseError(ctx, "unexpected statement in union2: '%v'", tree.String(rightStmt, dialect.MYSQL))
   687  	}
   688  
   689  	switch stmt.Type {
   690  	case tree.UNION:
   691  		if stmt.All {
   692  			*unionTypes = append(*unionTypes, plan.Node_UNION_ALL)
   693  		} else {
   694  			*unionTypes = append(*unionTypes, plan.Node_UNION)
   695  		}
   696  	case tree.INTERSECT:
   697  		if stmt.All {
   698  			*unionTypes = append(*unionTypes, plan.Node_INTERSECT_ALL)
   699  		} else {
   700  			*unionTypes = append(*unionTypes, plan.Node_INTERSECT)
   701  		}
   702  	case tree.EXCEPT, tree.UT_MINUS:
   703  		if stmt.All {
   704  			return moerr.NewNYI(ctx, "EXCEPT/MINUS ALL clause")
   705  		} else {
   706  			*unionTypes = append(*unionTypes, plan.Node_MINUS)
   707  		}
   708  	}
   709  	return nil
   710  }
   711  
   712  func containsParamRef(expr *plan.Expr) bool {
   713  	var ret bool
   714  	switch exprImpl := expr.Expr.(type) {
   715  	case *plan.Expr_F:
   716  		for _, arg := range exprImpl.F.Args {
   717  			ret = ret || containsParamRef(arg)
   718  		}
   719  	case *plan.Expr_P:
   720  		return true
   721  	}
   722  	return ret
   723  }
   724  
   725  func getColumnMapByExpr(expr *plan.Expr, tableDef *plan.TableDef, columnMap *map[int]int) {
   726  	if expr == nil {
   727  		return
   728  	}
   729  	switch exprImpl := expr.Expr.(type) {
   730  	case *plan.Expr_F:
   731  		for _, arg := range exprImpl.F.Args {
   732  			getColumnMapByExpr(arg, tableDef, columnMap)
   733  		}
   734  
   735  	case *plan.Expr_Col:
   736  		idx := exprImpl.Col.ColPos
   737  		colName := exprImpl.Col.Name
   738  		dotIdx := strings.Index(colName, ".")
   739  		colName = colName[dotIdx+1:]
   740  		colIdx := tableDef.Name2ColIndex[colName]
   741  		(*columnMap)[int(idx)] = int(colIdx)
   742  	}
   743  }
   744  
   745  func GetColumnsByExpr(expr *plan.Expr, tableDef *plan.TableDef) (map[int]int, []int, int) {
   746  	columnMap := make(map[int]int)
   747  	// key = expr's ColPos,  value = tableDef's ColPos
   748  	getColumnMapByExpr(expr, tableDef, &columnMap)
   749  
   750  	maxCol := 0
   751  	useColumn := len(columnMap)
   752  	columns := make([]int, useColumn)
   753  	i := 0
   754  	for k, v := range columnMap {
   755  		if k > maxCol {
   756  			maxCol = k
   757  		}
   758  		columns[i] = v //tableDef's ColPos
   759  		i = i + 1
   760  	}
   761  	return columnMap, columns, maxCol
   762  }
   763  
   764  func EvalFilterExpr(ctx context.Context, expr *plan.Expr, bat *batch.Batch, proc *process.Process) (bool, error) {
   765  	if len(bat.Vecs) == 0 { //that's constant expr
   766  		e, err := ConstantFold(bat, expr, proc)
   767  		if err != nil {
   768  			return false, err
   769  		}
   770  
   771  		if cExpr, ok := e.Expr.(*plan.Expr_C); ok {
   772  			if bVal, bOk := cExpr.C.Value.(*plan.Const_Bval); bOk {
   773  				return bVal.Bval, nil
   774  			}
   775  		}
   776  		return false, moerr.NewInternalError(ctx, "cannot eval filter expr")
   777  	} else {
   778  		vec, err := colexec.EvalExprByZonemapBat(ctx, bat, proc, expr)
   779  		if err != nil {
   780  			return false, err
   781  		}
   782  		if vec.Typ.Oid != types.T_bool {
   783  			return false, moerr.NewInternalError(ctx, "cannot eval filter expr")
   784  		}
   785  		cols := vector.MustTCols[bool](vec)
   786  		for _, isNeed := range cols {
   787  			if isNeed {
   788  				return true, nil
   789  			}
   790  		}
   791  		return false, nil
   792  	}
   793  }
   794  
   795  func exchangeVectors(datas [][2]any, depth int, tmpResult []any, result *[]*vector.Vector, mp *mpool.MPool) {
   796  	for i := 0; i < len(datas[depth]); i++ {
   797  		tmpResult[depth] = datas[depth][i]
   798  		if depth != len(datas)-1 {
   799  			exchangeVectors(datas, depth+1, tmpResult, result, mp)
   800  		} else {
   801  			for j, val := range tmpResult {
   802  				(*result)[j].Append(val, false, mp)
   803  			}
   804  		}
   805  	}
   806  }
   807  
   808  func BuildVectorsByData(datas [][2]any, dataTypes []uint8, mp *mpool.MPool) []*vector.Vector {
   809  	vectors := make([]*vector.Vector, len(dataTypes))
   810  	for i, typ := range dataTypes {
   811  		vectors[i] = vector.New(types.T(typ).ToType())
   812  	}
   813  
   814  	tmpResult := make([]any, len(datas))
   815  	exchangeVectors(datas, 0, tmpResult, &vectors, mp)
   816  
   817  	return vectors
   818  }
   819  
   820  func CheckExprIsMonotonic(ctx context.Context, expr *plan.Expr) bool {
   821  	if expr == nil {
   822  		return false
   823  	}
   824  	switch exprImpl := expr.Expr.(type) {
   825  	case *plan.Expr_F:
   826  		for _, arg := range exprImpl.F.Args {
   827  			isMonotonic := CheckExprIsMonotonic(ctx, arg)
   828  			if !isMonotonic {
   829  				return false
   830  			}
   831  		}
   832  
   833  		isMonotonic, _ := function.GetFunctionIsMonotonicById(ctx, exprImpl.F.Func.GetObj())
   834  		if !isMonotonic {
   835  			return false
   836  		}
   837  
   838  		return true
   839  	default:
   840  		return true
   841  	}
   842  }
   843  
   844  // handle the filter list for zonemap. rewrite and constFold
   845  func HandleFiltersForZM(exprList []*plan.Expr, proc *process.Process) *plan.Expr {
   846  	if proc == nil || proc.Ctx == nil {
   847  		return nil
   848  	}
   849  	var newExprList []*plan.Expr
   850  	bat := batch.NewWithSize(0)
   851  	bat.Zs = []int64{1}
   852  	for _, expr := range exprList {
   853  		tmpexpr, _ := ConstantFold(bat, DeepCopyExpr(expr), proc)
   854  		if tmpexpr != nil {
   855  			expr = tmpexpr
   856  		}
   857  		if !containsParamRef(expr) && CheckExprIsMonotonic(proc.Ctx, expr) {
   858  			newExprList = append(newExprList, expr)
   859  		}
   860  	}
   861  	e := colexec.RewriteFilterExprList(newExprList)
   862  	return e
   863  }
   864  
   865  func ConstantFold(bat *batch.Batch, e *plan.Expr, proc *process.Process) (*plan.Expr, error) {
   866  	var err error
   867  
   868  	ef, ok := e.Expr.(*plan.Expr_F)
   869  	if !ok {
   870  		return e, nil
   871  	}
   872  	overloadID := ef.F.Func.GetObj()
   873  	f, err := function.GetFunctionByID(proc.Ctx, overloadID)
   874  	if err != nil {
   875  		return nil, err
   876  	}
   877  	if f.Volatile { // function cannot be fold
   878  		return e, nil
   879  	}
   880  	for i := range ef.F.Args {
   881  		ef.F.Args[i], err = ConstantFold(bat, ef.F.Args[i], proc)
   882  		if err != nil {
   883  			return nil, err
   884  		}
   885  	}
   886  	if !rule.IsConstant(e) {
   887  		return e, nil
   888  	}
   889  	vec, err := colexec.EvalExpr(bat, proc, e)
   890  	if err != nil {
   891  		return nil, err
   892  	}
   893  	c := rule.GetConstantValue(vec, false)
   894  	vec.Free(proc.Mp())
   895  	if c == nil {
   896  		return e, nil
   897  	}
   898  	ec := &plan.Expr_C{
   899  		C: c,
   900  	}
   901  	e.Expr = ec
   902  	return e, nil
   903  }
   904  
   905  func rewriteTableFunction(tblFunc *tree.TableFunction, leftCtx *BindContext) error {
   906  	//var err error
   907  	//newTableAliasMap := make(map[string]string)
   908  	//newColAliasMap := make(map[string]string)
   909  	//col2Table := make(map[string]string)
   910  	//for i := range tblFunc.SelectStmt.Select.(*tree.SelectClause).From.Tables {
   911  	//	alias := string(tblFunc.SelectStmt.Select.(*tree.SelectClause).From.Tables[i].(*tree.AliasedTableExpr).As.Alias)
   912  	//	if len(alias) == 0 {
   913  	//		alias = string(tblFunc.SelectStmt.Select.(*tree.SelectClause).From.Tables[i].(*tree.AliasedTableExpr).Expr.(*tree.TableName).ObjectName)
   914  	//	}
   915  	//	newAlias := fmt.Sprintf("%s_tbl_%d", alias, i)
   916  	//	tblFunc.SelectStmt.Select.(*tree.SelectClause).From.Tables[i].(*tree.AliasedTableExpr).As.Alias = tree.Identifier(newAlias)
   917  	//	//newTableAliasMap[alias] = newAlias
   918  	//}
   919  	for i := range tblFunc.SelectStmt.Select.(*tree.SelectClause).Exprs {
   920  		selectExpr := tblFunc.SelectStmt.Select.(*tree.SelectClause).Exprs[i] //take care, this is not a pointer
   921  		expr := selectExpr.Expr.(*tree.UnresolvedName)
   922  		_, tableName, colName := expr.GetNames()
   923  		if len(tableName) == 0 {
   924  			if binding, ok := leftCtx.bindingByCol[colName]; ok {
   925  				tableName = binding.table
   926  				expr.Parts[1] = tableName
   927  			} else {
   928  				return moerr.NewInternalError(leftCtx.binder.GetContext(), "cannot find column '%s'", colName)
   929  			}
   930  		}
   931  		//newTableName = newTableAliasMap[tableName]
   932  		//newColAlias = fmt.Sprintf("%s_%d", colName, i)
   933  		//newColAliasMap[colName] = newColAlias
   934  		//col2Table[newColAlias] = newTableName
   935  		//newName, err := tree.NewUnresolvedName(newTableName, colName)
   936  		//if err != nil {
   937  		//	return err
   938  		//}
   939  		//tblFunc.SelectStmt.Select.(*tree.SelectClause).Exprs[i].Expr = newName
   940  		//tblFunc.SelectStmt.Select.(*tree.SelectClause).Exprs[i].As = tree.UnrestrictedIdentifier(newColAlias)
   941  	}
   942  
   943  	//for i, _ := range tblFunc.Func.Exprs {
   944  	//	tblFunc.Func.Exprs[i], err = rewriteTableFunctionExpr(tblFunc.Func.Exprs[i], newTableAliasMap, newColAliasMap, col2Table)
   945  	//	if err != nil {
   946  	//		return err
   947  	//	}
   948  	//}
   949  	return nil
   950  }
   951  
   952  //
   953  //func rewriteTableFunctionExpr(ast tree.Expr, tableAlias map[string]string, colAlias map[string]string, col2Table map[string]string) (tree.Expr, error) {
   954  //	var err error
   955  //	switch item := ast.(type) {
   956  //	case *tree.UnresolvedName:
   957  //		_, tblName, colName := item.GetNames()
   958  //		if len(tblName) > 0 {
   959  //			if alias, ok := tableAlias[tblName]; ok {
   960  //				item.Parts[1] = alias
   961  //			}
   962  //		} else {
   963  //			newColName := colAlias[colName]
   964  //			newTblName := col2Table[newColName]
   965  //			item.Parts[1] = newTblName
   966  //		}
   967  //	case *tree.FuncExpr:
   968  //		for i, _ := range item.Exprs {
   969  //			item.Exprs[i], err = rewriteTableFunctionExpr(item.Exprs[i], tableAlias, colAlias, col2Table)
   970  //			if err != nil {
   971  //				return nil, err
   972  //			}
   973  //		}
   974  //	case *tree.NumVal:
   975  //		break
   976  //	default:
   977  //		return nil, moerr.NewNotSupported("table function expr '%s' not supported", item)
   978  //	}
   979  //	return ast, nil
   980  //}
   981  
   982  // lookUpFnCols looks up the columns in the function expression
   983  func lookUpFnCols(ret tree.SelectExprs, fn interface{}) tree.SelectExprs {
   984  	switch fnExpr := fn.(type) { //TODO add more cases
   985  	case *tree.UnresolvedName:
   986  		ret = append(ret, tree.SelectExpr{Expr: fnExpr})
   987  	case *tree.FuncExpr:
   988  		for _, arg := range fnExpr.Exprs {
   989  			ret = lookUpFnCols(ret, arg)
   990  		}
   991  	case *tree.BinaryExpr:
   992  		ret = lookUpFnCols(ret, fnExpr.Left)
   993  		ret = lookUpFnCols(ret, fnExpr.Right)
   994  	case *tree.UnaryExpr:
   995  		ret = lookUpFnCols(ret, fnExpr.Expr)
   996  	}
   997  	return ret
   998  }
   999  func buildTableFunctionStmt(tbl *tree.TableFunction, left tree.TableExpr, leftCtx *BindContext) error {
  1000  	var selectExprs tree.SelectExprs
  1001  	selectExprs = lookUpFnCols(selectExprs, tbl.Func)
  1002  	tbl.SelectStmt = &tree.Select{
  1003  		Select: &tree.SelectClause{
  1004  			From: &tree.From{
  1005  				Tables: []tree.TableExpr{left},
  1006  			},
  1007  			Exprs: selectExprs,
  1008  		},
  1009  	}
  1010  	return rewriteTableFunction(tbl, leftCtx)
  1011  }
  1012  
  1013  func clearBinding(ctx *BindContext) {
  1014  	ctx.bindingByCol = make(map[string]*Binding)
  1015  	ctx.bindingByTable = make(map[string]*Binding)
  1016  	ctx.bindingByTag = make(map[int32]*Binding)
  1017  	ctx.bindingTree = &BindingTreeNode{}
  1018  	ctx.bindings = make([]*Binding, 0)
  1019  }
  1020  
  1021  func unwindTupleComparison(ctx context.Context, nonEqOp, op string, leftExprs, rightExprs []*plan.Expr, idx int) (*plan.Expr, error) {
  1022  	if idx == len(leftExprs)-1 {
  1023  		return bindFuncExprImplByPlanExpr(ctx, op, []*plan.Expr{
  1024  			leftExprs[idx],
  1025  			rightExprs[idx],
  1026  		})
  1027  	}
  1028  
  1029  	expr, err := bindFuncExprImplByPlanExpr(ctx, nonEqOp, []*plan.Expr{
  1030  		DeepCopyExpr(leftExprs[idx]),
  1031  		DeepCopyExpr(rightExprs[idx]),
  1032  	})
  1033  	if err != nil {
  1034  		return nil, err
  1035  	}
  1036  
  1037  	eqExpr, err := bindFuncExprImplByPlanExpr(ctx, "=", []*plan.Expr{
  1038  		leftExprs[idx],
  1039  		rightExprs[idx],
  1040  	})
  1041  	if err != nil {
  1042  		return nil, err
  1043  	}
  1044  
  1045  	tailExpr, err := unwindTupleComparison(ctx, nonEqOp, op, leftExprs, rightExprs, idx+1)
  1046  	if err != nil {
  1047  		return nil, err
  1048  	}
  1049  
  1050  	tailExpr, err = bindFuncExprImplByPlanExpr(ctx, "and", []*plan.Expr{eqExpr, tailExpr})
  1051  	if err != nil {
  1052  		return nil, err
  1053  	}
  1054  
  1055  	return bindFuncExprImplByPlanExpr(ctx, "or", []*plan.Expr{expr, tailExpr})
  1056  }
  1057  
  1058  // checkNoNeedCast
  1059  // if constant's type higher than column's type
  1060  // and constant's value in range of column's type, then no cast was needed
  1061  func checkNoNeedCast(constT, columnT types.Type, constExpr *plan.Expr_C) bool {
  1062  	switch constT.Oid {
  1063  	case types.T_char, types.T_varchar, types.T_text:
  1064  		switch columnT.Oid {
  1065  		case types.T_char, types.T_varchar:
  1066  			if constT.Width <= columnT.Width {
  1067  				return true
  1068  			} else {
  1069  				return false
  1070  			}
  1071  		case types.T_text:
  1072  			return true
  1073  		default:
  1074  			return false
  1075  		}
  1076  
  1077  	case types.T_int8, types.T_int16, types.T_int32, types.T_int64:
  1078  		val, valOk := constExpr.C.Value.(*plan.Const_I64Val)
  1079  		if !valOk {
  1080  			return false
  1081  		}
  1082  		constVal := val.I64Val
  1083  		switch columnT.Oid {
  1084  		case types.T_int8:
  1085  			return constVal <= int64(math.MaxInt8) && constVal >= int64(math.MinInt8)
  1086  		case types.T_int16:
  1087  			return constVal <= int64(math.MaxInt16) && constVal >= int64(math.MinInt16)
  1088  		case types.T_int32:
  1089  			return constVal <= int64(math.MaxInt32) && constVal >= int64(math.MinInt32)
  1090  		case types.T_int64:
  1091  			return true
  1092  		case types.T_uint8:
  1093  			return constVal <= math.MaxUint8 && constVal >= 0
  1094  		case types.T_uint16:
  1095  			return constVal <= math.MaxUint16 && constVal >= 0
  1096  		case types.T_uint32:
  1097  			return constVal <= math.MaxUint32 && constVal >= 0
  1098  		case types.T_uint64:
  1099  			return constVal >= 0
  1100  		case types.T_varchar:
  1101  			return true
  1102  		case types.T_float32:
  1103  			//float32 has 6-7 significant digits.
  1104  			return constVal <= 100000 && constVal >= -100000
  1105  		default:
  1106  			return false
  1107  		}
  1108  	case types.T_uint8, types.T_uint16, types.T_uint32, types.T_uint64:
  1109  		val_u, valOk := constExpr.C.Value.(*plan.Const_U64Val)
  1110  		if !valOk {
  1111  			return false
  1112  		}
  1113  		constVal := val_u.U64Val
  1114  		switch columnT.Oid {
  1115  		case types.T_int8:
  1116  			return constVal <= math.MaxInt8
  1117  		case types.T_int16:
  1118  			return constVal <= math.MaxInt16
  1119  		case types.T_int32:
  1120  			return constVal <= math.MaxInt32
  1121  		case types.T_int64:
  1122  			return constVal <= math.MaxInt64
  1123  		case types.T_uint8:
  1124  			return constVal <= math.MaxUint8
  1125  		case types.T_uint16:
  1126  			return constVal <= math.MaxUint16
  1127  		case types.T_uint32:
  1128  			return constVal <= math.MaxUint32
  1129  		case types.T_uint64:
  1130  			return true
  1131  		case types.T_float32:
  1132  			//float32 has 6-7 significant digits.
  1133  			return constVal <= 100000
  1134  		default:
  1135  			return false
  1136  		}
  1137  	default:
  1138  		return false
  1139  	}
  1140  
  1141  }
  1142  
  1143  func InitInfileParam(param *tree.ExternParam) error {
  1144  	for i := 0; i < len(param.Option); i += 2 {
  1145  		switch strings.ToLower(param.Option[i]) {
  1146  		case "filepath":
  1147  			param.Filepath = param.Option[i+1]
  1148  		case "compression":
  1149  			param.CompressType = param.Option[i+1]
  1150  		case "format":
  1151  			format := strings.ToLower(param.Option[i+1])
  1152  			if format != tree.CSV && format != tree.JSONLINE {
  1153  				return moerr.NewBadConfig(param.Ctx, "the format '%s' is not supported", format)
  1154  			}
  1155  			param.Format = format
  1156  		case "jsondata":
  1157  			jsondata := strings.ToLower(param.Option[i+1])
  1158  			if jsondata != tree.OBJECT && jsondata != tree.ARRAY {
  1159  				return moerr.NewBadConfig(param.Ctx, "the jsondata '%s' is not supported", jsondata)
  1160  			}
  1161  			param.JsonData = jsondata
  1162  			param.Format = tree.JSONLINE
  1163  		default:
  1164  			return moerr.NewBadConfig(param.Ctx, "the keyword '%s' is not support", strings.ToLower(param.Option[i]))
  1165  		}
  1166  	}
  1167  	if len(param.Filepath) == 0 {
  1168  		return moerr.NewBadConfig(param.Ctx, "the filepath must be specified")
  1169  	}
  1170  	if param.Format == tree.JSONLINE && len(param.JsonData) == 0 {
  1171  		return moerr.NewBadConfig(param.Ctx, "the jsondata must be specified")
  1172  	}
  1173  	if len(param.Format) == 0 {
  1174  		param.Format = tree.CSV
  1175  	}
  1176  	return nil
  1177  }
  1178  
  1179  func InitS3Param(param *tree.ExternParam) error {
  1180  	param.S3Param = &tree.S3Parameter{}
  1181  	for i := 0; i < len(param.Option); i += 2 {
  1182  		switch strings.ToLower(param.Option[i]) {
  1183  		case "endpoint":
  1184  			param.S3Param.Endpoint = param.Option[i+1]
  1185  		case "region":
  1186  			param.S3Param.Region = param.Option[i+1]
  1187  		case "access_key_id":
  1188  			param.S3Param.APIKey = param.Option[i+1]
  1189  		case "secret_access_key":
  1190  			param.S3Param.APISecret = param.Option[i+1]
  1191  		case "bucket":
  1192  			param.S3Param.Bucket = param.Option[i+1]
  1193  		case "filepath":
  1194  			param.Filepath = param.Option[i+1]
  1195  		case "compression":
  1196  			param.CompressType = param.Option[i+1]
  1197  		case "provider":
  1198  			param.S3Param.Provider = param.Option[i+1]
  1199  		case "role_arn":
  1200  			param.S3Param.RoleArn = param.Option[i+1]
  1201  		case "external_id":
  1202  			param.S3Param.ExternalId = param.Option[i+1]
  1203  		case "format":
  1204  			format := strings.ToLower(param.Option[i+1])
  1205  			if format != tree.CSV && format != tree.JSONLINE {
  1206  				return moerr.NewBadConfig(param.Ctx, "the format '%s' is not supported", format)
  1207  			}
  1208  			param.Format = format
  1209  		case "jsondata":
  1210  			jsondata := strings.ToLower(param.Option[i+1])
  1211  			if jsondata != tree.OBJECT && jsondata != tree.ARRAY {
  1212  				return moerr.NewBadConfig(param.Ctx, "the jsondata '%s' is not supported", jsondata)
  1213  			}
  1214  			param.JsonData = jsondata
  1215  			param.Format = tree.JSONLINE
  1216  
  1217  		default:
  1218  			return moerr.NewBadConfig(param.Ctx, "the keyword '%s' is not support", strings.ToLower(param.Option[i]))
  1219  		}
  1220  	}
  1221  	if param.Format == tree.JSONLINE && len(param.JsonData) == 0 {
  1222  		return moerr.NewBadConfig(param.Ctx, "the jsondata must be specified")
  1223  	}
  1224  	if len(param.Format) == 0 {
  1225  		param.Format = tree.CSV
  1226  	}
  1227  	return nil
  1228  }
  1229  
  1230  func GetForETLWithType(param *tree.ExternParam, prefix string) (res fileservice.ETLFileService, readPath string, err error) {
  1231  	if param.ScanType == tree.S3 {
  1232  		buf := new(strings.Builder)
  1233  		w := csv.NewWriter(buf)
  1234  		opts := []string{"s3-opts", "endpoint=" + param.S3Param.Endpoint, "region=" + param.S3Param.Region, "key=" + param.S3Param.APIKey, "secret=" + param.S3Param.APISecret,
  1235  			"bucket=" + param.S3Param.Bucket, "role-arn=" + param.S3Param.RoleArn, "external-id=" + param.S3Param.ExternalId}
  1236  		if param.S3Param.Provider == "minio" {
  1237  			opts = append(opts, "is-minio=true")
  1238  		}
  1239  		if err = w.Write(opts); err != nil {
  1240  			return nil, "", err
  1241  		}
  1242  		w.Flush()
  1243  		return fileservice.GetForETL(nil, fileservice.JoinPath(buf.String(), prefix))
  1244  	}
  1245  	return fileservice.GetForETL(param.FileService, prefix)
  1246  }
  1247  
  1248  // ReadDir support "etl:" and "/..." absolute path, NOT support relative path.
  1249  func ReadDir(param *tree.ExternParam) (fileList []string, fileSize []int64, err error) {
  1250  	filePath := strings.TrimSpace(param.Filepath)
  1251  	if strings.HasPrefix(filePath, "etl:") {
  1252  		filePath = path.Clean(filePath)
  1253  	} else {
  1254  		filePath = path.Clean("/" + filePath)
  1255  	}
  1256  
  1257  	sep := "/"
  1258  	pathDir := strings.Split(filePath, sep)
  1259  	l := list.New()
  1260  	l2 := list.New()
  1261  	if pathDir[0] == "" {
  1262  		l.PushBack(sep)
  1263  	} else {
  1264  		l.PushBack(pathDir[0])
  1265  	}
  1266  
  1267  	for i := 1; i < len(pathDir); i++ {
  1268  		length := l.Len()
  1269  		for j := 0; j < length; j++ {
  1270  			prefix := l.Front().Value.(string)
  1271  			fs, readPath, err := GetForETLWithType(param, prefix)
  1272  			if err != nil {
  1273  				return nil, nil, err
  1274  			}
  1275  			entries, err := fs.List(param.Ctx, readPath)
  1276  			if err != nil {
  1277  				return nil, nil, err
  1278  			}
  1279  			for _, entry := range entries {
  1280  				if !entry.IsDir && i+1 != len(pathDir) {
  1281  					continue
  1282  				}
  1283  				if entry.IsDir && i+1 == len(pathDir) {
  1284  					continue
  1285  				}
  1286  				matched, err := path.Match(pathDir[i], entry.Name)
  1287  				if err != nil {
  1288  					return nil, nil, err
  1289  				}
  1290  				if !matched {
  1291  					continue
  1292  				}
  1293  				l.PushBack(path.Join(l.Front().Value.(string), entry.Name))
  1294  				if !entry.IsDir {
  1295  					l2.PushBack(entry.Size)
  1296  				}
  1297  			}
  1298  			l.Remove(l.Front())
  1299  		}
  1300  	}
  1301  	len := l.Len()
  1302  	for j := 0; j < len; j++ {
  1303  		fileList = append(fileList, l.Front().Value.(string))
  1304  		l.Remove(l.Front())
  1305  		fileSize = append(fileSize, l2.Front().Value.(int64))
  1306  		l2.Remove(l2.Front())
  1307  	}
  1308  	return fileList, fileSize, err
  1309  }