github.com/matrixorigin/matrixone@v0.7.0/pkg/sql/plan/build_load.go (about)

     1  // Copyright 2021 - 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package plan
    16  
    17  import (
    18  	"encoding/json"
    19  	"strings"
    20  
    21  	"github.com/matrixorigin/matrixone/pkg/sql/util"
    22  
    23  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    24  	"github.com/matrixorigin/matrixone/pkg/pb/plan"
    25  	"github.com/matrixorigin/matrixone/pkg/sql/parsers/tree"
    26  )
    27  
    28  func buildLoad(stmt *tree.Load, ctx CompilerContext) (*Plan, error) {
    29  	stmt.Param.Local = stmt.Local
    30  	if err := checkFileExist(stmt.Param, ctx); err != nil {
    31  		return nil, err
    32  	}
    33  
    34  	if err := InitNullMap(stmt.Param, ctx); err != nil {
    35  		return nil, err
    36  	}
    37  	tblName := string(stmt.Table.ObjectName)
    38  	tblInfo, err := getDmlTableInfo(ctx, tree.TableExprs{stmt.Table}, nil, nil)
    39  	if err != nil {
    40  		return nil, err
    41  	}
    42  	tableDef := tblInfo.tableDefs[0]
    43  	objRef := tblInfo.objRef[0]
    44  	clusterTable, err := getAccountInfoOfClusterTable(ctx, stmt.Accounts, tableDef, tblInfo.isClusterTable[0])
    45  	if err != nil {
    46  		return nil, err
    47  	}
    48  	// if tblInfo.haveConstraint {
    49  	// 	return nil, moerr.NewNotSupported(ctx.GetContext(), "table '%v' have contraint, can not use load statement", tblName)
    50  	// }
    51  
    52  	tableDef.Name2ColIndex = map[string]int32{}
    53  	node1 := &plan.Node{}
    54  	node1.NodeType = plan.Node_EXTERNAL_SCAN
    55  	node1.Stats = &plan.Stats{}
    56  	node1.ClusterTable = clusterTable
    57  
    58  	node2 := &plan.Node{}
    59  	node2.NodeType = plan.Node_PROJECT
    60  	node2.Stats = &plan.Stats{}
    61  	node2.NodeId = 1
    62  	node2.Children = []int32{0}
    63  
    64  	node3 := &plan.Node{}
    65  	node3.NodeType = plan.Node_INSERT
    66  	node3.Stats = &plan.Stats{}
    67  	node3.NodeId = 2
    68  	node3.Children = []int32{1}
    69  	// node3.ClusterTable = clusterTable
    70  
    71  	idxList := make([]int32, len(tableDef.Cols))
    72  	for i := 0; i < len(tableDef.Cols); i++ {
    73  		tableDef.Name2ColIndex[tableDef.Cols[i].Name] = int32(i)
    74  		tmp := &plan.Expr{
    75  			Typ: tableDef.Cols[i].Typ,
    76  			Expr: &plan.Expr_Col{
    77  				Col: &plan.ColRef{
    78  					ColPos: int32(i),
    79  					Name:   tblName + "." + tableDef.Cols[i].Name,
    80  				},
    81  			},
    82  		}
    83  		idxList[i] = int32(i)
    84  		node1.ProjectList = append(node1.ProjectList, tmp)
    85  		// node3.ProjectList = append(node3.ProjectList, tmp)
    86  	}
    87  	if err := GetProjectNode(stmt, ctx, node2, tableDef.Name2ColIndex, clusterTable); err != nil {
    88  		return nil, err
    89  	}
    90  	if err := checkNullMap(stmt, tableDef.Cols, ctx); err != nil {
    91  		return nil, err
    92  	}
    93  
    94  	// node3.TableDef = tableDef
    95  	// node3.ObjRef = objRef
    96  	node3.InsertCtx = &plan.InsertCtx{
    97  		Ref:          objRef,
    98  		Idx:          idxList,
    99  		TableDef:     tableDef,
   100  		ClusterTable: clusterTable,
   101  		// ParentIdx:    map[string]int32{},
   102  	}
   103  
   104  	stmt.Param.Tail.ColumnList = nil
   105  	stmt.Param.LoadFile = true
   106  
   107  	json_byte, err := json.Marshal(stmt.Param)
   108  	if err != nil {
   109  		return nil, err
   110  	}
   111  
   112  	tableDef.Createsql = string(json_byte)
   113  	node1.TableDef = tableDef
   114  	node1.ObjRef = objRef
   115  
   116  	nodes := make([]*plan.Node, 3)
   117  	nodes[0] = node1
   118  	nodes[1] = node2
   119  	nodes[2] = node3
   120  	query := &plan.Query{
   121  		StmtType: plan.Query_INSERT,
   122  		Steps:    []int32{2},
   123  		Nodes:    nodes,
   124  	}
   125  	pn := &Plan{
   126  		Plan: &plan.Plan_Query{
   127  			Query: query,
   128  		},
   129  	}
   130  	pn.GetQuery().LoadTag = true
   131  	return pn, nil
   132  }
   133  
   134  func checkFileExist(param *tree.ExternParam, ctx CompilerContext) error {
   135  	if param.Local {
   136  		return nil
   137  	}
   138  	param.Ctx = ctx.GetContext()
   139  	if param.ScanType == tree.S3 {
   140  		if err := InitS3Param(param); err != nil {
   141  			return err
   142  		}
   143  	} else {
   144  		if err := InitInfileParam(param); err != nil {
   145  			return err
   146  		}
   147  	}
   148  
   149  	fileList, _, err := ReadDir(param)
   150  	if err != nil {
   151  		return err
   152  	}
   153  	if len(fileList) == 0 {
   154  		return moerr.NewInvalidInput(param.Ctx, "the file does not exist in load flow")
   155  	}
   156  	param.Ctx = nil
   157  	return nil
   158  }
   159  
   160  func GetProjectNode(stmt *tree.Load, ctx CompilerContext, node *plan.Node, Name2ColIndex map[string]int32, clusterTable *ClusterTable) error {
   161  	tblName := string(stmt.Table.ObjectName)
   162  	dbName := string(stmt.Table.SchemaName)
   163  	_, tableDef := ctx.Resolve(dbName, tblName)
   164  	if tableDef == nil {
   165  		return moerr.NewInternalError(ctx.GetContext(), "invalid table name: %s", string(stmt.Table.ObjectName))
   166  	}
   167  	if len(stmt.Param.Tail.ColumnList) > len(tableDef.Cols) {
   168  		return moerr.NewInternalError(ctx.GetContext(), "the load data column list is larger than table column")
   169  	}
   170  	colToIndex := make(map[int32]string, 0)
   171  	if len(stmt.Param.Tail.ColumnList) == 0 {
   172  		for i := 0; i < len(tableDef.Cols); i++ {
   173  			colToIndex[int32(i)] = tableDef.Cols[i].Name
   174  		}
   175  	} else {
   176  		for i, col := range stmt.Param.Tail.ColumnList {
   177  			switch realCol := col.(type) {
   178  			case *tree.UnresolvedName:
   179  				if _, ok := Name2ColIndex[realCol.Parts[0]]; !ok {
   180  					return moerr.NewInternalError(ctx.GetContext(), "column '%s' does not exist", realCol.Parts[0])
   181  				}
   182  				colToIndex[int32(i)] = realCol.Parts[0]
   183  				if clusterTable.GetIsClusterTable() {
   184  					//user can not specify the column account_id of the cluster table in the syntax
   185  					if util.IsClusterTableAttribute(realCol.Parts[0]) {
   186  						return moerr.NewInvalidInput(ctx.GetContext(), "do not specify the attribute %s for the cluster table", util.GetClusterTableAttributeName())
   187  					}
   188  				}
   189  			case *tree.VarExpr:
   190  				//NOTE:variable like '@abc' will be passed by.
   191  			default:
   192  				return moerr.NewInternalError(ctx.GetContext(), "unsupported column type %v", realCol)
   193  			}
   194  		}
   195  	}
   196  	node.ProjectList = make([]*plan.Expr, len(tableDef.Cols))
   197  	projectVec := make([]*plan.Expr, len(tableDef.Cols))
   198  	for i := 0; i < len(tableDef.Cols); i++ {
   199  		tmp := &plan.Expr{
   200  			Typ: tableDef.Cols[i].Typ,
   201  			Expr: &plan.Expr_Col{
   202  				Col: &plan.ColRef{
   203  					ColPos: int32(i),
   204  					Name:   tblName + "." + tableDef.Cols[i].Name,
   205  				},
   206  			},
   207  		}
   208  		projectVec[i] = tmp
   209  	}
   210  	for i := 0; i < len(tableDef.Cols); i++ {
   211  		if v, ok := colToIndex[int32(i)]; ok {
   212  			node.ProjectList[Name2ColIndex[v]] = projectVec[i]
   213  		}
   214  	}
   215  	var tmp *plan.Expr
   216  	//var err error
   217  	for i := 0; i < len(tableDef.Cols); i++ {
   218  		if node.ProjectList[i] != nil {
   219  			continue
   220  		}
   221  
   222  		if tableDef.Cols[i].Default.Expr == nil || tableDef.Cols[i].Default.NullAbility {
   223  			tmp = makePlan2NullConstExprWithType()
   224  		} else {
   225  			tmp = &plan.Expr{
   226  				Typ:  tableDef.Cols[i].Default.Expr.Typ,
   227  				Expr: tableDef.Cols[i].Default.Expr.Expr,
   228  			}
   229  		}
   230  		node.ProjectList[i] = tmp
   231  	}
   232  	return nil
   233  }
   234  
   235  func InitNullMap(param *tree.ExternParam, ctx CompilerContext) error {
   236  	param.NullMap = make(map[string][]string)
   237  
   238  	for i := 0; i < len(param.Tail.Assignments); i++ {
   239  		expr, ok := param.Tail.Assignments[i].Expr.(*tree.FuncExpr)
   240  		if !ok {
   241  			param.Tail.Assignments[i].Expr = nil
   242  			return nil
   243  		}
   244  		if len(expr.Exprs) != 2 {
   245  			param.Tail.Assignments[i].Expr = nil
   246  			return nil
   247  		}
   248  
   249  		expr2, ok := expr.Func.FunctionReference.(*tree.UnresolvedName)
   250  		if !ok || expr2.Parts[0] != "nullif" {
   251  			param.Tail.Assignments[i].Expr = nil
   252  			return nil
   253  		}
   254  
   255  		expr3, ok := expr.Exprs[0].(*tree.UnresolvedName)
   256  		if !ok {
   257  			return moerr.NewInvalidInput(ctx.GetContext(), "the nullif func first param is not UnresolvedName form")
   258  		}
   259  
   260  		expr4, ok := expr.Exprs[1].(*tree.NumVal)
   261  		if !ok {
   262  			return moerr.NewInvalidInput(ctx.GetContext(), "the nullif func second param is not NumVal form")
   263  		}
   264  		for j := 0; j < len(param.Tail.Assignments[i].Names); j++ {
   265  			col := param.Tail.Assignments[i].Names[j].Parts[0]
   266  			if col != expr3.Parts[0] {
   267  				return moerr.NewInvalidInput(ctx.GetContext(), "the nullif func first param must equal to colName")
   268  			}
   269  			param.NullMap[col] = append(param.NullMap[col], strings.ToLower(expr4.String()))
   270  		}
   271  		param.Tail.Assignments[i].Expr = nil
   272  	}
   273  	return nil
   274  }
   275  
   276  func checkNullMap(stmt *tree.Load, Cols []*ColDef, ctx CompilerContext) error {
   277  	for k := range stmt.Param.NullMap {
   278  		find := false
   279  		for i := 0; i < len(Cols); i++ {
   280  			if Cols[i].Name == k {
   281  				find = true
   282  			}
   283  		}
   284  		if !find {
   285  			return moerr.NewInvalidInput(ctx.GetContext(), "wrong col name '%s' in nullif function", k)
   286  		}
   287  	}
   288  	return nil
   289  }