github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/plan/build_load.go (about)

     1  // Copyright 2021 - 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package plan
    16  
    17  import (
    18  	"encoding/json"
    19  	"strings"
    20  	"time"
    21  
    22  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    23  	"github.com/matrixorigin/matrixone/pkg/container/types"
    24  	"github.com/matrixorigin/matrixone/pkg/pb/plan"
    25  	"github.com/matrixorigin/matrixone/pkg/sql/parsers/tree"
    26  	v2 "github.com/matrixorigin/matrixone/pkg/util/metric/v2"
    27  )
    28  
    29  const (
    30  	LoadParallelMinSize = 1 << 20
    31  )
    32  
    33  func buildLoad(stmt *tree.Load, ctx CompilerContext, isPrepareStmt bool) (*Plan, error) {
    34  	start := time.Now()
    35  	defer func() {
    36  		v2.TxnStatementBuildLoadHistogram.Observe(time.Since(start).Seconds())
    37  	}()
    38  	tblName := string(stmt.Table.ObjectName)
    39  	tblInfo, err := getDmlTableInfo(ctx, tree.TableExprs{stmt.Table}, nil, nil, "insert")
    40  	if err != nil {
    41  		return nil, err
    42  	}
    43  
    44  	stmt.Param.Local = stmt.Local
    45  	fileName, err := checkFileExist(stmt.Param, ctx)
    46  	if err != nil {
    47  		return nil, err
    48  	}
    49  
    50  	if err := InitNullMap(stmt.Param, ctx); err != nil {
    51  		return nil, err
    52  	}
    53  	tableDef := tblInfo.tableDefs[0]
    54  	objRef := tblInfo.objRef[0]
    55  
    56  	tableDef.Name2ColIndex = map[string]int32{}
    57  	var externalProject []*Expr
    58  	for i := 0; i < len(tableDef.Cols); i++ {
    59  		idx := int32(i)
    60  		tableDef.Name2ColIndex[tableDef.Cols[i].Name] = idx
    61  		colExpr := &plan.Expr{
    62  			Typ: tableDef.Cols[i].Typ,
    63  			Expr: &plan.Expr_Col{
    64  				Col: &plan.ColRef{
    65  					ColPos: idx,
    66  					Name:   tblName + "." + tableDef.Cols[i].Name,
    67  				},
    68  			},
    69  		}
    70  		externalProject = append(externalProject, colExpr)
    71  	}
    72  
    73  	if err := checkNullMap(stmt, tableDef.Cols, ctx); err != nil {
    74  		return nil, err
    75  	}
    76  
    77  	if stmt.Param.FileSize < LoadParallelMinSize {
    78  		stmt.Param.Parallel = false
    79  	}
    80  	stmt.Param.Tail.ColumnList = nil
    81  	stmt.Param.LoadFile = true
    82  	if stmt.Param.ScanType != tree.INLINE {
    83  		json_byte, err := json.Marshal(stmt.Param)
    84  		if err != nil {
    85  			return nil, err
    86  		}
    87  		tableDef.Createsql = string(json_byte)
    88  	}
    89  
    90  	builder := NewQueryBuilder(plan.Query_SELECT, ctx, isPrepareStmt)
    91  	bindCtx := NewBindContext(builder, nil)
    92  	terminated := ","
    93  	enclosedBy := []byte("\"")
    94  	escapedBy := []byte{0}
    95  	if stmt.Param.Tail.Fields != nil {
    96  		if stmt.Param.Tail.Fields.EnclosedBy != nil {
    97  			if stmt.Param.Tail.Fields.EnclosedBy.Value != 0 {
    98  				enclosedBy = []byte{stmt.Param.Tail.Fields.EnclosedBy.Value}
    99  			}
   100  		}
   101  		if stmt.Param.Tail.Fields.EscapedBy != nil {
   102  			if stmt.Param.Tail.Fields.EscapedBy.Value != 0 {
   103  				escapedBy = []byte{stmt.Param.Tail.Fields.EscapedBy.Value}
   104  			}
   105  		}
   106  		if stmt.Param.Tail.Fields.Terminated != nil {
   107  			terminated = stmt.Param.Tail.Fields.Terminated.Value
   108  		}
   109  	}
   110  
   111  	externalScanNode := &plan.Node{
   112  		NodeType:    plan.Node_EXTERNAL_SCAN,
   113  		Stats:       &plan.Stats{},
   114  		ProjectList: externalProject,
   115  		ObjRef:      objRef,
   116  		TableDef:    tableDef,
   117  		ExternScan: &plan.ExternScan{
   118  			Type:         int32(stmt.Param.ScanType),
   119  			Data:         stmt.Param.Data,
   120  			Format:       stmt.Param.Format,
   121  			IgnoredLines: uint64(stmt.Param.Tail.IgnoredLines),
   122  			EnclosedBy:   enclosedBy,
   123  			Terminated:   terminated,
   124  			EscapedBy:    escapedBy,
   125  			JsonType:     stmt.Param.JsonData,
   126  		},
   127  	}
   128  	lastNodeId := builder.appendNode(externalScanNode, bindCtx)
   129  
   130  	projectNode := &plan.Node{
   131  		Children: []int32{lastNodeId},
   132  		NodeType: plan.Node_PROJECT,
   133  		Stats:    &plan.Stats{},
   134  	}
   135  	ifExistAutoPkCol, err := getProjectNode(stmt, ctx, projectNode, tableDef)
   136  	if err != nil {
   137  		return nil, err
   138  	}
   139  	if stmt.Param.FileSize < LoadParallelMinSize {
   140  		stmt.Param.Parallel = false
   141  	}
   142  	if stmt.Param.Parallel && (getCompressType(stmt.Param, fileName) != tree.NOCOMPRESS || stmt.Local) {
   143  		projectNode.ProjectList = makeCastExpr(stmt, fileName, tableDef)
   144  	}
   145  	lastNodeId = builder.appendNode(projectNode, bindCtx)
   146  	builder.qry.LoadTag = true
   147  
   148  	//append lock node
   149  	// if lockNodeId, ok := appendLockNode(
   150  	// 	builder,
   151  	// 	bindCtx,
   152  	// 	lastNodeId,
   153  	// 	tableDef,
   154  	// 	true,
   155  	// 	true,
   156  	// 	-1,
   157  	// 	nil,
   158  	// ); ok {
   159  	// 	lastNodeId = lockNodeId
   160  	// }
   161  
   162  	// append hidden column to tableDef
   163  	newTableDef := DeepCopyTableDef(tableDef, true)
   164  	err = buildInsertPlans(ctx, builder, bindCtx, nil, objRef, newTableDef, lastNodeId, ifExistAutoPkCol, nil)
   165  	if err != nil {
   166  		return nil, err
   167  	}
   168  	// use shuffle for load if parallel and no compress
   169  	if stmt.Param.Parallel && (getCompressType(stmt.Param, fileName) == tree.NOCOMPRESS) {
   170  		for i := range builder.qry.Nodes {
   171  			node := builder.qry.Nodes[i]
   172  			if node.NodeType == plan.Node_INSERT {
   173  				if node.Stats.HashmapStats == nil {
   174  					node.Stats.HashmapStats = &plan.HashMapStats{}
   175  				}
   176  				node.Stats.HashmapStats.Shuffle = true
   177  			}
   178  		}
   179  	}
   180  
   181  	query := builder.qry
   182  	sqls, err := genSqlsForCheckFKSelfRefer(ctx.GetContext(),
   183  		objRef.SchemaName, newTableDef.Name, newTableDef.Cols, newTableDef.Fkeys)
   184  	if err != nil {
   185  		return nil, err
   186  	}
   187  	query.DetectSqls = sqls
   188  	reduceSinkSinkScanNodes(query)
   189  	query.StmtType = plan.Query_INSERT
   190  
   191  	pn := &Plan{
   192  		Plan: &plan.Plan_Query{
   193  			Query: query,
   194  		},
   195  	}
   196  	return pn, nil
   197  }
   198  
   199  func checkFileExist(param *tree.ExternParam, ctx CompilerContext) (string, error) {
   200  	if param.Local {
   201  		return "", nil
   202  	}
   203  	if param.ScanType == tree.INLINE {
   204  		return "", nil
   205  	}
   206  	param.Ctx = ctx.GetContext()
   207  	if param.ScanType == tree.S3 {
   208  		if err := InitS3Param(param); err != nil {
   209  			return "", err
   210  		}
   211  	} else {
   212  		if err := InitInfileParam(param); err != nil {
   213  			return "", err
   214  		}
   215  	}
   216  	if len(param.Filepath) == 0 {
   217  		return "", nil
   218  	}
   219  	if err := StatFile(param); err != nil {
   220  		if moerror, ok := err.(*moerr.Error); ok {
   221  			if moerror.ErrorCode() == moerr.ErrFileNotFound {
   222  				return "", moerr.NewInvalidInput(ctx.GetContext(), "the file does not exist in load flow")
   223  			} else {
   224  				return "", moerror
   225  			}
   226  		}
   227  		return "", moerr.NewInternalError(ctx.GetContext(), err.Error())
   228  	}
   229  	param.Init = true
   230  	return param.Filepath, nil
   231  }
   232  
   233  func getProjectNode(stmt *tree.Load, ctx CompilerContext, node *plan.Node, tableDef *TableDef) (bool, error) {
   234  	tblName := string(stmt.Table.ObjectName)
   235  	colToIndex := make(map[int32]string, 0)
   236  	ifExistAutoPkCol := false
   237  	if len(stmt.Param.Tail.ColumnList) == 0 {
   238  		for i := 0; i < len(tableDef.Cols); i++ {
   239  			colToIndex[int32(i)] = tableDef.Cols[i].Name
   240  		}
   241  	} else {
   242  		for i, col := range stmt.Param.Tail.ColumnList {
   243  			switch realCol := col.(type) {
   244  			case *tree.UnresolvedName:
   245  				if _, ok := tableDef.Name2ColIndex[realCol.Parts[0]]; !ok {
   246  					return ifExistAutoPkCol, moerr.NewInternalError(ctx.GetContext(), "column '%s' does not exist", realCol.Parts[0])
   247  				}
   248  				colToIndex[int32(i)] = realCol.Parts[0]
   249  			case *tree.VarExpr:
   250  				//NOTE:variable like '@abc' will be passed by.
   251  			default:
   252  				return ifExistAutoPkCol, moerr.NewInternalError(ctx.GetContext(), "unsupported column type %v", realCol)
   253  			}
   254  		}
   255  	}
   256  	node.ProjectList = make([]*plan.Expr, len(tableDef.Cols))
   257  	projectVec := make([]*plan.Expr, len(tableDef.Cols))
   258  	for i := 0; i < len(tableDef.Cols); i++ {
   259  		tmp := &plan.Expr{
   260  			Typ: tableDef.Cols[i].Typ,
   261  			Expr: &plan.Expr_Col{
   262  				Col: &plan.ColRef{
   263  					ColPos: int32(i),
   264  					Name:   tblName + "." + tableDef.Cols[i].Name,
   265  				},
   266  			},
   267  		}
   268  		projectVec[i] = tmp
   269  	}
   270  	for i := 0; i < len(tableDef.Cols); i++ {
   271  		if v, ok := colToIndex[int32(i)]; ok {
   272  			node.ProjectList[tableDef.Name2ColIndex[v]] = projectVec[i]
   273  		}
   274  	}
   275  	var tmp *plan.Expr
   276  	//var err error
   277  	for i := 0; i < len(tableDef.Cols); i++ {
   278  		if node.ProjectList[i] != nil {
   279  			continue
   280  		}
   281  
   282  		if tableDef.Cols[i].Default.Expr == nil || tableDef.Cols[i].Default.NullAbility {
   283  			tmp = makePlan2NullConstExprWithType()
   284  		} else {
   285  			tmp = &plan.Expr{
   286  				Typ:  tableDef.Cols[i].Default.Expr.Typ,
   287  				Expr: tableDef.Cols[i].Default.Expr.Expr,
   288  			}
   289  		}
   290  		node.ProjectList[i] = tmp
   291  
   292  		if tableDef.Cols[i].Typ.AutoIncr && tableDef.Cols[i].Name == tableDef.Pkey.PkeyColName {
   293  			ifExistAutoPkCol = true
   294  		}
   295  	}
   296  	return ifExistAutoPkCol, nil
   297  }
   298  
   299  func InitNullMap(param *tree.ExternParam, ctx CompilerContext) error {
   300  	param.NullMap = make(map[string][]string)
   301  
   302  	for i := 0; i < len(param.Tail.Assignments); i++ {
   303  		expr, ok := param.Tail.Assignments[i].Expr.(*tree.FuncExpr)
   304  		if !ok {
   305  			param.Tail.Assignments[i].Expr = nil
   306  			return nil
   307  		}
   308  		if len(expr.Exprs) != 2 {
   309  			param.Tail.Assignments[i].Expr = nil
   310  			return nil
   311  		}
   312  
   313  		expr2, ok := expr.Func.FunctionReference.(*tree.UnresolvedName)
   314  		if !ok || expr2.Parts[0] != "nullif" {
   315  			param.Tail.Assignments[i].Expr = nil
   316  			return nil
   317  		}
   318  
   319  		expr3, ok := expr.Exprs[0].(*tree.UnresolvedName)
   320  		if !ok {
   321  			return moerr.NewInvalidInput(ctx.GetContext(), "the nullif func first param is not UnresolvedName form")
   322  		}
   323  
   324  		expr4, ok := expr.Exprs[1].(*tree.NumVal)
   325  		if !ok {
   326  			return moerr.NewInvalidInput(ctx.GetContext(), "the nullif func second param is not NumVal form")
   327  		}
   328  		for j := 0; j < len(param.Tail.Assignments[i].Names); j++ {
   329  			col := param.Tail.Assignments[i].Names[j].Parts[0]
   330  			if col != expr3.Parts[0] {
   331  				return moerr.NewInvalidInput(ctx.GetContext(), "the nullif func first param must equal to colName")
   332  			}
   333  			param.NullMap[col] = append(param.NullMap[col], strings.ToLower(expr4.String()))
   334  		}
   335  		param.Tail.Assignments[i].Expr = nil
   336  	}
   337  	return nil
   338  }
   339  
   340  func checkNullMap(stmt *tree.Load, Cols []*ColDef, ctx CompilerContext) error {
   341  	for k := range stmt.Param.NullMap {
   342  		find := false
   343  		for i := 0; i < len(Cols); i++ {
   344  			if Cols[i].Name == k {
   345  				find = true
   346  			}
   347  		}
   348  		if !find {
   349  			return moerr.NewInvalidInput(ctx.GetContext(), "wrong col name '%s' in nullif function", k)
   350  		}
   351  	}
   352  	return nil
   353  }
   354  
   355  func getCompressType(param *tree.ExternParam, filepath string) string {
   356  	if param.CompressType != "" && param.CompressType != tree.AUTO {
   357  		return param.CompressType
   358  	}
   359  	index := strings.LastIndex(filepath, ".")
   360  	if index == -1 {
   361  		return tree.NOCOMPRESS
   362  	}
   363  	tail := string([]byte(filepath)[index+1:])
   364  	switch tail {
   365  	case "gz", "gzip":
   366  		return tree.GZIP
   367  	case "bz2", "bzip2":
   368  		return tree.BZIP2
   369  	case "lz4":
   370  		return tree.LZ4
   371  	default:
   372  		return tree.NOCOMPRESS
   373  	}
   374  }
   375  
   376  func makeCastExpr(stmt *tree.Load, fileName string, tableDef *TableDef) []*plan.Expr {
   377  	ret := make([]*plan.Expr, 0)
   378  	stringTyp := &plan.Type{
   379  		Id: int32(types.T_varchar),
   380  	}
   381  	for i := 0; i < len(tableDef.Cols); i++ {
   382  		typ := tableDef.Cols[i].Typ
   383  		expr := &plan.Expr{
   384  			Typ: *stringTyp,
   385  			Expr: &plan.Expr_Col{
   386  				Col: &plan.ColRef{
   387  					RelPos: 0,
   388  					ColPos: int32(i),
   389  				},
   390  			},
   391  		}
   392  
   393  		expr, _ = makePlan2CastExpr(stmt.Param.Ctx, expr, typ)
   394  		ret = append(ret, expr)
   395  	}
   396  	return ret
   397  }