github.com/matrixorigin/matrixone@v0.7.0/pkg/sql/plan/build_load.go (about) 1 // Copyright 2021 - 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package plan 16 17 import ( 18 "encoding/json" 19 "strings" 20 21 "github.com/matrixorigin/matrixone/pkg/sql/util" 22 23 "github.com/matrixorigin/matrixone/pkg/common/moerr" 24 "github.com/matrixorigin/matrixone/pkg/pb/plan" 25 "github.com/matrixorigin/matrixone/pkg/sql/parsers/tree" 26 ) 27 28 func buildLoad(stmt *tree.Load, ctx CompilerContext) (*Plan, error) { 29 stmt.Param.Local = stmt.Local 30 if err := checkFileExist(stmt.Param, ctx); err != nil { 31 return nil, err 32 } 33 34 if err := InitNullMap(stmt.Param, ctx); err != nil { 35 return nil, err 36 } 37 tblName := string(stmt.Table.ObjectName) 38 tblInfo, err := getDmlTableInfo(ctx, tree.TableExprs{stmt.Table}, nil, nil) 39 if err != nil { 40 return nil, err 41 } 42 tableDef := tblInfo.tableDefs[0] 43 objRef := tblInfo.objRef[0] 44 clusterTable, err := getAccountInfoOfClusterTable(ctx, stmt.Accounts, tableDef, tblInfo.isClusterTable[0]) 45 if err != nil { 46 return nil, err 47 } 48 // if tblInfo.haveConstraint { 49 // return nil, moerr.NewNotSupported(ctx.GetContext(), "table '%v' have contraint, can not use load statement", tblName) 50 // } 51 52 tableDef.Name2ColIndex = map[string]int32{} 53 node1 := &plan.Node{} 54 node1.NodeType = plan.Node_EXTERNAL_SCAN 55 node1.Stats = &plan.Stats{} 56 node1.ClusterTable = clusterTable 57 58 node2 := &plan.Node{} 59 node2.NodeType = plan.Node_PROJECT 60 node2.Stats = &plan.Stats{} 61 node2.NodeId = 1 62 node2.Children = []int32{0} 63 64 node3 := &plan.Node{} 65 node3.NodeType = plan.Node_INSERT 66 node3.Stats = &plan.Stats{} 67 node3.NodeId = 2 68 node3.Children = []int32{1} 69 // node3.ClusterTable = clusterTable 70 71 idxList := make([]int32, len(tableDef.Cols)) 72 for i := 0; i < len(tableDef.Cols); i++ { 73 tableDef.Name2ColIndex[tableDef.Cols[i].Name] = int32(i) 74 tmp := &plan.Expr{ 75 Typ: tableDef.Cols[i].Typ, 76 Expr: &plan.Expr_Col{ 77 Col: &plan.ColRef{ 78 ColPos: int32(i), 79 Name: tblName + "." + tableDef.Cols[i].Name, 80 }, 81 }, 82 } 83 idxList[i] = int32(i) 84 node1.ProjectList = append(node1.ProjectList, tmp) 85 // node3.ProjectList = append(node3.ProjectList, tmp) 86 } 87 if err := GetProjectNode(stmt, ctx, node2, tableDef.Name2ColIndex, clusterTable); err != nil { 88 return nil, err 89 } 90 if err := checkNullMap(stmt, tableDef.Cols, ctx); err != nil { 91 return nil, err 92 } 93 94 // node3.TableDef = tableDef 95 // node3.ObjRef = objRef 96 node3.InsertCtx = &plan.InsertCtx{ 97 Ref: objRef, 98 Idx: idxList, 99 TableDef: tableDef, 100 ClusterTable: clusterTable, 101 // ParentIdx: map[string]int32{}, 102 } 103 104 stmt.Param.Tail.ColumnList = nil 105 stmt.Param.LoadFile = true 106 107 json_byte, err := json.Marshal(stmt.Param) 108 if err != nil { 109 return nil, err 110 } 111 112 tableDef.Createsql = string(json_byte) 113 node1.TableDef = tableDef 114 node1.ObjRef = objRef 115 116 nodes := make([]*plan.Node, 3) 117 nodes[0] = node1 118 nodes[1] = node2 119 nodes[2] = node3 120 query := &plan.Query{ 121 StmtType: plan.Query_INSERT, 122 Steps: []int32{2}, 123 Nodes: nodes, 124 } 125 pn := &Plan{ 126 Plan: &plan.Plan_Query{ 127 Query: query, 128 }, 129 } 130 pn.GetQuery().LoadTag = true 131 return pn, nil 132 } 133 134 func checkFileExist(param *tree.ExternParam, ctx CompilerContext) error { 135 if param.Local { 136 return nil 137 } 138 param.Ctx = ctx.GetContext() 139 if param.ScanType == tree.S3 { 140 if err := InitS3Param(param); err != nil { 141 return err 142 } 143 } else { 144 if err := InitInfileParam(param); err != nil { 145 return err 146 } 147 } 148 149 fileList, _, err := ReadDir(param) 150 if err != nil { 151 return err 152 } 153 if len(fileList) == 0 { 154 return moerr.NewInvalidInput(param.Ctx, "the file does not exist in load flow") 155 } 156 param.Ctx = nil 157 return nil 158 } 159 160 func GetProjectNode(stmt *tree.Load, ctx CompilerContext, node *plan.Node, Name2ColIndex map[string]int32, clusterTable *ClusterTable) error { 161 tblName := string(stmt.Table.ObjectName) 162 dbName := string(stmt.Table.SchemaName) 163 _, tableDef := ctx.Resolve(dbName, tblName) 164 if tableDef == nil { 165 return moerr.NewInternalError(ctx.GetContext(), "invalid table name: %s", string(stmt.Table.ObjectName)) 166 } 167 if len(stmt.Param.Tail.ColumnList) > len(tableDef.Cols) { 168 return moerr.NewInternalError(ctx.GetContext(), "the load data column list is larger than table column") 169 } 170 colToIndex := make(map[int32]string, 0) 171 if len(stmt.Param.Tail.ColumnList) == 0 { 172 for i := 0; i < len(tableDef.Cols); i++ { 173 colToIndex[int32(i)] = tableDef.Cols[i].Name 174 } 175 } else { 176 for i, col := range stmt.Param.Tail.ColumnList { 177 switch realCol := col.(type) { 178 case *tree.UnresolvedName: 179 if _, ok := Name2ColIndex[realCol.Parts[0]]; !ok { 180 return moerr.NewInternalError(ctx.GetContext(), "column '%s' does not exist", realCol.Parts[0]) 181 } 182 colToIndex[int32(i)] = realCol.Parts[0] 183 if clusterTable.GetIsClusterTable() { 184 //user can not specify the column account_id of the cluster table in the syntax 185 if util.IsClusterTableAttribute(realCol.Parts[0]) { 186 return moerr.NewInvalidInput(ctx.GetContext(), "do not specify the attribute %s for the cluster table", util.GetClusterTableAttributeName()) 187 } 188 } 189 case *tree.VarExpr: 190 //NOTE:variable like '@abc' will be passed by. 191 default: 192 return moerr.NewInternalError(ctx.GetContext(), "unsupported column type %v", realCol) 193 } 194 } 195 } 196 node.ProjectList = make([]*plan.Expr, len(tableDef.Cols)) 197 projectVec := make([]*plan.Expr, len(tableDef.Cols)) 198 for i := 0; i < len(tableDef.Cols); i++ { 199 tmp := &plan.Expr{ 200 Typ: tableDef.Cols[i].Typ, 201 Expr: &plan.Expr_Col{ 202 Col: &plan.ColRef{ 203 ColPos: int32(i), 204 Name: tblName + "." + tableDef.Cols[i].Name, 205 }, 206 }, 207 } 208 projectVec[i] = tmp 209 } 210 for i := 0; i < len(tableDef.Cols); i++ { 211 if v, ok := colToIndex[int32(i)]; ok { 212 node.ProjectList[Name2ColIndex[v]] = projectVec[i] 213 } 214 } 215 var tmp *plan.Expr 216 //var err error 217 for i := 0; i < len(tableDef.Cols); i++ { 218 if node.ProjectList[i] != nil { 219 continue 220 } 221 222 if tableDef.Cols[i].Default.Expr == nil || tableDef.Cols[i].Default.NullAbility { 223 tmp = makePlan2NullConstExprWithType() 224 } else { 225 tmp = &plan.Expr{ 226 Typ: tableDef.Cols[i].Default.Expr.Typ, 227 Expr: tableDef.Cols[i].Default.Expr.Expr, 228 } 229 } 230 node.ProjectList[i] = tmp 231 } 232 return nil 233 } 234 235 func InitNullMap(param *tree.ExternParam, ctx CompilerContext) error { 236 param.NullMap = make(map[string][]string) 237 238 for i := 0; i < len(param.Tail.Assignments); i++ { 239 expr, ok := param.Tail.Assignments[i].Expr.(*tree.FuncExpr) 240 if !ok { 241 param.Tail.Assignments[i].Expr = nil 242 return nil 243 } 244 if len(expr.Exprs) != 2 { 245 param.Tail.Assignments[i].Expr = nil 246 return nil 247 } 248 249 expr2, ok := expr.Func.FunctionReference.(*tree.UnresolvedName) 250 if !ok || expr2.Parts[0] != "nullif" { 251 param.Tail.Assignments[i].Expr = nil 252 return nil 253 } 254 255 expr3, ok := expr.Exprs[0].(*tree.UnresolvedName) 256 if !ok { 257 return moerr.NewInvalidInput(ctx.GetContext(), "the nullif func first param is not UnresolvedName form") 258 } 259 260 expr4, ok := expr.Exprs[1].(*tree.NumVal) 261 if !ok { 262 return moerr.NewInvalidInput(ctx.GetContext(), "the nullif func second param is not NumVal form") 263 } 264 for j := 0; j < len(param.Tail.Assignments[i].Names); j++ { 265 col := param.Tail.Assignments[i].Names[j].Parts[0] 266 if col != expr3.Parts[0] { 267 return moerr.NewInvalidInput(ctx.GetContext(), "the nullif func first param must equal to colName") 268 } 269 param.NullMap[col] = append(param.NullMap[col], strings.ToLower(expr4.String())) 270 } 271 param.Tail.Assignments[i].Expr = nil 272 } 273 return nil 274 } 275 276 func checkNullMap(stmt *tree.Load, Cols []*ColDef, ctx CompilerContext) error { 277 for k := range stmt.Param.NullMap { 278 find := false 279 for i := 0; i < len(Cols); i++ { 280 if Cols[i].Name == k { 281 find = true 282 } 283 } 284 if !find { 285 return moerr.NewInvalidInput(ctx.GetContext(), "wrong col name '%s' in nullif function", k) 286 } 287 } 288 return nil 289 }