github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/plan/build_load.go (about) 1 // Copyright 2021 - 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package plan 16 17 import ( 18 "encoding/json" 19 "strings" 20 "time" 21 22 "github.com/matrixorigin/matrixone/pkg/common/moerr" 23 "github.com/matrixorigin/matrixone/pkg/container/types" 24 "github.com/matrixorigin/matrixone/pkg/pb/plan" 25 "github.com/matrixorigin/matrixone/pkg/sql/parsers/tree" 26 v2 "github.com/matrixorigin/matrixone/pkg/util/metric/v2" 27 ) 28 29 const ( 30 LoadParallelMinSize = 1 << 20 31 ) 32 33 func buildLoad(stmt *tree.Load, ctx CompilerContext, isPrepareStmt bool) (*Plan, error) { 34 start := time.Now() 35 defer func() { 36 v2.TxnStatementBuildLoadHistogram.Observe(time.Since(start).Seconds()) 37 }() 38 tblName := string(stmt.Table.ObjectName) 39 tblInfo, err := getDmlTableInfo(ctx, tree.TableExprs{stmt.Table}, nil, nil, "insert") 40 if err != nil { 41 return nil, err 42 } 43 44 stmt.Param.Local = stmt.Local 45 fileName, err := checkFileExist(stmt.Param, ctx) 46 if err != nil { 47 return nil, err 48 } 49 50 if err := InitNullMap(stmt.Param, ctx); err != nil { 51 return nil, err 52 } 53 tableDef := tblInfo.tableDefs[0] 54 objRef := tblInfo.objRef[0] 55 56 tableDef.Name2ColIndex = map[string]int32{} 57 var externalProject []*Expr 58 for i := 0; i < len(tableDef.Cols); i++ { 59 idx := int32(i) 60 tableDef.Name2ColIndex[tableDef.Cols[i].Name] = idx 61 colExpr := &plan.Expr{ 62 Typ: tableDef.Cols[i].Typ, 63 Expr: &plan.Expr_Col{ 64 Col: &plan.ColRef{ 65 ColPos: idx, 66 Name: tblName + "." + tableDef.Cols[i].Name, 67 }, 68 }, 69 } 70 externalProject = append(externalProject, colExpr) 71 } 72 73 if err := checkNullMap(stmt, tableDef.Cols, ctx); err != nil { 74 return nil, err 75 } 76 77 if stmt.Param.FileSize < LoadParallelMinSize { 78 stmt.Param.Parallel = false 79 } 80 stmt.Param.Tail.ColumnList = nil 81 stmt.Param.LoadFile = true 82 if stmt.Param.ScanType != tree.INLINE { 83 json_byte, err := json.Marshal(stmt.Param) 84 if err != nil { 85 return nil, err 86 } 87 tableDef.Createsql = string(json_byte) 88 } 89 90 builder := NewQueryBuilder(plan.Query_SELECT, ctx, isPrepareStmt) 91 bindCtx := NewBindContext(builder, nil) 92 terminated := "," 93 enclosedBy := []byte("\"") 94 escapedBy := []byte{0} 95 if stmt.Param.Tail.Fields != nil { 96 if stmt.Param.Tail.Fields.EnclosedBy != nil { 97 if stmt.Param.Tail.Fields.EnclosedBy.Value != 0 { 98 enclosedBy = []byte{stmt.Param.Tail.Fields.EnclosedBy.Value} 99 } 100 } 101 if stmt.Param.Tail.Fields.EscapedBy != nil { 102 if stmt.Param.Tail.Fields.EscapedBy.Value != 0 { 103 escapedBy = []byte{stmt.Param.Tail.Fields.EscapedBy.Value} 104 } 105 } 106 if stmt.Param.Tail.Fields.Terminated != nil { 107 terminated = stmt.Param.Tail.Fields.Terminated.Value 108 } 109 } 110 111 externalScanNode := &plan.Node{ 112 NodeType: plan.Node_EXTERNAL_SCAN, 113 Stats: &plan.Stats{}, 114 ProjectList: externalProject, 115 ObjRef: objRef, 116 TableDef: tableDef, 117 ExternScan: &plan.ExternScan{ 118 Type: int32(stmt.Param.ScanType), 119 Data: stmt.Param.Data, 120 Format: stmt.Param.Format, 121 IgnoredLines: uint64(stmt.Param.Tail.IgnoredLines), 122 EnclosedBy: enclosedBy, 123 Terminated: terminated, 124 EscapedBy: escapedBy, 125 JsonType: stmt.Param.JsonData, 126 }, 127 } 128 lastNodeId := builder.appendNode(externalScanNode, bindCtx) 129 130 projectNode := &plan.Node{ 131 Children: []int32{lastNodeId}, 132 NodeType: plan.Node_PROJECT, 133 Stats: &plan.Stats{}, 134 } 135 ifExistAutoPkCol, err := getProjectNode(stmt, ctx, projectNode, tableDef) 136 if err != nil { 137 return nil, err 138 } 139 if stmt.Param.FileSize < LoadParallelMinSize { 140 stmt.Param.Parallel = false 141 } 142 if stmt.Param.Parallel && (getCompressType(stmt.Param, fileName) != tree.NOCOMPRESS || stmt.Local) { 143 projectNode.ProjectList = makeCastExpr(stmt, fileName, tableDef) 144 } 145 lastNodeId = builder.appendNode(projectNode, bindCtx) 146 builder.qry.LoadTag = true 147 148 //append lock node 149 // if lockNodeId, ok := appendLockNode( 150 // builder, 151 // bindCtx, 152 // lastNodeId, 153 // tableDef, 154 // true, 155 // true, 156 // -1, 157 // nil, 158 // ); ok { 159 // lastNodeId = lockNodeId 160 // } 161 162 // append hidden column to tableDef 163 newTableDef := DeepCopyTableDef(tableDef, true) 164 err = buildInsertPlans(ctx, builder, bindCtx, nil, objRef, newTableDef, lastNodeId, ifExistAutoPkCol, nil) 165 if err != nil { 166 return nil, err 167 } 168 // use shuffle for load if parallel and no compress 169 if stmt.Param.Parallel && (getCompressType(stmt.Param, fileName) == tree.NOCOMPRESS) { 170 for i := range builder.qry.Nodes { 171 node := builder.qry.Nodes[i] 172 if node.NodeType == plan.Node_INSERT { 173 if node.Stats.HashmapStats == nil { 174 node.Stats.HashmapStats = &plan.HashMapStats{} 175 } 176 node.Stats.HashmapStats.Shuffle = true 177 } 178 } 179 } 180 181 query := builder.qry 182 sqls, err := genSqlsForCheckFKSelfRefer(ctx.GetContext(), 183 objRef.SchemaName, newTableDef.Name, newTableDef.Cols, newTableDef.Fkeys) 184 if err != nil { 185 return nil, err 186 } 187 query.DetectSqls = sqls 188 reduceSinkSinkScanNodes(query) 189 query.StmtType = plan.Query_INSERT 190 191 pn := &Plan{ 192 Plan: &plan.Plan_Query{ 193 Query: query, 194 }, 195 } 196 return pn, nil 197 } 198 199 func checkFileExist(param *tree.ExternParam, ctx CompilerContext) (string, error) { 200 if param.Local { 201 return "", nil 202 } 203 if param.ScanType == tree.INLINE { 204 return "", nil 205 } 206 param.Ctx = ctx.GetContext() 207 if param.ScanType == tree.S3 { 208 if err := InitS3Param(param); err != nil { 209 return "", err 210 } 211 } else { 212 if err := InitInfileParam(param); err != nil { 213 return "", err 214 } 215 } 216 if len(param.Filepath) == 0 { 217 return "", nil 218 } 219 if err := StatFile(param); err != nil { 220 if moerror, ok := err.(*moerr.Error); ok { 221 if moerror.ErrorCode() == moerr.ErrFileNotFound { 222 return "", moerr.NewInvalidInput(ctx.GetContext(), "the file does not exist in load flow") 223 } else { 224 return "", moerror 225 } 226 } 227 return "", moerr.NewInternalError(ctx.GetContext(), err.Error()) 228 } 229 param.Init = true 230 return param.Filepath, nil 231 } 232 233 func getProjectNode(stmt *tree.Load, ctx CompilerContext, node *plan.Node, tableDef *TableDef) (bool, error) { 234 tblName := string(stmt.Table.ObjectName) 235 colToIndex := make(map[int32]string, 0) 236 ifExistAutoPkCol := false 237 if len(stmt.Param.Tail.ColumnList) == 0 { 238 for i := 0; i < len(tableDef.Cols); i++ { 239 colToIndex[int32(i)] = tableDef.Cols[i].Name 240 } 241 } else { 242 for i, col := range stmt.Param.Tail.ColumnList { 243 switch realCol := col.(type) { 244 case *tree.UnresolvedName: 245 if _, ok := tableDef.Name2ColIndex[realCol.Parts[0]]; !ok { 246 return ifExistAutoPkCol, moerr.NewInternalError(ctx.GetContext(), "column '%s' does not exist", realCol.Parts[0]) 247 } 248 colToIndex[int32(i)] = realCol.Parts[0] 249 case *tree.VarExpr: 250 //NOTE:variable like '@abc' will be passed by. 251 default: 252 return ifExistAutoPkCol, moerr.NewInternalError(ctx.GetContext(), "unsupported column type %v", realCol) 253 } 254 } 255 } 256 node.ProjectList = make([]*plan.Expr, len(tableDef.Cols)) 257 projectVec := make([]*plan.Expr, len(tableDef.Cols)) 258 for i := 0; i < len(tableDef.Cols); i++ { 259 tmp := &plan.Expr{ 260 Typ: tableDef.Cols[i].Typ, 261 Expr: &plan.Expr_Col{ 262 Col: &plan.ColRef{ 263 ColPos: int32(i), 264 Name: tblName + "." + tableDef.Cols[i].Name, 265 }, 266 }, 267 } 268 projectVec[i] = tmp 269 } 270 for i := 0; i < len(tableDef.Cols); i++ { 271 if v, ok := colToIndex[int32(i)]; ok { 272 node.ProjectList[tableDef.Name2ColIndex[v]] = projectVec[i] 273 } 274 } 275 var tmp *plan.Expr 276 //var err error 277 for i := 0; i < len(tableDef.Cols); i++ { 278 if node.ProjectList[i] != nil { 279 continue 280 } 281 282 if tableDef.Cols[i].Default.Expr == nil || tableDef.Cols[i].Default.NullAbility { 283 tmp = makePlan2NullConstExprWithType() 284 } else { 285 tmp = &plan.Expr{ 286 Typ: tableDef.Cols[i].Default.Expr.Typ, 287 Expr: tableDef.Cols[i].Default.Expr.Expr, 288 } 289 } 290 node.ProjectList[i] = tmp 291 292 if tableDef.Cols[i].Typ.AutoIncr && tableDef.Cols[i].Name == tableDef.Pkey.PkeyColName { 293 ifExistAutoPkCol = true 294 } 295 } 296 return ifExistAutoPkCol, nil 297 } 298 299 func InitNullMap(param *tree.ExternParam, ctx CompilerContext) error { 300 param.NullMap = make(map[string][]string) 301 302 for i := 0; i < len(param.Tail.Assignments); i++ { 303 expr, ok := param.Tail.Assignments[i].Expr.(*tree.FuncExpr) 304 if !ok { 305 param.Tail.Assignments[i].Expr = nil 306 return nil 307 } 308 if len(expr.Exprs) != 2 { 309 param.Tail.Assignments[i].Expr = nil 310 return nil 311 } 312 313 expr2, ok := expr.Func.FunctionReference.(*tree.UnresolvedName) 314 if !ok || expr2.Parts[0] != "nullif" { 315 param.Tail.Assignments[i].Expr = nil 316 return nil 317 } 318 319 expr3, ok := expr.Exprs[0].(*tree.UnresolvedName) 320 if !ok { 321 return moerr.NewInvalidInput(ctx.GetContext(), "the nullif func first param is not UnresolvedName form") 322 } 323 324 expr4, ok := expr.Exprs[1].(*tree.NumVal) 325 if !ok { 326 return moerr.NewInvalidInput(ctx.GetContext(), "the nullif func second param is not NumVal form") 327 } 328 for j := 0; j < len(param.Tail.Assignments[i].Names); j++ { 329 col := param.Tail.Assignments[i].Names[j].Parts[0] 330 if col != expr3.Parts[0] { 331 return moerr.NewInvalidInput(ctx.GetContext(), "the nullif func first param must equal to colName") 332 } 333 param.NullMap[col] = append(param.NullMap[col], strings.ToLower(expr4.String())) 334 } 335 param.Tail.Assignments[i].Expr = nil 336 } 337 return nil 338 } 339 340 func checkNullMap(stmt *tree.Load, Cols []*ColDef, ctx CompilerContext) error { 341 for k := range stmt.Param.NullMap { 342 find := false 343 for i := 0; i < len(Cols); i++ { 344 if Cols[i].Name == k { 345 find = true 346 } 347 } 348 if !find { 349 return moerr.NewInvalidInput(ctx.GetContext(), "wrong col name '%s' in nullif function", k) 350 } 351 } 352 return nil 353 } 354 355 func getCompressType(param *tree.ExternParam, filepath string) string { 356 if param.CompressType != "" && param.CompressType != tree.AUTO { 357 return param.CompressType 358 } 359 index := strings.LastIndex(filepath, ".") 360 if index == -1 { 361 return tree.NOCOMPRESS 362 } 363 tail := string([]byte(filepath)[index+1:]) 364 switch tail { 365 case "gz", "gzip": 366 return tree.GZIP 367 case "bz2", "bzip2": 368 return tree.BZIP2 369 case "lz4": 370 return tree.LZ4 371 default: 372 return tree.NOCOMPRESS 373 } 374 } 375 376 func makeCastExpr(stmt *tree.Load, fileName string, tableDef *TableDef) []*plan.Expr { 377 ret := make([]*plan.Expr, 0) 378 stringTyp := &plan.Type{ 379 Id: int32(types.T_varchar), 380 } 381 for i := 0; i < len(tableDef.Cols); i++ { 382 typ := tableDef.Cols[i].Typ 383 expr := &plan.Expr{ 384 Typ: *stringTyp, 385 Expr: &plan.Expr_Col{ 386 Col: &plan.ColRef{ 387 RelPos: 0, 388 ColPos: int32(i), 389 }, 390 }, 391 } 392 393 expr, _ = makePlan2CastExpr(stmt.Param.Ctx, expr, typ) 394 ret = append(ret, expr) 395 } 396 return ret 397 }