github.com/whtcorpsinc/milevadb-prod@v0.0.0-20211104133533-f57f4be3b597/interlock/load_data.go

// Copyright 2020 WHTCORPS INC, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package interlock

import (
	"bytes"
	"context"
	"fmt"
	"strings"
	"sync/atomic"
	"time"

	"github.com/whtcorpsinc/BerolinaSQL/allegrosql"
	"github.com/whtcorpsinc/BerolinaSQL/ast"
	"github.com/whtcorpsinc/BerolinaSQL/perceptron"
	"github.com/whtcorpsinc/errors"
	"github.com/whtcorpsinc/failpoint"
	"github.com/whtcorpsinc/milevadb/causet"
	"github.com/whtcorpsinc/milevadb/memex"
	"github.com/whtcorpsinc/milevadb/soliton/chunk"
	"github.com/whtcorpsinc/milevadb/soliton/logutil"
	"github.com/whtcorpsinc/milevadb/soliton/replog"
	"github.com/whtcorpsinc/milevadb/stochastikctx"
	"github.com/whtcorpsinc/milevadb/types"
	"go.uber.org/zap"
)

var (
	null          = []byte("NULL")
	taskQueueSize = 16 // the maximum number of pending tasks to commit in queue
)

// LoadDataInterDirc represents a load data interlock.
type LoadDataInterDirc struct {
	baseInterlockingDirectorate

	IsLocal      bool
	OnDuplicate  ast.OnDuplicateKeyHandlingType
	loadDataInfo *LoadDataInfo
}

// NewLoadDataInfo returns a LoadDataInfo structure; it is only used for tests now.
func NewLoadDataInfo(ctx stochastikctx.Context, event []types.Causet, tbl causet.Block, defcaus []*causet.DeferredCauset) *LoadDataInfo {
	insertVal := &InsertValues{baseInterlockingDirectorate: newBaseInterlockingDirectorate(ctx, nil, 0), Block: tbl}
	return &LoadDataInfo{
		event:        event,
		InsertValues: insertVal,
		Block:        tbl,
		Ctx:          ctx,
	}
}

// Next implements the InterlockingDirectorate Next interface.
func (e *LoadDataInterDirc) Next(ctx context.Context, req *chunk.Chunk) error {
	req.GrowAndReset(e.maxChunkSize)
	// TODO: support load data without the LOCAL field.
	if !e.IsLocal {
		return errors.New("Load Data: don't support load data without local field")
	}
	// TODO: support load data with the REPLACE field.
	if e.OnDuplicate == ast.OnDuplicateKeyHandlingReplace {
		return errors.New("Load Data: don't support load data with replace field")
	}
	// TODO: support LINES TERMINATED BY "".
	if len(e.loadDataInfo.LinesInfo.Terminated) == 0 {
		return errors.New("Load Data: don't support load data terminated is nil")
	}

	sctx := e.loadDataInfo.ctx
	val := sctx.Value(LoadDataVarKey)
	if val != nil {
		sctx.SetValue(LoadDataVarKey, nil)
		return errors.New("Load Data: previous load data option isn't closed normal")
	}
	if e.loadDataInfo.Path == "" {
		return errors.New("Load Data: infile path is empty")
	}
	sctx.SetValue(LoadDataVarKey, e.loadDataInfo)

	return nil
}

// Close implements the InterlockingDirectorate Close interface.
func (e *LoadDataInterDirc) Close() error {
	return nil
}
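
// fetchLoadDataInfo is a hypothetical sketch, not part of the original file,
// illustrating the handoff that Next sets up: Next stashes the *LoadDataInfo
// under LoadDataVarKey in the stochastik context, and the layer that streams
// the client file is expected to retrieve (and later clear) it roughly like this.
func fetchLoadDataInfo(sctx stochastikctx.Context) (*LoadDataInfo, error) {
	val := sctx.Value(LoadDataVarKey)
	if val == nil {
		return nil, errors.New("load data info is absent")
	}
	info, ok := val.(*LoadDataInfo)
	if !ok {
		return nil, errors.New("load data info has unexpected type")
	}
	return info, nil
}
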
// Open implements the InterlockingDirectorate Open interface.
func (e *LoadDataInterDirc) Open(ctx context.Context) error {
	if e.loadDataInfo.insertDeferredCausets != nil {
		e.loadDataInfo.initEvalBuffer()
	}
	return nil
}

// CommitTask is used to pass data from the data-preparing routine to the committing routine.
type CommitTask struct {
	cnt  uint64
	rows [][]types.Causet
}

// LoadDataInfo saves the information of a load data operation.
type LoadDataInfo struct {
	*InsertValues

	event       []types.Causet
	Path        string
	Block       causet.Block
	FieldsInfo  *ast.FieldsClause
	LinesInfo   *ast.LinesClause
	IgnoreLines uint64
	Ctx         stochastikctx.Context
	rows        [][]types.Causet
	Drained     bool

	DeferredCausetAssignments  []*ast.Assignment
	DeferredCausetsAndUserVars []*ast.DeferredCausetNameOrUserVar
	FieldMappings              []*FieldMapping

	commitTaskQueue chan CommitTask
	StopCh          chan struct{}
	QuitCh          chan struct{}
}

// FieldMapping indicates the relationship between an input field and a causet defCausumn or user variable.
type FieldMapping struct {
	DeferredCauset *causet.DeferredCauset
	UserVar        *ast.VariableExpr
}

// initLoadDeferredCausets sets the defCausumns to which the input fields are loaded.
func (e *LoadDataInfo) initLoadDeferredCausets(defCausumnNames []string) error {
	var defcaus []*causet.DeferredCauset
	var missingDefCausName string
	var err error
	blockDefCauss := e.Block.DefCauss()

	if len(defCausumnNames) != len(blockDefCauss) {
		for _, v := range e.DeferredCausetAssignments {
			defCausumnNames = append(defCausumnNames, v.DeferredCauset.Name.O)
		}

		defcaus, missingDefCausName = causet.FindDefCauss(blockDefCauss, defCausumnNames, e.Block.Meta().PKIsHandle)
		if missingDefCausName != "" {
			return errors.Errorf("LOAD DATA INTO %s: unknown defCausumn %s", e.Block.Meta().Name.O, missingDefCausName)
		}
	} else {
		defcaus = blockDefCauss
	}

	for _, defCaus := range defcaus {
		if !defCaus.IsGenerated() {
			e.insertDeferredCausets = append(e.insertDeferredCausets, defCaus)
		}
		if defCaus.Name.L == perceptron.ExtraHandleName.L {
			if !e.ctx.GetStochastikVars().AllowWriteEventID {
				return errors.Errorf("load data memex for _milevadb_rowid are not supported.")
			}
			e.hasExtraHandle = true
			break
		}
	}

	// Check that each defCausumn is specified only once.
	err = causet.CheckOnce(defcaus)
	if err != nil {
		return err
	}

	return nil
}
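
// As a hypothetical illustration, not in the original file: for a memex such
// as LOAD DATA ... INTO TABLE t (c1, @v1, c2), initFieldMappings below would
// conceptually build
//
//	e.FieldMappings = [{DeferredCauset: c1}, {UserVar: @v1}, {DeferredCauset: c2}]
//
// and return []string{"c1", "c2"}, i.e. the defCausumn names in input order
// with the user-defined variable names excluded.
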
// initFieldMappings makes a field-mapping slice to implicitly map each input field to a causet defCausumn or a
// user-defined variable. The slice's order is the same as the order of the input fields.
// It returns the defCausumn names in the same order, without user-defined variable names.
func (e *LoadDataInfo) initFieldMappings() []string {
	defCausumns := make([]string, 0, len(e.DeferredCausetsAndUserVars)+len(e.DeferredCausetAssignments))
	blockDefCauss := e.Block.DefCauss()

	if len(e.DeferredCausetsAndUserVars) == 0 {
		for _, v := range blockDefCauss {
			fieldMapping := &FieldMapping{
				DeferredCauset: v,
			}
			e.FieldMappings = append(e.FieldMappings, fieldMapping)
			defCausumns = append(defCausumns, v.Name.O)
		}

		return defCausumns
	}

	var defCausumn *causet.DeferredCauset

	for _, v := range e.DeferredCausetsAndUserVars {
		if v.DeferredCausetName != nil {
			defCausumn = causet.FindDefCaus(blockDefCauss, v.DeferredCausetName.Name.O)
			defCausumns = append(defCausumns, v.DeferredCausetName.Name.O)
		} else {
			defCausumn = nil
		}

		fieldMapping := &FieldMapping{
			DeferredCauset: defCausumn,
			UserVar:        v.UserVar,
		}
		e.FieldMappings = append(e.FieldMappings, fieldMapping)
	}

	return defCausumns
}

// GetEvents is a getter for rows.
func (e *LoadDataInfo) GetEvents() [][]types.Causet {
	return e.rows
}

// GetCurBatchCnt is a getter for curBatchCnt.
func (e *LoadDataInfo) GetCurBatchCnt() uint64 {
	return e.curBatchCnt
}

// CloseTaskQueue lets the data-preparing routine inform the commit routine that there is no more data.
func (e *LoadDataInfo) CloseTaskQueue() {
	close(e.commitTaskQueue)
}

// InitQueues initializes the task queue and the error report queue.
func (e *LoadDataInfo) InitQueues() {
	e.commitTaskQueue = make(chan CommitTask, taskQueueSize)
	e.StopCh = make(chan struct{}, 2)
	e.QuitCh = make(chan struct{})
}

// StartStopWatcher monitors StopCh to force quitting.
func (e *LoadDataInfo) StartStopWatcher() {
	go func() {
		<-e.StopCh
		close(e.QuitCh)
	}()
}

// ForceQuit makes the commit routine quit directly.
func (e *LoadDataInfo) ForceQuit() {
	e.StopCh <- struct{}{}
}

// MakeCommitTask produces a commit task with the data in LoadDataInfo.rows and LoadDataInfo.curBatchCnt.
func (e *LoadDataInfo) MakeCommitTask() CommitTask {
	return CommitTask{e.curBatchCnt, e.rows}
}

// EnqOneTask feeds one batch commit task to the commit worker.
func (e *LoadDataInfo) EnqOneTask(ctx context.Context) error {
	var err error
	if e.curBatchCnt > 0 {
		sendOk := false
		for !sendOk {
			select {
			case e.commitTaskQueue <- e.MakeCommitTask():
				sendOk = true
			case <-e.QuitCh:
				err = errors.New("EnqOneTask forced to quit")
				logutil.Logger(ctx).Error("EnqOneTask forced to quit, possible commitWork error")
				return err
			}
		}
		// Reset the rows buffer; this reallocates the buffer and does NOT reuse it.
		e.SetMaxEventsInBatch(e.maxEventsInBatch)
	}
	return err
}
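
// runLoadPipelineSketch is a hypothetical sketch, not part of the original
// file, showing how the pieces above are meant to be wired together: one
// routine prepares batches and enqueues them while CommitWork (below) drains
// commitTaskQueue; StopCh/QuitCh let either side force the other to quit.
// The batch size of 20000 is an arbitrary assumption for illustration.
func runLoadPipelineSketch(ctx context.Context, e *LoadDataInfo) error {
	e.InitQueues()
	e.SetMaxEventsInBatch(20000)
	e.StartStopWatcher()
	done := make(chan error, 1)
	go func() {
		done <- e.CommitWork(ctx)
	}()
	// ... read input here, calling e.InsertData and e.EnqOneTask per batch ...
	e.CloseTaskQueue() // no more data: CommitWork drains the queue and returns
	return <-done
}
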
// CommitOneTask inserts the data from one task's rows, then commits and refreshes the transaction.
func (e *LoadDataInfo) CommitOneTask(ctx context.Context, task CommitTask) error {
	var err error
	defer func() {
		if err != nil {
			e.Ctx.StmtRollback()
		}
	}()
	err = e.CheckAndInsertOneBatch(ctx, task.rows, task.cnt)
	if err != nil {
		logutil.Logger(ctx).Error("commit error CheckAndInsert", zap.Error(err))
		return err
	}
	failpoint.Inject("commitOneTaskErr", func() error {
		return errors.New("mock commit one task error")
	})
	e.Ctx.StmtCommit()
	// Make sure that there are no retries when committing.
	if err = e.Ctx.RefreshTxnCtx(ctx); err != nil {
		logutil.Logger(ctx).Error("commit error refresh", zap.Error(err))
		return err
	}
	return err
}

// CommitWork commits batches sequentially.
func (e *LoadDataInfo) CommitWork(ctx context.Context) error {
	var err error
	defer func() {
		r := recover()
		if r != nil {
			logutil.Logger(ctx).Error("CommitWork panicked",
				zap.Reflect("r", r),
				zap.Stack("stack"))
		}
		if err != nil || r != nil {
			e.ForceQuit()
		}
		if err != nil {
			e.ctx.StmtRollback()
		}
	}()
	var tasks uint64
	var end = false
	for !end {
		select {
		case <-e.QuitCh:
			err = errors.New("commit forced to quit")
			logutil.Logger(ctx).Error("commit forced to quit, possible preparation failed")
			return err
		case commitTask, ok := <-e.commitTaskQueue:
			if ok {
				start := time.Now()
				err = e.CommitOneTask(ctx, commitTask)
				if err != nil {
					break
				}
				tasks++
				logutil.Logger(ctx).Info("commit one task success",
					zap.Duration("commit time usage", time.Since(start)),
					zap.Uint64("keys processed", commitTask.cnt),
					zap.Uint64("tasks processed", tasks),
					zap.Int("tasks in queue", len(e.commitTaskQueue)))
			} else {
				end = true
			}
		}
		if err != nil {
			logutil.Logger(ctx).Error("load data commit work error", zap.Error(err))
			break
		}
		if atomic.CompareAndSwapUint32(&e.Ctx.GetStochastikVars().Killed, 1, 0) {
			logutil.Logger(ctx).Info("load data query interrupted quit data processing")
			err = ErrQueryInterrupted
			break
		}
	}
	return err
}

// SetMaxEventsInBatch sets the max number of rows to insert in a batch.
func (e *LoadDataInfo) SetMaxEventsInBatch(limit uint64) {
	e.maxEventsInBatch = limit
	e.rows = make([][]types.Causet, 0, limit)
	e.curBatchCnt = 0
}

// getValidData returns prevData and curData that start from the starting symbol.
// If the data doesn't contain the starting symbol, prevData is nil and curData is curData[len(curData)-startingLen+1:].
// If curData is shorter than startingLen, curData is returned directly.
func (e *LoadDataInfo) getValidData(prevData, curData []byte) ([]byte, []byte) {
	startingLen := len(e.LinesInfo.Starting)
	if startingLen == 0 {
		return prevData, curData
	}

	prevLen := len(prevData)
	if prevLen > 0 {
		// The starting symbol is in prevData.
		idx := strings.Index(string(replog.String(prevData)), e.LinesInfo.Starting)
		if idx != -1 {
			return prevData[idx:], curData
		}

		// The starting symbol may straddle prevData and curData.
		restStart := curData
		if len(curData) >= startingLen {
			restStart = curData[:startingLen-1]
		}
		prevData = append(prevData, restStart...)
		idx = strings.Index(string(replog.String(prevData)), e.LinesInfo.Starting)
		if idx != -1 {
			return prevData[idx:prevLen], curData
		}
	}

	// The starting symbol is in curData.
	idx := strings.Index(string(replog.String(curData)), e.LinesInfo.Starting)
	if idx != -1 {
		return nil, curData[idx:]
	}

	// No starting symbol found.
	if len(curData) >= startingLen {
		curData = curData[len(curData)-startingLen+1:]
	}
	return nil, curData
}
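
// A hypothetical walk-through, not in the original file, assuming
// LINES STARTING BY "xxx": with prevData = []byte("ab") and
// curData = []byte("cxxx1|"), no complete "xxx" fits within prevData plus the
// first startingLen-1 bytes of curData, so getValidData finds the starting
// symbol inside curData and returns (nil, []byte("xxx1|")); the "abc" prefix
// before the starting symbol is discarded.
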
// getLine returns a line, the remaining curData, and a bool value.
// The bool is true if the data has the starting symbol, otherwise false.
func (e *LoadDataInfo) getLine(prevData, curData []byte) ([]byte, []byte, bool) {
	startingLen := len(e.LinesInfo.Starting)
	prevData, curData = e.getValidData(prevData, curData)
	if prevData == nil && len(curData) < startingLen {
		return nil, curData, false
	}

	prevLen := len(prevData)
	terminatedLen := len(e.LinesInfo.Terminated)
	curStartIdx := 0
	if prevLen < startingLen {
		curStartIdx = startingLen - prevLen
	}
	endIdx := -1
	if len(curData) >= curStartIdx {
		endIdx = strings.Index(string(replog.String(curData[curStartIdx:])), e.LinesInfo.Terminated)
	}
	if endIdx == -1 {
		// No terminated symbol in curData.
		if len(prevData) == 0 {
			return nil, curData, true
		}

		// The terminated symbol may straddle prevData and curData; search the combined buffer.
		curData = append(prevData, curData...)
		endIdx = strings.Index(string(replog.String(curData[startingLen:])), e.LinesInfo.Terminated)
		if endIdx != -1 {
			nextDataIdx := startingLen + endIdx + terminatedLen
			return curData[startingLen : startingLen+endIdx], curData[nextDataIdx:], true
		}
		// No terminated symbol at all.
		return nil, curData, true
	}

	// The terminated symbol is in curData.
	nextDataIdx := curStartIdx + endIdx + terminatedLen
	if len(prevData) == 0 {
		return curData[curStartIdx : curStartIdx+endIdx], curData[nextDataIdx:], true
	}

	// Re-check with prevData and the consumed part of curData combined.
	prevData = append(prevData, curData[:nextDataIdx]...)
	endIdx = strings.Index(string(replog.String(prevData[startingLen:])), e.LinesInfo.Terminated)
	if endIdx >= prevLen {
		return prevData[startingLen : startingLen+endIdx], curData[nextDataIdx:], true
	}

	// The terminated symbol is in prevData or straddles prevData and curData.
	lineLen := startingLen + endIdx + terminatedLen
	return prevData[startingLen : startingLen+endIdx], curData[lineLen-prevLen:], true
}
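
// splitLinesSketch is a hypothetical helper, not part of the original file,
// showing how getLine is driven, assuming e.LinesInfo is already configured:
// repeatedly carve one terminated line off the front of the buffered data
// until no complete line remains.
func splitLinesSketch(e *LoadDataInfo, data []byte) (lines [][]byte, rest []byte) {
	cur := data
	for {
		line, remaining, hasStarting := e.getLine(nil, cur)
		if line == nil || !hasStarting {
			// Incomplete line: the caller keeps `remaining` and retries
			// once more data has arrived.
			return lines, remaining
		}
		lines = append(lines, line)
		cur = remaining
	}
}
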
// InsertData inserts data into the specified causet according to the specified format.
// If the remaining data doesn't form a complete line, it is returned so the next call can finish processing it.
// If the number of inserted rows reaches maxEventsInBatch, the second return value is true.
// If prevData isn't nil and curData is nil, there is no more data to deal with and isEOF is true.
func (e *LoadDataInfo) InsertData(ctx context.Context, prevData, curData []byte) ([]byte, bool, error) {
	if len(prevData) == 0 && len(curData) == 0 {
		return nil, false, nil
	}
	var line []byte
	var isEOF, hasStarting, reachLimit bool
	if len(prevData) > 0 && len(curData) == 0 {
		isEOF = true
		prevData, curData = curData, prevData
	}
	for len(curData) > 0 {
		line, curData, hasStarting = e.getLine(prevData, curData)
		prevData = nil
		// If it doesn't find the terminated symbol and this data isn't the last data,
		// the data can't be inserted.
		if line == nil && !isEOF {
			break
		}
		// If it doesn't find the starting symbol, this data can't be inserted.
		if !hasStarting {
			if isEOF {
				curData = nil
			}
			break
		}
		if line == nil && isEOF {
			line = curData[len(e.LinesInfo.Starting):]
			curData = nil
		}

		if e.IgnoreLines > 0 {
			e.IgnoreLines--
			continue
		}
		defcaus, err := e.getFieldsFromLine(line)
		if err != nil {
			return nil, false, err
		}
		// rowCount will be used in fillEvent(), and the last insert ID is assigned
		// according to rowCount = 1, so it must be incremented first here.
		e.rowCount++
		e.rows = append(e.rows, e.defcausToEvent(ctx, defcaus))
		e.curBatchCnt++
		if e.maxEventsInBatch != 0 && e.rowCount%e.maxEventsInBatch == 0 {
			reachLimit = true
			logutil.Logger(ctx).Info("batch limit hit when inserting rows", zap.Int("maxBatchEvents", e.maxChunkSize),
				zap.Uint64("totalEvents", e.rowCount))
			break
		}
	}
	return curData, reachLimit, nil
}

// CheckAndInsertOneBatch is used to commit one transaction batch filled with data.
func (e *LoadDataInfo) CheckAndInsertOneBatch(ctx context.Context, rows [][]types.Causet, cnt uint64) error {
	var err error
	if cnt == 0 {
		return err
	}
	e.ctx.GetStochastikVars().StmtCtx.AddRecordEvents(cnt)
	err = e.batchCheckAndInsert(ctx, rows[0:cnt], e.addRecordLD)
	if err != nil {
		return err
	}
	return err
}

// SetMessage sets the info message (ERR_LOAD_INFO) generated by the LOAD memex. It is public because of the
// special way the LOAD memex is handled.
func (e *LoadDataInfo) SetMessage() {
	stmtCtx := e.ctx.GetStochastikVars().StmtCtx
	numRecords := stmtCtx.RecordEvents()
	numDeletes := 0
	numSkipped := numRecords - stmtCtx.CopiedEvents()
	numWarnings := stmtCtx.WarningCount()
	msg := fmt.Sprintf(allegrosql.MyALLEGROSQLErrName[allegrosql.ErrLoadInfo], numRecords, numDeletes, numSkipped, numWarnings)
	e.ctx.GetStochastikVars().StmtCtx.SetMessage(msg)
}
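
// processStreamSketch is a hypothetical sketch, not part of the original file,
// of a caller feeding sequential reads into InsertData. readChunk is an
// assumed callback that returns the next chunk of the input file, or nil at
// EOF. The leftover partial line from each call is carried into the next one,
// and a full batch is flushed to the commit worker via EnqOneTask.
func processStreamSketch(ctx context.Context, e *LoadDataInfo, readChunk func() []byte) error {
	var buffered []byte
	for {
		cur := readChunk()
		rest, reachLimit, err := e.InsertData(ctx, buffered, cur)
		if err != nil {
			return err
		}
		if reachLimit {
			// The batch is full: hand it to the commit routine.
			if err := e.EnqOneTask(ctx); err != nil {
				return err
			}
		}
		if cur == nil {
			// EOF: InsertData has consumed the final (unterminated) line;
			// enqueue whatever remains in the current batch.
			return e.EnqOneTask(ctx)
		}
		buffered = rest
	}
}
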
func (e *LoadDataInfo) defcausToEvent(ctx context.Context, defcaus []field) []types.Causet {
	event := make([]types.Causet, 0, len(e.insertDeferredCausets))

	for i := 0; i < len(e.FieldMappings); i++ {
		if i >= len(defcaus) {
			if e.FieldMappings[i].DeferredCauset == nil {
				stochastikVars := e.Ctx.GetStochastikVars()
				stochastikVars.SetUserVar(e.FieldMappings[i].UserVar.Name, "", allegrosql.DefaultDefCauslationName)
				continue
			}

			// If a defCausumn is missing, its type is time, and it has the NOT NULL flag, it is set to the current time.
			if types.IsTypeTime(e.FieldMappings[i].DeferredCauset.Tp) && allegrosql.HasNotNullFlag(e.FieldMappings[i].DeferredCauset.Flag) {
				event = append(event, types.NewTimeCauset(types.CurrentTime(e.FieldMappings[i].DeferredCauset.Tp)))
				continue
			}

			event = append(event, types.NewCauset(nil))
			continue
		}

		if e.FieldMappings[i].DeferredCauset == nil {
			stochastikVars := e.Ctx.GetStochastikVars()
			stochastikVars.SetUserVar(e.FieldMappings[i].UserVar.Name, string(defcaus[i].str), allegrosql.DefaultDefCauslationName)
			continue
		}

		// A field containing only "\N" is handled as NULL in the csv file.
		// See http://dev.allegrosql.com/doc/refman/5.7/en/load-data.html
		if defcaus[i].maybeNull && string(defcaus[i].str) == "N" {
			event = append(event, types.NewCauset(nil))
			continue
		}

		event = append(event, types.NewCauset(string(defcaus[i].str)))
	}
	for i := 0; i < len(e.DeferredCausetAssignments); i++ {
		// Evaluate the memex of the `SET` clause.
		d, err := memex.EvalAstExpr(e.Ctx, e.DeferredCausetAssignments[i].Expr)
		if err != nil {
			e.handleWarning(err)
			return nil
		}
		event = append(event, d)
	}

	// A new event buffer will be allocated in getEvent.
	newEvent, err := e.getEvent(ctx, event)
	if err != nil {
		e.handleWarning(err)
		return nil
	}

	return newEvent
}

func (e *LoadDataInfo) addRecordLD(ctx context.Context, event []types.Causet) error {
	if event == nil {
		return nil
	}
	err := e.addRecord(ctx, event)
	if err != nil {
		e.handleWarning(err)
		return err
	}
	return nil
}

type field struct {
	str       []byte
	maybeNull bool
	enclosed  bool
}

type fieldWriter struct {
	pos           int
	ReadBuf       []byte
	OutputBuf     []byte
	term          string
	enclosedChar  byte
	fieldTermChar byte
	isEnclosed    bool
	isLineStart   bool
	isFieldStart  bool
}

func (w *fieldWriter) Init(enclosedChar byte, fieldTermChar byte, readBuf []byte, term string) {
	w.isEnclosed = false
	w.isLineStart = true
	w.isFieldStart = true
	w.ReadBuf = readBuf
	w.enclosedChar = enclosedChar
	w.fieldTermChar = fieldTermChar
	w.term = term
}

func (w *fieldWriter) putback() {
	w.pos--
}

func (w *fieldWriter) getChar() (bool, byte) {
	if w.pos < len(w.ReadBuf) {
		ret := w.ReadBuf[w.pos]
		w.pos++
		return true, ret
	}
	return false, 0
}

func (w *fieldWriter) isTerminator() bool {
	chkpt, isterm := w.pos, true
	for i := 1; i < len(w.term); i++ {
		flag, ch := w.getChar()
		if !flag || ch != w.term[i] {
			isterm = false
			break
		}
	}
	if !isterm {
		w.pos = chkpt
		return false
	}
	return true
}

func (w *fieldWriter) outputField(enclosed bool) field {
	var fild []byte
	start := 0
	if enclosed {
		start = 1
	}
	for i := start; i < len(w.OutputBuf); i++ {
		fild = append(fild, w.OutputBuf[i])
	}
	if len(fild) == 0 {
		fild = []byte("")
	}
	w.OutputBuf = w.OutputBuf[0:0]
	w.isEnclosed = false
	w.isFieldStart = true
	return field{fild, false, enclosed}
}
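
// tokenizeLineSketch is a hypothetical example, not part of the original file,
// of driving fieldWriter by hand: split one line into fields, assuming ',' as
// the field terminator and '"' as the enclosing character. GetField (below)
// reports via its first return value whether the end of the line was reached.
func tokenizeLineSketch(line []byte) []field {
	var w fieldWriter
	var fields []field
	w.Init('"', ',', line, ",")
	for {
		eol, f := w.GetField()
		fields = append(fields, f)
		if eol {
			return fields
		}
	}
}
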
// GetField reads the next field from the line buffer.
// The first return value implies whether fieldWriter has read the last character of the line.
func (w *fieldWriter) GetField() (bool, field) {
	if w.isLineStart {
		_, ch := w.getChar()
		if ch == w.enclosedChar {
			w.isEnclosed = true
			w.isFieldStart, w.isLineStart = false, false
			w.OutputBuf = append(w.OutputBuf, ch)
		} else {
			w.putback()
		}
	}
	for {
		flag, ch := w.getChar()
		if !flag {
			ret := w.outputField(false)
			return true, ret
		}
		if ch == w.enclosedChar && w.isFieldStart {
			// Read the enclosing char at field start.
			w.isEnclosed = true
			w.OutputBuf = append(w.OutputBuf, ch)
			w.isLineStart, w.isFieldStart = false, false
			continue
		}
		w.isLineStart, w.isFieldStart = false, false
		if ch == w.fieldTermChar && !w.isEnclosed {
			// Read the field-terminating char.
			if w.isTerminator() {
				ret := w.outputField(false)
				return false, ret
			}
			w.OutputBuf = append(w.OutputBuf, ch)
		} else if ch == w.enclosedChar && w.isEnclosed {
			// Read the enclosing char; look ahead.
			flag, ch = w.getChar()
			if !flag {
				ret := w.outputField(true)
				return true, ret
			} else if ch == w.enclosedChar {
				w.OutputBuf = append(w.OutputBuf, ch)
				continue
			} else if ch == w.fieldTermChar {
				// If the next char is fieldTermChar, look ahead.
				if w.isTerminator() {
					ret := w.outputField(true)
					return false, ret
				}
				w.OutputBuf = append(w.OutputBuf, ch)
			} else {
				// There is no terminator behind the enclosing char; put the char back.
				w.OutputBuf = append(w.OutputBuf, w.enclosedChar)
				w.putback()
			}
		} else if ch == '\\' {
			// TODO: escaping only supports '\'.
			// When the escaped character is interpreted as if
			// it was not escaped, the backslash is ignored.
			flag, ch = w.getChar()
			if flag {
				w.OutputBuf = append(w.OutputBuf, '\\')
				w.OutputBuf = append(w.OutputBuf, ch)
			}
		} else {
			w.OutputBuf = append(w.OutputBuf, ch)
		}
	}
}

// getFieldsFromLine splits a line according to fieldsInfo.
func (e *LoadDataInfo) getFieldsFromLine(line []byte) ([]field, error) {
	var (
		reader fieldWriter
		fields []field
	)

	if len(line) == 0 {
		str := []byte("")
		fields = append(fields, field{str, false, false})
		return fields, nil
	}

	reader.Init(e.FieldsInfo.Enclosed, e.FieldsInfo.Terminated[0], line, e.FieldsInfo.Terminated)
	for {
		eol, f := reader.GetField()
		f = f.escape()
		if bytes.Equal(f.str, null) && !f.enclosed {
			f.str = []byte{'N'}
			f.maybeNull = true
		}
		fields = append(fields, f)
		if eol {
			break
		}
	}
	return fields, nil
}

// escape handles escape characters when running a load data memex.
// See http://dev.allegrosql.com/doc/refman/5.7/en/load-data.html
// TODO: escape only supports '\' as the `ESCAPED BY` character; it should support specified characters.
func (f *field) escape() field {
	pos := 0
	for i := 0; i < len(f.str); i++ {
		c := f.str[i]
		if i+1 < len(f.str) && f.str[i] == '\\' {
			c = f.escapeChar(f.str[i+1])
			i++
		}

		f.str[pos] = c
		pos++
	}
	return field{f.str[:pos], f.maybeNull, f.enclosed}
}

func (f *field) escapeChar(c byte) byte {
	switch c {
	case '0':
		return 0
	case 'b':
		return '\b'
	case 'n':
		return '\n'
	case 'r':
		return '\r'
	case 't':
		return '\t'
	case 'Z':
		return 26
	case 'N':
		f.maybeNull = true
		return c
	case '\\':
		return c
	default:
		return c
	}
}

// loadDataVarKeyType is a dummy type to avoid a naming collision in context.
type loadDataVarKeyType int

// String defines a Stringer function for debugging and pretty printing.
func (k loadDataVarKeyType) String() string {
	return "load_data_var"
}

// LoadDataVarKey is a variable key for load data.
const LoadDataVarKey loadDataVarKeyType = 0
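
// escapeSketch is a hypothetical illustration, not part of the original file,
// of the escape/escapeChar behavior above: the two-byte sequence \t collapses
// to a literal tab, while \N is kept as "N" with maybeNull set, matching the
// "\N means NULL" convention handled in defcausToEvent.
func escapeSketch() {
	f := field{str: []byte(`a\tb`)}
	f = f.escape()
	fmt.Printf("%q %v\n", f.str, f.maybeNull) // prints "a\tb" false

	n := field{str: []byte(`\N`)}
	n = n.escape()
	fmt.Printf("%q %v\n", n.str, n.maybeNull) // prints "N" true
}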