github.com/matrixorigin/matrixone@v0.7.0/pkg/frontend/load.go

// Copyright 2021 Matrix Origin
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package frontend

import (
    "context"
    "encoding/csv"
    "errors"
    "math"
    "os"
    "runtime"
    "strconv"
    "strings"
    "sync"
    "sync/atomic"
    "time"

    "github.com/matrixorigin/matrixone/pkg/container/batch"
    "github.com/matrixorigin/matrixone/pkg/logutil"
    "github.com/matrixorigin/matrixone/pkg/sql/parsers/tree"
    "github.com/matrixorigin/matrixone/pkg/vm/engine"
    "github.com/matrixorigin/matrixone/pkg/vm/process"
    "github.com/matrixorigin/simdcsv"

    "github.com/matrixorigin/matrixone/pkg/common/moerr"
    "github.com/matrixorigin/matrixone/pkg/container/nulls"
    "github.com/matrixorigin/matrixone/pkg/container/types"
    "github.com/matrixorigin/matrixone/pkg/container/vector"
)

type LoadResult struct {
    Records, Deleted, Skipped, Warnings, WriteTimeout uint64
}

type DebugTime struct {
    row2col   time.Duration
    fillBlank time.Duration
    toStorage time.Duration

    writeBatch time.Duration
    resetBatch time.Duration

    // prefix time.Duration
    // skip_bytes time.Duration

    // process_field time.Duration
    // split_field time.Duration
    // split_before_loop time.Duration
    // wait_loop time.Duration
    // handler_get time.Duration
    // wait_switch time.Duration
    // field_first_byte time.Duration
    // field_enclosed time.Duration
    // field_without time.Duration
    // field_skip_bytes time.Duration

    callback       time.Duration
    asyncChan      atomic.Value // time.Duration
    csvLineArray1  atomic.Value // time.Duration
    csvLineArray2  time.Duration
    asyncChanLoop  atomic.Value // time.Duration
    saveParsedLine time.Duration
    choose_true    time.Duration
    choose_false   time.Duration
}

type SharePart struct {
    //load reference
    load *tree.Import
    //how to handle errors during field conversion
    ignoreFieldError bool

    //index of the current line in the line array
    lineIdx     int
    maxFieldCnt int
    bytes       uint64

    lineCount uint64

    //batch
    batchSize      int
    skipWriteBatch bool

    //maps a column id in the source data to a column id in the table
    dataColumnId2TableColumnId []int

    cols      []*engine.AttributeDef
    attrName  []string
    timestamp uint64

    //simd csv
    simdCsvLineArray [][]string

    //storage
    storage        engine.Engine
    dbHandler      engine.Database
    tableHandler   engine.Relation
    dbName         string
    tableName      string
    txnHandler     *TxnHandler
    oneTxnPerBatch bool
    ses            *Session

    //result of the load
    result *LoadResult

    loadCtx context.Context
}

type notifyEventType int

const (
    NOTIFY_EVENT_WRITE_BATCH_ERROR notifyEventType = iota
    NOTIFY_EVENT_WRITE_BATCH_RESULT
    NOTIFY_EVENT_READ_SIMDCSV_ERROR
    NOTIFY_EVENT_OUTPUT_SIMDCSV_ERROR
    NOTIFY_EVENT_END
)

const NULL_FLAG = "\\N"

type notifyEvent struct {
    neType notifyEventType
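    //err is the error carried by the event, if any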
    err    error
    wbh    *WriteBatchHandler
}

func newNotifyEvent(t notifyEventType, e error, w *WriteBatchHandler) *notifyEvent {
    return &notifyEvent{
        neType: t,
        err:    e,
        wbh:    w,
    }
}

type PoolElement struct {
    id        int
    bat       *batch.Batch
    lineArray [][]string
}

type ThreadInfo struct {
    threadCnt int32
    startTime atomic.Value
}

func (t *ThreadInfo) SetTime(tmp time.Time) {
    t.startTime.Store(tmp)
}

func (t *ThreadInfo) GetTime() (val interface{}) {
    return t.startTime.Load()
}

func (t *ThreadInfo) SetCnt(id int32) {
    atomic.StoreInt32(&t.threadCnt, id)
}

func (t *ThreadInfo) GetCnt() int32 {
    return atomic.LoadInt32(&t.threadCnt)
}

type ParseLineHandler struct {
    SharePart
    DebugTime

    threadInfo    map[int]*ThreadInfo
    simdCsvReader *simdcsv.Reader
    //the count of write routines
    simdCsvConcurrencyCountOfWriteBatch int
    //wait for write routines to quit
    simdCsvWaitWriteRoutineToQuit *sync.WaitGroup
    simdCsvBatchPool              chan *PoolElement
    simdCsvNotiyEventChan         chan *notifyEvent
    closeOnce                     sync.Once
    proc                          *process.Process
}

type WriteBatchHandler struct {
    SharePart
    DebugTime
    *ThreadInfo

    batchData   *batch.Batch
    pl          *PoolElement
    batchFilled int
    simdCsvErr  error
}

func (plh *ParseLineHandler) getLineOutCallback(lineOut simdcsv.LineOut) error {
    wait_a := time.Now()
    defer func() {
        AtomicAddDuration(plh.asyncChan, time.Since(wait_a))
    }()

    wait_d := time.Now()
    if lineOut.Line == nil && lineOut.Lines == nil {
        return nil
    }
    if lineOut.Line != nil {
        //step 1 : skip dropped lines
        if plh.lineCount < plh.load.Param.Tail.IgnoredLines {
            plh.lineCount++
            return nil
        }

        wait_b := time.Now()

        //step 2 : append the line to the line array
        plh.simdCsvLineArray[plh.lineIdx] = lineOut.Line
        plh.lineIdx++
        plh.lineCount++
        plh.maxFieldCnt = Max(plh.maxFieldCnt, len(lineOut.Line))

        AtomicAddDuration(plh.csvLineArray1, time.Since(wait_b))

        if plh.lineIdx == plh.batchSize {
            //logutil.Infof("+++++ batch bytes %v B %v MB",plh.bytes,plh.bytes / 1024.0 / 1024.0)
            err := saveLinesToStorage(plh, false)
            if err != nil {
                return err
            }

            plh.lineIdx = 0
            plh.maxFieldCnt = 0
            plh.bytes = 0
        }
    }
    AtomicAddDuration(plh.asyncChanLoop, time.Since(wait_d))

    return nil
}

func AtomicAddDuration(v atomic.Value, t interface{}) {
    var ti time.Duration = 0
    switch t := t.(type) {
    case time.Duration:
        ti = t
    case atomic.Value:
        tx := t
        if tx.Load() != nil {
            ti = tx.Load().(time.Duration)
        }
    }
    if v.Load() == nil {
        v.Store(time.Duration(0) + ti)
    } else {
        v.Store(v.Load().(time.Duration) + ti)
    }
}

func (plh *ParseLineHandler) close() {
    //plh.closeOnceGetParsedLinesChan.Do(func() {
    //    close(getLineOutChan(plh.simdCsvGetParsedLinesChan))
    //})
    plh.closeOnce.Do(func() {
        close(plh.simdCsvBatchPool)
        close(plh.simdCsvNotiyEventChan)
        plh.simdCsvReader.Close()
    })
}

/*
alloc space for the batch
*/
func makeBatch(handler *ParseLineHandler, proc *process.Process, id int) *PoolElement {
    batchData := batch.New(true, handler.attrName)

    //logutil.Infof("----- batchSize %d attrName %v",batchSize,handler.attrName)

    batchSize := handler.batchSize

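    //each PoolElement owns one batch and one line array, both sized to batchSize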
    //alloc space for vector
    for i := 0; i < len(handler.attrName); i++ {
        // XXX memory alloc, where is the proc.Mp?
        vec := vector.NewOriginal(handler.cols[i].Attr.Type)
        vector.PreAlloc(vec, batchSize, batchSize, proc.Mp())

        //vec := vector.PreAllocType(handler.cols[i].Attr.Type, batchSize, batchSize, proc.Mp())
        batchData.Vecs[i] = vec
    }

    return &PoolElement{
        id:        id,
        bat:       batchData,
        lineArray: make([][]string, handler.batchSize),
    }
}

/*
Init ParseLineHandler
*/
func initParseLineHandler(requestCtx context.Context, proc *process.Process, handler *ParseLineHandler) error {
    relation := handler.tableHandler
    load := handler.load

    var cols []*engine.AttributeDef = nil
    defs, err := relation.TableDefs(requestCtx)
    if err != nil {
        return err
    }
    for _, def := range defs {
        attr, ok := def.(*engine.AttributeDef)
        if ok {
            cols = append(cols, attr)
        }
    }

    attrName := make([]string, len(cols))
    tableName2ColumnId := make(map[string]int)
    for i, col := range cols {
        attrName[i] = col.Attr.Name
        tableName2ColumnId[col.Attr.Name] = i
    }

    handler.cols = cols
    handler.attrName = attrName

    //map each column in LOAD DATA's column list to its column in the table.
    var dataColumnId2TableColumnId []int
    if len(load.Param.Tail.ColumnList) == 0 {
        dataColumnId2TableColumnId = make([]int, len(cols))
        for i := 0; i < len(cols); i++ {
            dataColumnId2TableColumnId[i] = i
        }
    } else {
        dataColumnId2TableColumnId = make([]int, len(load.Param.Tail.ColumnList))
        for i, col := range load.Param.Tail.ColumnList {
            switch realCol := col.(type) {
            case *tree.UnresolvedName:
                tid, ok := tableName2ColumnId[realCol.Parts[0]]
                if !ok {
                    return moerr.NewInternalError(proc.Ctx, "no such column %s", realCol.Parts[0])
                }
                dataColumnId2TableColumnId[i] = tid
            case *tree.VarExpr:
                //NOTE: a variable like '@abc' is passed by; its field is dropped.
                dataColumnId2TableColumnId[i] = -1
            default:
                return moerr.NewInternalError(proc.Ctx, "unsupported column type %v", realCol)
            }
        }
    }
    handler.dataColumnId2TableColumnId = dataColumnId2TableColumnId

    //allocate batches
    for j := 0; j < cap(handler.simdCsvBatchPool); j++ {
        batchData := makeBatch(handler, proc, j)
        handler.simdCsvBatchPool <- batchData
    }
    return nil
}

/*
alloc a batch from the pool.
if the pool does not have a batch anymore, the caller routine will be suspended.
*/
func allocBatch(handler *ParseLineHandler) *PoolElement {
    batchData := <-handler.simdCsvBatchPool
    return batchData
}

/*
return a batch to the pool
*/
func releaseBatch(handler *ParseLineHandler, pl *PoolElement) {
    //clear the batch
    //clear vector.nulls.Nulls
    for _, vec := range pl.bat.Vecs {
        vec.Nsp = &nulls.Nulls{}
        // XXX old code specially handled varlen types. Now it is a no-op.
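        //replacing Nsp drops the null bitmap so the vector can be refilled for the next batch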
    }
    handler.simdCsvBatchPool <- pl
}

/*
initWriteBatchHandler may be suspended when the pool does not have enough batches.
*/
func initWriteBatchHandler(handler *ParseLineHandler, wHandler *WriteBatchHandler) error {
    wHandler.ignoreFieldError = handler.ignoreFieldError
    wHandler.cols = handler.cols
    wHandler.dataColumnId2TableColumnId = handler.dataColumnId2TableColumnId
    wHandler.batchSize = handler.batchSize
    wHandler.attrName = handler.attrName
    wHandler.storage = handler.storage
    wHandler.dbName = handler.dbName
    wHandler.dbHandler = handler.dbHandler
    wHandler.tableHandler = handler.tableHandler
    wHandler.tableName = handler.tableName
    wHandler.txnHandler = handler.txnHandler
    wHandler.ses = handler.ses
    wHandler.oneTxnPerBatch = handler.oneTxnPerBatch
    wHandler.timestamp = handler.timestamp
    wHandler.result = &LoadResult{}
    wHandler.lineCount = handler.lineCount
    wHandler.skipWriteBatch = handler.skipWriteBatch
    wHandler.loadCtx = handler.loadCtx

    wHandler.pl = allocBatch(handler)
    wHandler.ThreadInfo = handler.threadInfo[wHandler.pl.id]
    wHandler.simdCsvLineArray = wHandler.pl.lineArray
    for i := 0; i < handler.lineIdx; i++ {
        wHandler.simdCsvLineArray[i] = handler.simdCsvLineArray[i]
    }

    wHandler.batchData = wHandler.pl.bat
    return nil
}

func collectWriteBatchResult(handler *ParseLineHandler, wh *WriteBatchHandler, err error) {
    //logutil.Infof("++++> %d %d %d %d",
    //    wh.result.Skipped,
    //    wh.result.Deleted,
    //    wh.result.Warnings,
    //    wh.result.Records,
    //)
    if wh == nil {
        return
    }

    handler.result.Skipped += wh.result.Skipped
    handler.result.Deleted += wh.result.Deleted
    handler.result.Warnings += wh.result.Warnings
    handler.result.Records += wh.result.Records
    handler.result.WriteTimeout += wh.result.WriteTimeout
    //
    handler.row2col += wh.row2col
    handler.fillBlank += wh.fillBlank
    handler.toStorage += wh.toStorage

    handler.writeBatch += wh.writeBatch
    handler.resetBatch += wh.resetBatch

    //
    handler.callback += wh.callback
    AtomicAddDuration(handler.asyncChan, wh.asyncChan)
    AtomicAddDuration(handler.asyncChanLoop, wh.asyncChanLoop)
    AtomicAddDuration(handler.csvLineArray1, wh.csvLineArray1)
    handler.csvLineArray2 += wh.csvLineArray2
    handler.saveParsedLine += wh.saveParsedLine
    handler.choose_true += wh.choose_true
    handler.choose_false += wh.choose_false

    wh.batchData = nil
    wh.simdCsvLineArray = nil
    wh.simdCsvErr = nil
}

func makeParsedFailedError(ctx context.Context, tp, field, column string, line uint64, offset int) *moerr.Error {
    return moerr.NewDataTruncated(ctx, tp, "value '%s' for column '%s' at row '%d'", field, column, line+uint64(offset))
}

func errorCanBeIgnored(err error) bool {
    switch err.(type) {
    case *moerr.Error, *csv.ParseError:
        return false
    default:
        return true
    }
}

/*
isWriteBatchTimeoutError returns true when the err is a write-batch timeout.
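The check uses errors.Is, so deadline errors wrapped by the storage layer are
recognized as well.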
*/
func isWriteBatchTimeoutError(err error) bool {
    if err == nil {
        return false
    }
    return errors.Is(err, context.DeadlineExceeded)
}

//judgeInterge reports whether the field consists solely of ASCII digits.
func judgeInterge(field string) bool {
    for i := 0; i < len(field); i++ {
        if field[i] > '9' || field[i] < '0' {
            return false
        }
    }
    return true
}

func rowToColumnAndSaveToStorage(handler *WriteBatchHandler, proc *process.Process, forceConvert bool, row2colChoose bool) error {
    begin := time.Now()
    defer func() {
        handler.saveParsedLine += time.Since(begin)
        //logutil.Infof("-----saveParsedLinesToBatchSimdCsv %s",time.Since(begin))
    }()

    countOfLineArray := handler.lineIdx

    /*
        XXX: orig code commented out the panic, therefore this
        branch is a no-op, generating a go warning. The panic would
        cause a test failure.

        The whole if block is commented out to make the test pass.
        Needs a fix.

        if !forceConvert {
            if countOfLineArray != handler.batchSize {
                // logutil.Infof("---->countOfLineArray %d batchSize %d ",countOfLineArray,handler.batchSize)
                panic("-----write a batch")
            }
        }
    */

    batchData := handler.batchData
    columnFLags := make([]byte, len(batchData.Vecs))
    fetchCnt := 0
    var err error
    allFetchCnt := 0

    row2col := time.Duration(0)
    fillBlank := time.Duration(0)
    toStorage := time.Duration(0)
    fetchCnt = countOfLineArray
    //logutil.Infof("-----fetchCnt %d len(lineArray) %d",fetchCnt,len(handler.simdCsvLineArray))
    fetchLines := handler.simdCsvLineArray[:fetchCnt]

    /*
        row to column
    */

    batchBegin := handler.batchFilled
    ignoreFieldError := handler.ignoreFieldError
    result := handler.result

    //logutil.Infof("-----ignoreFieldError %v",handler.ignoreFieldError)
    timeZone := handler.ses.GetTimeZone()
    if row2colChoose {
        wait_d := time.Now()
        for i, line := range fetchLines {
            //wait_a := time.Now()
            rowIdx := batchBegin + i
            offset := i + 1
            base := handler.lineCount - uint64(fetchCnt)
            //logutil.Infof("------ linecount %d fetchcnt %d base %d offset %d",
            //    handler.lineCount,fetchCnt,base,offset)
            //record missing column
            for k := 0; k < len(columnFLags); k++ {
                columnFLags[k] = 0
            }

            for j, lineStr := range line {
                //logutil.Infof("data col %d : %v",j,field)
                //where will column j go ?
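                //a field with no target column (beyond the column list, or bound
                //to a variable) maps to -1 and is dropped below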
                colIdx := -1
                if j < len(handler.dataColumnId2TableColumnId) {
                    colIdx = handler.dataColumnId2TableColumnId[j]
                }
                //else{
                //    //mysql warning ER_WARN_TOO_MANY_RECORDS
                //    result.Warnings++
                //}
                //drop this field
                if colIdx == -1 {
                    continue
                }

                //put it into batch
                vec := batchData.Vecs[colIdx]
                vecAttr := batchData.Attrs[colIdx]
                field := strings.TrimSpace(lineStr)

                id := types.T(vec.Typ.Oid)
                if id != types.T_char && id != types.T_varchar {
                    field = strings.TrimSpace(field)
                }
                isNullOrEmpty := field == NULL_FLAG
                if id != types.T_char && id != types.T_varchar && id != types.T_json && id != types.T_blob && id != types.T_text {
                    isNullOrEmpty = isNullOrEmpty || len(field) == 0
                }

                //record colIdx
                columnFLags[colIdx] = 1

                //logutil.Infof("data set col %d : %v ",j,field)

                switch vec.Typ.Oid {
                case types.T_bool:
                    cols := vector.MustTCols[bool](vec)
                    if isNullOrEmpty {
                        nulls.Add(vec.Nsp, uint64(rowIdx))
                    } else {
                        if field == "true" || field == "1" {
                            cols[rowIdx] = true
                        } else if field == "false" || field == "0" {
                            cols[rowIdx] = false
                        } else {
                            return moerr.NewInternalError(proc.Ctx, "the input value '%s' is not bool type", field)
                        }
                    }
                case types.T_int8:
                    cols := vector.MustTCols[int8](vec)
                    if isNullOrEmpty {
                        nulls.Add(vec.Nsp, uint64(rowIdx))
                    } else {
                        if judgeInterge(field) {
                            d, err := strconv.ParseInt(field, 10, 8)
                            if err != nil {
                                logutil.Errorf("parse field[%v] err:%v", field, err)
                                if !ignoreFieldError {
                                    return makeParsedFailedError(proc.Ctx, vec.Typ.String(), field, vecAttr, base, offset)
                                }
                                result.Warnings++
                                d = 0
                            }
                            cols[rowIdx] = int8(d)
                        } else {
                            d, err := strconv.ParseFloat(field, 64)
                            if err != nil || d < math.MinInt8 || d > math.MaxInt8 {
                                logutil.Errorf("parse field[%v] err:%v", field, err)
                                if !ignoreFieldError {
                                    return makeParsedFailedError(proc.Ctx, vec.Typ.String(), field, vecAttr, base, offset)
                                }
                                result.Warnings++
                                d = 0
                            }
                            cols[rowIdx] = int8(d)
                        }
                    }
                case types.T_int16:
                    cols := vector.MustTCols[int16](vec)
                    if isNullOrEmpty {
                        nulls.Add(vec.Nsp, uint64(rowIdx))
                    } else {
                        if judgeInterge(field) {
                            d, err := strconv.ParseInt(field, 10, 16)
                            if err != nil {
                                logutil.Errorf("parse field[%v] err:%v", field, err)
                                if !ignoreFieldError {
                                    return makeParsedFailedError(proc.Ctx, vec.Typ.String(), field, vecAttr, base, offset)
                                }
                                result.Warnings++
                                d = 0
                            }
                            cols[rowIdx] = int16(d)
                        } else {
                            d, err := strconv.ParseFloat(field, 64)
                            if err != nil || d < math.MinInt16 || d > math.MaxInt16 {
                                logutil.Errorf("parse field[%v] err:%v", field, err)
                                if !ignoreFieldError {
                                    return makeParsedFailedError(proc.Ctx, vec.Typ.String(), field, vecAttr, base, offset)
                                }
                                result.Warnings++
                                d = 0
                            }
                            cols[rowIdx] = int16(d)
                        }
                    }
                case types.T_int32:
                    cols := vector.MustTCols[int32](vec)
                    if isNullOrEmpty {
                        nulls.Add(vec.Nsp, uint64(rowIdx))
                    } else {
                        if judgeInterge(field) {
                            d, err := strconv.ParseInt(field, 10, 32)
                            if err != nil {
                                logutil.Errorf("parse field[%v] err:%v", field, err)
                                if !ignoreFieldError {
                                    return makeParsedFailedError(proc.Ctx, vec.Typ.String(), field, vecAttr, base, offset)
                                }
                                result.Warnings++
                                d = 0
                            }
                            cols[rowIdx] = int32(d)
                        } else {
                            d, err := strconv.ParseFloat(field, 64)
                            if err != nil || d < math.MinInt32 || d > math.MaxInt32 {
                                logutil.Errorf("parse field[%v] err:%v", field, err)
                                if !ignoreFieldError {
                                    return makeParsedFailedError(proc.Ctx, vec.Typ.String(), field, vecAttr, base, offset)
                                }
                                result.Warnings++
                                d = 0
                            }
                            cols[rowIdx] = int32(d)
                        }
                    }
                case types.T_int64:
                    cols := vector.MustTCols[int64](vec)
                    if isNullOrEmpty {
                        nulls.Add(vec.Nsp, uint64(rowIdx))
                    } else {
                        if judgeInterge(field) {
                            d, err := strconv.ParseInt(field, 10, 64)
                            if err != nil {
                                logutil.Errorf("parse field[%v] err:%v", field, err)
                                if !ignoreFieldError {
                                    return makeParsedFailedError(proc.Ctx, vec.Typ.String(), field, vecAttr, base, offset)
                                }
                                result.Warnings++
                                d = 0
                            }
                            cols[rowIdx] = d
                        } else {
                            d, err := strconv.ParseFloat(field, 64)
                            if err != nil || d < math.MinInt64 || d > math.MaxInt64 {
                                logutil.Errorf("parse field[%v] err:%v", field, err)
                                if !ignoreFieldError {
                                    return makeParsedFailedError(proc.Ctx, vec.Typ.String(), field, vecAttr, base, offset)
                                }
                                result.Warnings++
                                d = 0
                            }
                            cols[rowIdx] = int64(d)
                        }
                    }
                case types.T_uint8:
                    cols := vector.MustTCols[uint8](vec)
                    if isNullOrEmpty {
                        nulls.Add(vec.Nsp, uint64(rowIdx))
                    } else {
                        if judgeInterge(field) {
                            d, err := strconv.ParseUint(field, 10, 8)
                            if err != nil {
                                logutil.Errorf("parse field[%v] err:%v", field, err)
                                if !ignoreFieldError {
                                    return makeParsedFailedError(proc.Ctx, vec.Typ.String(), field, vecAttr, base, offset)
                                }
                                result.Warnings++
                                d = 0
                            }
                            cols[rowIdx] = uint8(d)
                        } else {
                            d, err := strconv.ParseFloat(field, 64)
                            if err != nil || d < 0 || d > math.MaxUint8 {
                                logutil.Errorf("parse field[%v] err:%v", field, err)
                                if !ignoreFieldError {
                                    return makeParsedFailedError(proc.Ctx, vec.Typ.String(), field, vecAttr, base, offset)
                                }
                                result.Warnings++
                                d = 0
                            }
                            cols[rowIdx] = uint8(d)
                        }
                    }
                case types.T_uint16:
                    cols := vector.MustTCols[uint16](vec)
                    if isNullOrEmpty {
                        nulls.Add(vec.Nsp, uint64(rowIdx))
                    } else {
                        if judgeInterge(field) {
                            d, err := strconv.ParseUint(field, 10, 16)
                            if err != nil {
                                logutil.Errorf("parse field[%v] err:%v", field, err)
                                if !ignoreFieldError {
                                    return makeParsedFailedError(proc.Ctx, vec.Typ.String(), field, vecAttr, base, offset)
                                }
                                result.Warnings++
                                d = 0
                            }
                            cols[rowIdx] = uint16(d)
                        } else {
                            d, err := strconv.ParseFloat(field, 64)
                            if err != nil || d < 0 || d > math.MaxUint16 {
                                logutil.Errorf("parse field[%v] err:%v", field, err)
                                if !ignoreFieldError {
                                    return makeParsedFailedError(proc.Ctx, vec.Typ.String(), field, vecAttr, base, offset)
                                }
                                result.Warnings++
                                d = 0
                            }
                            cols[rowIdx] = uint16(d)
                        }
                    }
                case types.T_uint32:
                    cols := vector.MustTCols[uint32](vec)
                    if isNullOrEmpty {
                        nulls.Add(vec.Nsp, uint64(rowIdx))
                    } else {
                        if judgeInterge(field) {
                            d, err := strconv.ParseUint(field, 10, 32)
                            if err != nil {
                                logutil.Errorf("parse field[%v] err:%v", field, err)
                                if !ignoreFieldError {
                                    return makeParsedFailedError(proc.Ctx, vec.Typ.String(), field, vecAttr, base, offset)
                                }
                                result.Warnings++
                                d = 0
                            }
                            cols[rowIdx] = uint32(d)
                        } else {
                            d, err := strconv.ParseFloat(field, 64)
                            if err != nil || d < 0 || d > math.MaxUint32 {
                                logutil.Errorf("parse field[%v] err:%v", field, err)
                                if !ignoreFieldError {
                                    return makeParsedFailedError(proc.Ctx, vec.Typ.String(), field, vecAttr, base, offset)
                                }
                                result.Warnings++
                                d = 0
                            }
                            cols[rowIdx] = uint32(d)
                        }
                    }
                case types.T_uint64:
                    cols := vector.MustTCols[uint64](vec)
                    if isNullOrEmpty {
                        nulls.Add(vec.Nsp, uint64(rowIdx))
                    } else {
                        if judgeInterge(field) {
                            d, err := strconv.ParseUint(field, 10, 64)
                            if err != nil {
                                logutil.Errorf("parse field[%v] err:%v", field, err)
                                if !ignoreFieldError {
                                    return makeParsedFailedError(proc.Ctx, vec.Typ.String(), field, vecAttr, base, offset)
                                }
                                result.Warnings++
                                d = 0
                            }
                            cols[rowIdx] = d
                        } else {
                            d, err := strconv.ParseFloat(field, 64)
                            if err != nil || d < 0 || d > math.MaxUint64 {
                                logutil.Errorf("parse field[%v] err:%v", field, err)
                                if !ignoreFieldError {
                                    return makeParsedFailedError(proc.Ctx, vec.Typ.String(), field, vecAttr, base, offset)
                                }
                                result.Warnings++
                                d = 0
                                //break
                            }
                            cols[rowIdx] = uint64(d)
                        }
                    }
                case types.T_float32:
                    cols := vector.MustTCols[float32](vec)
                    if isNullOrEmpty {
                        nulls.Add(vec.Nsp, uint64(rowIdx))
                    } else {
                        d, err := strconv.ParseFloat(field, 32)
                        if err != nil {
                            logutil.Errorf("parse field[%v] err:%v", field, err)
                            if !ignoreFieldError {
                                return makeParsedFailedError(proc.Ctx, vec.Typ.String(), field, vecAttr, base, offset)
                            }
                            result.Warnings++
                            d = 0
                            //break
                        }
                        cols[rowIdx] = float32(d)
                    }
                case types.T_float64:
                    cols := vector.MustTCols[float64](vec)
                    if isNullOrEmpty {
                        nulls.Add(vec.Nsp, uint64(rowIdx))
                    } else {
                        fs := field
                        //logutil.Infof("==== > field string [%s] ",fs)
                        d, err := strconv.ParseFloat(fs, 64)
                        if err != nil {
                            logutil.Errorf("parse field[%v] err:%v", field, err)
                            if !ignoreFieldError {
                                return makeParsedFailedError(proc.Ctx, vec.Typ.String(), field, vecAttr, base, offset)
                            }
                            result.Warnings++
                            d = 0
                            //break
                        }
                        cols[rowIdx] = d
                    }
                case types.T_char, types.T_varchar, types.T_blob, types.T_text:
                    if isNullOrEmpty {
                        nulls.Add(vec.Nsp, uint64(rowIdx))
                    } else {
                        // XXX What about memory accounting?
                        vector.SetStringAt(vec, rowIdx, field, nil)
                    }
                case types.T_json:
                    if isNullOrEmpty {
                        nulls.Add(vec.Nsp, uint64(rowIdx))
                    } else {
                        json, err := types.ParseStringToByteJson(field)
                        if err != nil {
                            return moerr.NewInvalidInput(proc.Ctx, "Invalid %s text: '%s' for column '%s' at row '%d'", vec.Typ.String(), field, vecAttr, base+uint64(offset))
                        }
                        jsonBytes, err := types.EncodeJson(json)
                        if err != nil {
                            return moerr.NewInvalidInput(proc.Ctx, "Invalid %s text: '%s' for column '%s' at row '%d'", vec.Typ.String(), field, vecAttr, base+uint64(offset))
                        }
                        // XXX What about memory accounting?
                        vector.SetBytesAt(vec, rowIdx, jsonBytes, nil)
                    }
                case types.T_date:
                    cols := vector.MustTCols[types.Date](vec)
                    if isNullOrEmpty {
                        nulls.Add(vec.Nsp, uint64(rowIdx))
                    } else {
                        fs := field
                        //logutil.Infof("==== > field string [%s] ",fs)
                        d, err := types.ParseDateCast(fs)
                        if err != nil {
                            logutil.Errorf("parse field[%v] err:%v", field, err)
                            if !ignoreFieldError {
                                return makeParsedFailedError(proc.Ctx, vec.Typ.String(), field, vecAttr, base, offset)
                            }
                            result.Warnings++
                            d = 0
                            //break
                        }
                        cols[rowIdx] = d
                    }
                case types.T_time:
                    cols := vector.MustTCols[types.Time](vec)
                    if isNullOrEmpty {
                        nulls.Add(vec.Nsp, uint64(rowIdx))
                    } else {
                        fs := field
                        d, err := types.ParseTime(fs, vec.Typ.Precision)
                        if err != nil {
                            logutil.Errorf("parse field[%v] err:%v", field, err)
                            if !ignoreFieldError {
                                return makeParsedFailedError(proc.Ctx, vec.Typ.String(), field, vecAttr, base, offset)
                            }
                            result.Warnings++
                            d = 0
                        }
                        cols[rowIdx] = d
                    }
                case types.T_datetime:
                    cols := vector.MustTCols[types.Datetime](vec)
                    if isNullOrEmpty {
                        nulls.Add(vec.Nsp, uint64(rowIdx))
                    } else {
                        fs := field
                        d, err := types.ParseDatetime(fs, vec.Typ.Precision)
                        if err != nil {
                            logutil.Errorf("parse field[%v] err:%v", field, err)
                            if !ignoreFieldError {
                                return makeParsedFailedError(proc.Ctx, vec.Typ.String(), field, vecAttr, base, offset)
                            }
                            result.Warnings++
                            d = 0
                        }
                        cols[rowIdx] = d
                    }
                case types.T_decimal64:
                    cols := vector.MustTCols[types.Decimal64](vec)
                    if isNullOrEmpty {
                        nulls.Add(vec.Nsp, uint64(rowIdx))
                    } else {
                        d, err := types.Decimal64_FromStringWithScale(field, vec.Typ.Width, vec.Typ.Scale)
                        if err != nil {
                            // we tolerate loss of digits.
                            if !moerr.IsMoErrCode(err, moerr.ErrDataTruncated) {
                                logutil.Errorf("parse field[%v] err:%v", field, err)
                                if !ignoreFieldError {
                                    return moerr.NewInternalError(proc.Ctx, "the input value '%v' is invalid Decimal64 type for column %d", field, colIdx)
                                }
                                result.Warnings++
                                d = types.Decimal64_Zero
                            }
                        }
                        cols[rowIdx] = d
                    }
                case types.T_decimal128:
                    cols := vector.MustTCols[types.Decimal128](vec)
                    if isNullOrEmpty {
                        nulls.Add(vec.Nsp, uint64(rowIdx))
                    } else {
                        d, err := types.Decimal128_FromStringWithScale(field, vec.Typ.Width, vec.Typ.Scale)
                        if err != nil {
                            // we tolerate loss of digits.
                            if !moerr.IsMoErrCode(err, moerr.ErrDataTruncated) {
                                logutil.Errorf("parse field[%v] err:%v", field, err)
                                if !ignoreFieldError {
                                    // XXX recreating another moerr here may have the side effect of
                                    // another error log.
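                                    // only non-truncation errors reach here; loss of digits is tolerated above.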
                                    return moerr.NewInternalError(proc.Ctx, "the input value '%v' is invalid Decimal128 type for column %d", field, colIdx)
                                }
                                result.Warnings++
                                d = types.Decimal128_Zero
                            }
                        }
                        cols[rowIdx] = d
                    }
                case types.T_timestamp:
                    cols := vector.MustTCols[types.Timestamp](vec)
                    if isNullOrEmpty {
                        nulls.Add(vec.Nsp, uint64(rowIdx))
                    } else {
                        fs := field
                        d, err := types.ParseTimestamp(timeZone, fs, vec.Typ.Precision)
                        if err != nil {
                            logutil.Errorf("parse field[%v] err:%v", field, err)
                            if !ignoreFieldError {
                                return makeParsedFailedError(proc.Ctx, vec.Typ.String(), field, vecAttr, base, offset)
                            }
                            result.Warnings++
                            d = types.Timestamp(0)
                        }
                        cols[rowIdx] = d
                    }
                case types.T_uuid:
                    cols := vector.MustTCols[types.Uuid](vec)
                    if isNullOrEmpty {
                        nulls.Add(vec.Nsp, uint64(rowIdx))
                    } else {
                        d, err := types.ParseUuid(field)
                        if err != nil {
                            logutil.Errorf("parse field[%v] err:%v", field, err)
                            if !ignoreFieldError {
                                return makeParsedFailedError(proc.Ctx, vec.Typ.String(), field, vecAttr, base, offset)
                            }
                            result.Warnings++
                            d = types.Uuid{0}
                        }
                        cols[rowIdx] = d
                    }
                default:
                    panic("unsupported oid")
                }
            }
            //row2col += time.Since(wait_a)

            //wait_b := time.Now()
            //the row does not have this field
            for k := 0; k < len(columnFLags); k++ {
                if columnFLags[k] == 0 {
                    vec := batchData.Vecs[k]
                    nulls.Add(vec.Nsp, uint64(rowIdx))

                    //mysql warning ER_WARN_TOO_FEW_RECORDS
                    //result.Warnings++
                }
            }
            //fillBlank += time.Since(wait_b)
        }
        handler.choose_true += time.Since(wait_d)
    } else {
        wait_d := time.Now()
        //record missing column
        for k := 0; k < len(columnFLags); k++ {
            columnFLags[k] = 0
        }

        wait_a := time.Now()
        //column
        for j := 0; j < handler.maxFieldCnt; j++ {
            //where will column j go ?
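            //column-major path: resolve the target column once, then convert this
            //field across every fetched line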
            colIdx := -1
            if j < len(handler.dataColumnId2TableColumnId) {
                colIdx = handler.dataColumnId2TableColumnId[j]
            }
            //drop this field
            if colIdx == -1 {
                continue
            }

            //put it into batch
            vec := batchData.Vecs[colIdx]
            vecAttr := batchData.Attrs[colIdx]
            columnFLags[j] = 1

            switch vec.Typ.Oid {
            case types.T_bool:
                cols := vector.MustTCols[bool](vec)
                for i := 0; i < countOfLineArray; i++ {
                    line := fetchLines[i]
                    if j >= len(line) || len(line[j]) == 0 {
                        nulls.Add(vec.Nsp, uint64(i))
                    } else {
                        field := line[j]
                        if field == "true" || field == "1" {
                            cols[i] = true
                        } else if field == "false" || field == "0" {
                            cols[i] = false
                        } else {
                            return moerr.NewInternalError(proc.Ctx, "the input value '%s' is not bool type", field)
                        }
                    }
                }
            case types.T_int8:
                cols := vector.MustTCols[int8](vec)
                //row
                for i := 0; i < countOfLineArray; i++ {
                    line := fetchLines[i]
                    if j >= len(line) || len(line[j]) == 0 {
                        nulls.Add(vec.Nsp, uint64(i))
                    } else {
                        field := line[j]
                        if judgeInterge(field) {
                            d, err := strconv.ParseInt(field, 10, 8)
                            if err != nil {
                                logutil.Errorf("parse field[%v] err:%v", field, err)
                                if !ignoreFieldError {
                                    return err
                                }
                                result.Warnings++
                                d = 0
                            }
                            cols[i] = int8(d)
                        } else {
                            d, err := strconv.ParseFloat(field, 64)
                            if err != nil || d < math.MinInt8 || d > math.MaxInt8 {
                                logutil.Errorf("parse field[%v] err:%v", field, err)
                                if !ignoreFieldError {
                                    return err
                                }
                                result.Warnings++
                                d = 0
                            }
                            cols[i] = int8(d)
                        }
                    }
                }
            case types.T_int16:
                cols := vector.MustTCols[int16](vec)
                //row
                for i := 0; i < countOfLineArray; i++ {
                    line := fetchLines[i]
                    if j >= len(line) || len(line[j]) == 0 {
                        nulls.Add(vec.Nsp, uint64(i))
                    } else {
                        field := line[j]
                        if judgeInterge(field) {
                            d, err := strconv.ParseInt(field, 10, 16)
                            if err != nil {
                                logutil.Errorf("parse field[%v] err:%v", field, err)
                                if !ignoreFieldError {
                                    return err
                                }
                                result.Warnings++
                                d = 0
                            }
                            cols[i] = int16(d)
                        } else {
                            d, err := strconv.ParseFloat(field, 64)
                            if err != nil || d < math.MinInt16 || d > math.MaxInt16 {
                                logutil.Errorf("parse field[%v] err:%v", field, err)
                                if !ignoreFieldError {
                                    return err
                                }
                                result.Warnings++
                                d = 0
                            }
                            cols[i] = int16(d)
                        }
                    }
                }
            case types.T_int32:
                cols := vector.MustTCols[int32](vec)
                //row
                for i := 0; i < countOfLineArray; i++ {
                    line := fetchLines[i]
                    if j >= len(line) || len(line[j]) == 0 {
                        nulls.Add(vec.Nsp, uint64(i))
                    } else {
                        field := line[j]
                        if judgeInterge(field) {
                            d, err := strconv.ParseInt(field, 10, 32)
                            if err != nil {
                                logutil.Errorf("parse field[%v] err:%v", field, err)
                                if !ignoreFieldError {
                                    return err
                                }
                                result.Warnings++
                                d = 0
                            }
                            cols[i] = int32(d)
                        } else {
                            d, err := strconv.ParseFloat(field, 64)
                            if err != nil || d < math.MinInt32 || d > math.MaxInt32 {
                                logutil.Errorf("parse field[%v] err:%v", field, err)
                                if !ignoreFieldError {
                                    return err
                                }
                                result.Warnings++
                                d = 0
                            }
                            cols[i] = int32(d)
                        }
                    }
                }
            case types.T_int64:
                cols := vector.MustTCols[int64](vec)
                //row
                for i := 0; i < countOfLineArray; i++ {
                    line := fetchLines[i]
                    if j >= len(line) || len(line[j]) == 0 {
                        nulls.Add(vec.Nsp, uint64(i))
                    } else {
                        field := line[j]
                        if judgeInterge(field) {
                            d, err := strconv.ParseInt(field, 10, 64)
                            if err != nil {
                                logutil.Errorf("parse field[%v] err:%v", field, err)
                                if !ignoreFieldError {
                                    return err
                                }
                                result.Warnings++
                                d = 0
                            }
                            cols[i] = d
                        } else {
                            d, err := strconv.ParseFloat(field, 64)
                            if err != nil || d < math.MinInt64 || d > math.MaxInt64 {
                                logutil.Errorf("parse field[%v] err:%v", field, err)
                                if !ignoreFieldError {
                                    return err
                                }
                                result.Warnings++
                                d = 0
                            }
                            cols[i] = int64(d)
                        }
                    }
                }
            case types.T_uint8:
                cols := vector.MustTCols[uint8](vec)
                //row
                for i := 0; i < countOfLineArray; i++ {
                    line := fetchLines[i]
                    if j >= len(line) || len(line[j]) == 0 {
                        nulls.Add(vec.Nsp, uint64(i))
                    } else {
                        field := line[j]
                        if judgeInterge(field) {
                            d, err := strconv.ParseUint(field, 10, 8)
                            if err != nil {
                                logutil.Errorf("parse field[%v] err:%v", field, err)
                                if !ignoreFieldError {
                                    return err
                                }
                                result.Warnings++
                                d = 0
                            }
                            cols[i] = uint8(d)
                        } else {
                            d, err := strconv.ParseFloat(field, 64)
                            if err != nil || d < 0 || d > math.MaxUint8 {
                                logutil.Errorf("parse field[%v] err:%v", field, err)
                                if !ignoreFieldError {
                                    return err
                                }
                                result.Warnings++
                                d = 0
                            }
                            cols[i] = uint8(d)
                        }
                    }
                }
            case types.T_uint16:
                cols := vector.MustTCols[uint16](vec)
                //row
                for i := 0; i < countOfLineArray; i++ {
                    line := fetchLines[i]
                    if j >= len(line) || len(line[j]) == 0 {
                        nulls.Add(vec.Nsp, uint64(i))
                    } else {
                        field := line[j]
                        if judgeInterge(field) {
                            d, err := strconv.ParseUint(field, 10, 16)
                            if err != nil {
                                logutil.Errorf("parse field[%v] err:%v", field, err)
                                if !ignoreFieldError {
                                    return err
                                }
                                result.Warnings++
                                d = 0
                            }
                            cols[i] = uint16(d)
                        } else {
                            d, err := strconv.ParseFloat(field, 64)
                            if err != nil || d < 0 || d > math.MaxUint16 {
                                logutil.Errorf("parse field[%v] err:%v", field, err)
                                if !ignoreFieldError {
                                    return err
                                }
                                result.Warnings++
                                d = 0
                            }
                            cols[i] = uint16(d)
                        }
                    }
                }
            case types.T_uint32:
                cols := vector.MustTCols[uint32](vec)
                //row
                for i := 0; i < countOfLineArray; i++ {
                    line := fetchLines[i]
                    if j >= len(line) || len(line[j]) == 0 {
                        nulls.Add(vec.Nsp, uint64(i))
                    } else {
                        field := line[j]
                        if judgeInterge(field) {
                            d, err := strconv.ParseUint(field, 10, 32)
                            if err != nil {
                                logutil.Errorf("parse field[%v] err:%v", field, err)
                                if !ignoreFieldError {
                                    return err
                                }
                                result.Warnings++
                                d = 0
                            }
                            cols[i] = uint32(d)
                        } else {
                            d, err := strconv.ParseFloat(field, 64)
                            if err != nil || d < 0 || d > math.MaxUint32 {
                                logutil.Errorf("parse field[%v] err:%v", field, err)
                                if !ignoreFieldError {
                                    return err
                                }
                                result.Warnings++
                                d = 0
                                //break
                            }
                            cols[i] = uint32(d)
                        }
                    }
                }
            case types.T_uint64:
                cols := vector.MustTCols[uint64](vec)
                //row
                for i := 0; i < countOfLineArray; i++ {
                    line := fetchLines[i]
                    if j >= len(line) || len(line[j]) == 0 {
                        nulls.Add(vec.Nsp, uint64(i))
                    } else {
                        field := line[j]
                        if judgeInterge(field) {
                            d, err := strconv.ParseUint(field, 10, 64)
                            if err != nil {
                                logutil.Errorf("parse field[%v] err:%v", field, err)
                                if !ignoreFieldError {
                                    return err
                                }
                                result.Warnings++
                                d = 0
                            }
                            cols[i] = d
                        } else {
                            d, err := strconv.ParseFloat(field, 64)
                            if err != nil || d < 0 || d > math.MaxUint64 {
                                logutil.Errorf("parse field[%v] err:%v", field, err)
                                if !ignoreFieldError {
                                    return err
                                }
                                result.Warnings++
                                d = 0
                                //break
                            }
                            cols[i] = uint64(d)
                        }
                    }
                }
            case types.T_float32:
                cols := vector.MustTCols[float32](vec)
                //row
                for i := 0; i < countOfLineArray; i++ {
                    line := fetchLines[i]
                    if j >= len(line) || len(line[j]) == 0 {
                        nulls.Add(vec.Nsp, uint64(i))
                    } else {
                        field := line[j]
                        d, err := strconv.ParseFloat(field, 32)
                        if err != nil {
                            logutil.Errorf("parse field[%v] err:%v", field, err)
                            if !ignoreFieldError {
                                return err
                            }
                            result.Warnings++
                            d = 0
                            //break
                        }
                        cols[i] = float32(d)
                    }
                }
            case types.T_float64:
                cols := vector.MustTCols[float64](vec)
                //row
                for i := 0; i < countOfLineArray; i++ {
                    line := fetchLines[i]
                    if j >= len(line) || len(line[j]) == 0 {
                        nulls.Add(vec.Nsp, uint64(i))
                    } else {
                        field := line[j]
                        //logutil.Infof("==== > field string [%s] ",fs)
                        d, err := strconv.ParseFloat(field, 64)
                        if err != nil {
                            logutil.Errorf("parse field[%v] err:%v", field, err)
                            if !ignoreFieldError {
                                return err
                            }
                            result.Warnings++
                            d = 0
                            //break
                        }
                        cols[i] = d
                    }
                }
            case types.T_char, types.T_varchar:
                //row
                for i := 0; i < countOfLineArray; i++ {
                    line := fetchLines[i]
                    if j >= len(line) || len(line[j]) == 0 {
                        nulls.Add(vec.Nsp, uint64(i))
                    } else {
                        // XXX memory
                        vector.SetStringAt(vec, i, line[j], nil)
                    }
                }
            case types.T_json:
                //row
                for i := 0; i < countOfLineArray; i++ {
                    line := fetchLines[i]
                    if j >= len(line) || len(line[j]) == 0 {
                        nulls.Add(vec.Nsp, uint64(i))
                    } else {
                        field := line[j]
                        json, err := types.ParseStringToByteJson(field)
                        if err != nil {
                            logutil.Errorf("parse field[%v] err:%v", field, err)
                            if !ignoreFieldError {
                                return moerr.NewInvalidInput(proc.Ctx, "Invalid %s text: '%s' for column '%s' at row '%d'", vec.Typ.String(), field, vecAttr, i)
                            }
                            result.Warnings++
                            //break
                        }
                        jsonBytes, err := types.EncodeJson(json)
                        if err != nil {
                            logutil.Errorf("encode field[%v] err:%v", field, err)
                            if !ignoreFieldError {
                                return moerr.NewInvalidInput(proc.Ctx, "Invalid %s text: '%s' for column '%s' at row '%d'", vec.Typ.String(), field, vecAttr, i)
                            }
                            result.Warnings++
                            //break
                        }
                        // XXX Memory.
                        vector.SetBytesAt(vec, i, jsonBytes, nil)
                    }
                }
            case types.T_date:
                cols := vector.MustTCols[types.Date](vec)
                //row
                for i := 0; i < countOfLineArray; i++ {
                    line := fetchLines[i]
                    if j >= len(line) || len(line[j]) == 0 {
                        nulls.Add(vec.Nsp, uint64(i))
                    } else {
                        field := line[j]
                        //logutil.Infof("==== > field string [%s] ",fs)
                        d, err := types.ParseDateCast(field)
                        if err != nil {
                            logutil.Errorf("parse field[%v] err:%v", field, err)
                            if !ignoreFieldError {
                                return err
                            }
                            result.Warnings++
                            d = 0
                            //break
                        }
                        cols[i] = d
                    }
                }
            case types.T_time:
                cols := vector.MustTCols[types.Time](vec)
                for i := 0; i < countOfLineArray; i++ {
                    line := fetchLines[i]
                    if j >= len(line) || len(line[j]) == 0 {
                        nulls.Add(vec.Nsp, uint64(i))
                    } else {
                        field := line[j]
                        //logutil.Infof("==== > field string [%s] ",fs)
                        d, err := types.ParseTime(field, vec.Typ.Precision)
                        if err != nil {
                            logutil.Errorf("parse field[%v] err:%v", field, err)
                            if !ignoreFieldError {
                                return err
                            }
                            result.Warnings++
                            d = 0
                            //break
                        }
                        cols[i] = d
                    }
                }
            case types.T_datetime:
                cols := vector.MustTCols[types.Datetime](vec)
                for i := 0; i < countOfLineArray; i++ {
                    line := fetchLines[i]
                    if j >= len(line) || len(line[j]) == 0 {
                        nulls.Add(vec.Nsp, uint64(i))
                    } else {
                        field := line[j]
                        //logutil.Infof("==== > field string [%s] ",fs)
                        d, err := types.ParseDatetime(field, vec.Typ.Precision)
                        if err != nil {
                            logutil.Errorf("parse field[%v] err:%v", field, err)
                            if !ignoreFieldError {
                                return err
                            }
                            result.Warnings++
                            d = 0
                            //break
                        }
                        cols[i] = d
                    }
                }
            case types.T_decimal64:
                cols := vector.MustTCols[types.Decimal64](vec)
                for i := 0; i < countOfLineArray; i++ {
                    line := fetchLines[i]
                    if j >= len(line) || len(line[j]) == 0 {
                        nulls.Add(vec.Nsp, uint64(i))
                    } else {
                        field := line[j]
                        //logutil.Infof("==== > field string [%s] ",fs)
                        d, err := types.ParseStringToDecimal64(field, vec.Typ.Width, vec.Typ.Scale, vec.GetIsBin())
                        if err != nil {
                            logutil.Errorf("parse field[%v] err:%v", field, err)
                            if !ignoreFieldError {
                                return err
                            }
                            result.Warnings++
                            d = types.Decimal64_Zero
                            //break
                        }
                        cols[i] = d
                    }
                }
            case types.T_decimal128:
                cols := vector.MustTCols[types.Decimal128](vec)
                for i := 0; i < countOfLineArray; i++ {
                    line := fetchLines[i]
                    if j >= len(line) || len(line[j]) == 0 {
                        nulls.Add(vec.Nsp, uint64(i))
                    } else {
                        field := line[j]
                        //logutil.Infof("==== > field string [%s] ",fs)
                        d, err := types.ParseStringToDecimal128(field, vec.Typ.Width, vec.Typ.Scale, vec.GetIsBin())
                        if err != nil {
                            logutil.Errorf("parse field[%v] err:%v", field, err)
                            if !ignoreFieldError {
                                return err
                            }
                            result.Warnings++
                            d = types.Decimal128_Zero
                            //break
                        }
                        cols[i] = d
                    }
                }
            case types.T_timestamp:
                cols := vector.MustTCols[types.Timestamp](vec)
                for i := 0; i < countOfLineArray; i++ {
                    line := fetchLines[i]
                    if j >= len(line) || len(line[j]) == 0 {
                        nulls.Add(vec.Nsp, uint64(i))
                    } else {
                        field := line[j]
                        //logutil.Infof("==== > field string [%s] ",fs)
                        d, err := types.ParseTimestamp(timeZone, field, vec.Typ.Precision)
                        if err != nil {
                            logutil.Errorf("parse field[%v] err:%v", field, err)
                            if !ignoreFieldError {
                                return err
                            }
                            result.Warnings++
                            d = 0
                            //break
                        }
                        cols[i] = d
                    }
                }
            case types.T_uuid:
                cols := vector.MustTCols[types.Uuid](vec)
                for i := 0; i < countOfLineArray; i++ {
                    line := fetchLines[i]
                    if j >= len(line) || len(line[j]) == 0 {
                        nulls.Add(vec.Nsp, uint64(i))
                    } else {
                        field := line[j]
                        //logutil.Infof("==== > field string [%s] ",fs)
                        d, err := types.ParseUuid(field)
                        //d, err := types.ParseStringToDecimal128(field, vec.Typ.Width, vec.Typ.Scale)
                        if err != nil {
                            logutil.Errorf("parse field[%v] err:%v", field, err)
                            if !ignoreFieldError {
                                return err
                            }
                            result.Warnings++
                            d = types.Uuid{}
                            //break
                        }
                        cols[i] = d
                    }
                }
            default:
                panic("unsupported oid")
            }
        }
        row2col += time.Since(wait_a)

        wait_b := time.Now()
        //the row does not have this field
        for k := 0; k < len(columnFLags); k++ {
            if columnFLags[k] == 0 {
                vec := batchData.Vecs[k]
                //row
                for i := 0; i < countOfLineArray; i++ {
                    nulls.Add(vec.Nsp, uint64(i))
                }
            }
        }
        fillBlank += time.Since(wait_b)
        handler.choose_false += time.Since(wait_d)
    }

    handler.batchFilled = batchBegin + fetchCnt
    {
        handler.batchData.InitZsOne(handler.batchSize)
        handler.batchData.ExpandNulls()
    }

    //if handler.batchFilled == handler.batchSize {
    //    minLen := math.MaxInt64
    //    maxLen := 0
    //    for _, vec := range batchData.Vecs {
    //        logutil.Infof("len %d type %d %s ",vec.Length(),vec.Typ.Oid,vec.Typ.String())
    //        minLen = Min(vec.Length(),int(minLen))
    //        maxLen = Max(vec.Length(),int(maxLen))
    //    }
    //
    //    if minLen != maxLen{
    //        logutil.Errorf("vector length mis equal %d %d",minLen,maxLen)
    //        return moerr.NewInternalError("vector length mis equal %d %d",minLen,maxLen)
    //    }
    //}

    wait_c := time.Now()
    /*
        write the batch into the engine
    */
    //forceConvert is passed through as the force flag
    err = writeBatchToStorage(handler, proc, forceConvert)

    toStorage += time.Since(wait_c)

    allFetchCnt += fetchCnt
    //}

    handler.row2col += row2col
    handler.fillBlank += fillBlank
    handler.toStorage += toStorage

    //logutil.Infof("----- row2col %s fillBlank %s toStorage %s",
    //    row2col,fillBlank,toStorage)

    if err != nil {
        logutil.Errorf("saveBatchToStorage failed. err:%v", err)
        return err
    }

    if allFetchCnt != countOfLineArray {
        return moerr.NewInternalError(proc.Ctx, "allFetchCnt %d != countOfLineArray %d ", allFetchCnt, countOfLineArray)
    }
    return nil
}

/*
save the batch to storage.
when force is true, the batch size will be changed.
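specifically, the partially filled batch is truncated to batchFilled rows
before the write; a completely filled batch is written as-is.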
*/
func writeBatchToStorage(handler *WriteBatchHandler, proc *process.Process, force bool) error {
    var err error = nil

    ctx := handler.loadCtx
    if handler.batchFilled == handler.batchSize {
        //batchBytes := 0
        //for _, vec := range handler.batchData.Vecs {
        //    //logutil.Infof("len %d type %d %s ",vec.Length(),vec.Typ.Oid,vec.Typ.String())
        //    switch vec.Typ.Oid {
        //    case types.T_char, types.T_varchar:
        //        vBytes := vec.Col.(*types.Bytes)
        //        batchBytes += len(vBytes.Data)
        //    default:
        //        batchBytes += vec.Length() * int(vec.Typ.Size)
        //    }
        //}
        //
        //logutil.Infof("----batchBytes %v B %v MB",batchBytes,batchBytes / 1024.0 / 1024.0)
        //
        wait_a := time.Now()
        handler.ThreadInfo.SetTime(wait_a)
        handler.ThreadInfo.SetCnt(1)
        //dbHandler := handler.dbHandler
        var dbHandler engine.Database
        var txnHandler *TxnHandler
        var txn TxnOperator
        tableHandler := handler.tableHandler
        initSes := handler.ses
        // XXX running a background session using initSes.Mp; is this the correct thing to do?
        tmpSes := NewBackgroundSession(ctx, initSes.GetMemPool(), initSes.GetParameterUnit(), GSysVariables, proc.SessionInfo.AutoIncrCaches)
        if e, ok := initSes.storage.(*engine.EntireEngine); ok {
            tmpSes.storage = e
            tmpSes.txnHandler = initSes.txnHandler
        }
        defer tmpSes.Close()
        if !handler.skipWriteBatch {
            if handler.oneTxnPerBatch {
                txnHandler = tmpSes.GetTxnHandler()
                txn, err = txnHandler.GetTxn()
                if err != nil {
                    goto handleError
                }
                dbHandler, err = tmpSes.GetStorage().Database(ctx, handler.dbName, txn)
                if err != nil {
                    goto handleError
                }
                tableHandler, err = dbHandler.Relation(ctx, handler.tableName)
                if err != nil {
                    goto handleError
                }
            }
            err = tableHandler.Write(ctx, handler.batchData)
            handler.batchData.Clean(proc.Mp())
            if handler.oneTxnPerBatch {
                if err != nil {
                    goto handleError
                }
                err = tmpSes.TxnCommitSingleStatement(nil)
                if err != nil {
                    goto handleError
                }
            }
        }

    handleError:
        handler.ThreadInfo.SetCnt(0)
        if err == nil {
            handler.result.Records += uint64(handler.batchSize)
        } else if isWriteBatchTimeoutError(err) {
            logutil.Errorf("write failed. err: %v", err)
            handler.result.WriteTimeout += uint64(handler.batchSize)
            //clean the timeout error
            err = nil
        } else {
            logutil.Errorf("write failed. err: %v", err)
            handler.result.Skipped += uint64(handler.batchSize)
        }

        if handler.oneTxnPerBatch && err != nil {
            err2 := tmpSes.TxnRollbackSingleStatement(nil)
            if err2 != nil {
                logutil.Errorf("rollback failed.error:%v", err2)
            }
        }

        handler.writeBatch += time.Since(wait_a)

        wait_b := time.Now()
        //clear the batch
        //clear vector.nulls.Nulls
        for _, vec := range handler.batchData.Vecs {
            vec.Nsp = &nulls.Nulls{}
        }
        handler.batchFilled = 0

        handler.resetBatch += time.Since(wait_b)
    } else {
        if force {
            //first, remove the redundant rows at the end
            needLen := handler.batchFilled
            if needLen > 0 {
                //logutil.Infof("needLen: %d batchSize %d", needLen, handler.batchSize)
                for _, vec := range handler.batchData.Vecs {
                    //logutil.Infof("needLen %d %d type %d %s ",needLen,i,vec.Typ.Oid,vec.Typ.String())
                    //remove nulls.Nulls
                    for j := uint64(handler.batchFilled); j < uint64(handler.batchSize); j++ {
                        nulls.Del(vec.Nsp, j)
                    }
                    //remove rows
                    switch vec.Typ.Oid {
                    case types.T_bool:
                        cols := vector.MustTCols[bool](vec)
                        vec.Col = cols[:needLen]
                    case types.T_int8:
                        cols := vector.MustTCols[int8](vec)
                        vec.Col = cols[:needLen]
                    case types.T_int16:
                        cols := vector.MustTCols[int16](vec)
                        vec.Col = cols[:needLen]
                    case types.T_int32:
                        cols := vector.MustTCols[int32](vec)
                        vec.Col = cols[:needLen]
                    case types.T_int64:
                        cols := vector.MustTCols[int64](vec)
                        vec.Col = cols[:needLen]
                    case types.T_uint8:
                        cols := vector.MustTCols[uint8](vec)
                        vec.Col = cols[:needLen]
                    case types.T_uint16:
                        cols := vector.MustTCols[uint16](vec)
                        vec.Col = cols[:needLen]
                    case types.T_uint32:
                        cols := vector.MustTCols[uint32](vec)
                        vec.Col = cols[:needLen]
                    case types.T_uint64:
                        cols := vector.MustTCols[uint64](vec)
                        vec.Col = cols[:needLen]
                    case types.T_float32:
                        cols := vector.MustTCols[float32](vec)
                        vec.Col = cols[:needLen]
                    case types.T_float64:
                        cols := vector.MustTCols[float64](vec)
                        vec.Col = cols[:needLen]
                    case types.T_char, types.T_varchar, types.T_json, types.T_blob, types.T_text: //bytes is different
                        cols := vector.MustTCols[types.Varlena](vec)
                        vec.Col = cols[:needLen]
                    case types.T_date:
                        cols := vector.MustTCols[types.Date](vec)
                        vec.Col = cols[:needLen]
                    case types.T_time:
                        cols := vector.MustTCols[types.Time](vec)
                        vec.Col = cols[:needLen]
                    case types.T_datetime:
                        cols := vector.MustTCols[types.Datetime](vec)
                        vec.Col = cols[:needLen]
                    case types.T_decimal64:
                        cols := vector.MustTCols[types.Decimal64](vec)
                        vec.Col = cols[:needLen]
                    case types.T_decimal128:
                        cols := vector.MustTCols[types.Decimal128](vec)
                        vec.Col = cols[:needLen]
                    case types.T_timestamp:
                        cols := vector.MustTCols[types.Timestamp](vec)
                        vec.Col = cols[:needLen]
                    case types.T_uuid:
                        cols := vector.MustTCols[types.Uuid](vec)
                        vec.Col = cols[:needLen]
                    default:
                        // XXX
                        panic("unhandled vec type")
                    }
                }

                //for _, vec := range handler.batchData.Vecs {
                //    logutil.Infof("len %d type %d %s ",vec.Length(),vec.Typ.Oid,vec.Typ.String())
                //}

                wait_a := time.Now()
                handler.ThreadInfo.SetTime(wait_a)
                handler.ThreadInfo.SetCnt(1)
                var txnHandler *TxnHandler
                tableHandler := handler.tableHandler
                // dbHandler := handler.dbHandler
                initSes := handler.ses
                // XXX: Using initSes.Mp
                tmpSes := NewBackgroundSession(ctx, initSes.GetMemPool(), initSes.GetParameterUnit(), GSysVariables, proc.SessionInfo.AutoIncrCaches)
                if e, ok := initSes.storage.(*engine.EntireEngine); ok {
                    tmpSes.storage = e
                    tmpSes.txnHandler = initSes.txnHandler
                }
                defer tmpSes.Close()
                var dbHandler engine.Database
                var txn TxnOperator
                if !handler.skipWriteBatch {
                    if handler.oneTxnPerBatch {
                        txnHandler = tmpSes.GetTxnHandler()
                        txn, err = txnHandler.GetTxn()
                        if err != nil {
                            goto handleError2
                        }
                        dbHandler, err = tmpSes.GetStorage().Database(ctx, handler.dbName, txn)
                        if err != nil {
                            goto handleError2
                        }
                        //new relation
                        tableHandler, err = dbHandler.Relation(ctx, handler.tableName)
                        if err != nil {
                            goto handleError2
                        }
                    }
                    handler.batchData.Zs = handler.batchData.Zs[:handler.batchFilled]
                    err = tableHandler.Write(ctx, handler.batchData)
                    handler.batchData.Clean(proc.Mp())
                    if handler.oneTxnPerBatch {
                        if err != nil {
                            goto handleError2
                        }
                        err = tmpSes.TxnCommitSingleStatement(nil)
                        if err != nil {
                            goto handleError2
                        }
                    }
                }
            handleError2:
                handler.ThreadInfo.SetCnt(0)
                if err == nil {
                    handler.result.Records += uint64(needLen)
                } else if isWriteBatchTimeoutError(err) {
                    logutil.Errorf("write failed. err: %v", err)
                    handler.result.WriteTimeout += uint64(needLen)
                    //clean the timeout error
                    err = nil
                } else {
                    logutil.Errorf("write failed. err:%v \n", err)
                    handler.result.Skipped += uint64(needLen)
                }

                if handler.oneTxnPerBatch && err != nil {
                    err2 := tmpSes.TxnRollbackSingleStatement(nil)
                    if err2 != nil {
                        logutil.Errorf("rollback failed.error:%v", err2)
                    }
                }
            }
        }
    }
    return err
}

// row2col algorithm
var row2colChoose bool = true

var saveLinesToStorage = func(handler *ParseLineHandler, force bool) error {
    writeHandler := &WriteBatchHandler{
        SharePart: SharePart{
            lineIdx:     handler.lineIdx,
            maxFieldCnt: handler.maxFieldCnt,
        },
    }
    err := initWriteBatchHandler(handler, writeHandler)
    if err != nil {
        writeHandler.simdCsvErr = err
        return err
    }

    handler.simdCsvWaitWriteRoutineToQuit.Add(1)
    go func() {
        defer handler.simdCsvWaitWriteRoutineToQuit.Done()

        //step 3 : save into storage
        err = rowToColumnAndSaveToStorage(writeHandler, handler.proc, force, row2colChoose)
        writeHandler.simdCsvErr = err

        releaseBatch(handler, writeHandler.pl)
        writeHandler.batchData = nil
        writeHandler.simdCsvLineArray = nil

        if err != nil {
            handler.simdCsvNotiyEventChan <- newNotifyEvent(NOTIFY_EVENT_WRITE_BATCH_ERROR, err, writeHandler)
        } else {
            handler.simdCsvNotiyEventChan <- newNotifyEvent(NOTIFY_EVENT_WRITE_BATCH_RESULT, nil, writeHandler)
        }
    }()
    return nil
}

func PrintThreadInfo(handler *ParseLineHandler, close *CloseFlag, a time.Duration) {
    for {
        if close.IsClosed() {
            logutil.Infof("load stream is over, start to leave.")
            return
        } else {
            for i, v := range handler.threadInfo {
                ret := v.GetTime()
                if ret == nil {
                    continue
                } else {
                    startTime := ret.(time.Time)
                    threadCnt := v.GetCnt()
                    if threadCnt == 1 {
                        logutil.Infof("Print the ThreadInfo. id:%v, startTime:%v, spendTime:%v", i, startTime, time.Since(startTime))
                    }
                }
            }
            time.Sleep(a * time.Second)
        }
    }
}

/*
LoadLoop reads data from the stream, extracts the fields, and saves them into the table
*/
func LoadLoop(requestCtx context.Context, ses *Session, proc *process.Process, load *tree.Import, dbHandler engine.Database, tableHandler engine.Relation, dbName string) (*LoadResult, error) {
    result := &LoadResult{}

    /*
        step1 : read a block from the file
    */
    dataFile, err := os.Open(load.Param.Filepath)
    if err != nil {
        logutil.Errorf("open file failed. err:%v", err)
        return nil, err
    }
    defer func() {
        err := dataFile.Close()
        if err != nil {
            logutil.Errorf("close file failed. err:%v", err)
        }
    }()

    //processTime := time.Now()
    process_block := time.Duration(0)
    pu := ses.GetParameterUnit()
    curBatchSize := int(pu.SV.BatchSizeInLoadData)
    //simdcsv
    handler := &ParseLineHandler{
        SharePart: SharePart{
            load:             load,
            lineIdx:          0,
            simdCsvLineArray: make([][]string, curBatchSize),
            storage:          pu.StorageEngine,
            dbHandler:        dbHandler,
            tableHandler:     tableHandler,
            tableName:        string(load.Table.Name()),
            dbName:           dbName,
            txnHandler:       ses.GetTxnHandler(),
            ses:              ses,
            oneTxnPerBatch:   !pu.SV.DisableOneTxnPerBatchDuringLoad,
            lineCount:        0,
            batchSize:        curBatchSize,
            result:           result,
            skipWriteBatch:   pu.SV.LoadDataSkipWritingBatch,
            loadCtx:          requestCtx,
        },
        threadInfo:                    make(map[int]*ThreadInfo),
        simdCsvWaitWriteRoutineToQuit: &sync.WaitGroup{},
        proc:                          proc,
    }

    handler.simdCsvConcurrencyCountOfWriteBatch = Min(int(pu.SV.LoadDataConcurrencyCount), runtime.NumCPU())
    handler.simdCsvConcurrencyCountOfWriteBatch = Max(1, handler.simdCsvConcurrencyCountOfWriteBatch)
    handler.simdCsvBatchPool = make(chan *PoolElement, handler.simdCsvConcurrencyCountOfWriteBatch)
    for i := 0; i < handler.simdCsvConcurrencyCountOfWriteBatch; i++ {
        handler.threadInfo[i] = &ThreadInfo{}
    }

    //logutil.Infof("-----write concurrent count %d ",handler.simdCsvConcurrencyCountOfWriteBatch)

    handler.ignoreFieldError = true
    dh := handler.load.DuplicateHandling
    if dh != nil {
        switch dh.(type) {
        case *tree.DuplicateKeyIgnore:
            handler.ignoreFieldError = true
        case *tree.DuplicateKeyError, *tree.DuplicateKeyReplace:
            handler.ignoreFieldError = false
        }
    }

    notifyChanSize := handler.simdCsvConcurrencyCountOfWriteBatch * 2
    notifyChanSize = Max(100, notifyChanSize)

    handler.simdCsvReader = simdcsv.NewReaderWithOptions(dataFile,
        rune(load.Param.Tail.Fields.Terminated[0]),
        '#',
        true,
        true)

    /*
        error channel
    */
    handler.simdCsvNotiyEventChan = make(chan *notifyEvent, notifyChanSize)

    //release the resources of the handler
    defer handler.close()

    err = initParseLineHandler(requestCtx, proc, handler)
    if err != nil {
        return nil, err
    }

    //TODO: remove it after tae is ready
    if handler.oneTxnPerBatch {
        err = ses.TxnCommitSingleStatement(nil)
        if err != nil {
            return nil, err
        }
    }

    wg := sync.WaitGroup{}

    /*
        get lines from simdcsv, deliver them to the output channel.
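        a read error and an error flushing the last partial batch are reported
        to the stats routine as separate notify events.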
    */
    wg.Add(1)
    go func() {
        defer wg.Done()
        wait_b := time.Now()
        readErr := handler.simdCsvReader.ReadLoop(requestCtx, nil, handler.getLineOutCallback)
        if readErr != nil {
            logutil.Errorf("get line from simdcsv failed. err:%v", readErr)
            handler.simdCsvNotiyEventChan <- newNotifyEvent(NOTIFY_EVENT_READ_SIMDCSV_ERROR, readErr, nil)
        }
        //flush the last (partial) batch
        err = saveLinesToStorage(handler, true)
        if err != nil {
            logutil.Errorf("save the last batch failed. err:%v", err)
            handler.simdCsvNotiyEventChan <- newNotifyEvent(NOTIFY_EVENT_OUTPUT_SIMDCSV_ERROR, err, nil)
        }
        process_block += time.Since(wait_b)
    }()

    var statsWg sync.WaitGroup
    statsWg.Add(1)

    closechannel := CloseFlag{}
    var retErr error = nil
    go func() {
        defer statsWg.Done()
        /*
            collect statistics from every batch.
        */
        var ne *notifyEvent = nil
        for {
            quit := false
            select {
            case <-requestCtx.Done():
                logutil.Info("cancel the load")
                retErr = moerr.NewQueryInterrupted(proc.Ctx)
                quit = true
            case ne = <-handler.simdCsvNotiyEventChan:
                switch ne.neType {
                case NOTIFY_EVENT_WRITE_BATCH_RESULT:
                    collectWriteBatchResult(handler, ne.wbh, nil)
                case NOTIFY_EVENT_END:
                    retErr = nil
                    quit = true
                case NOTIFY_EVENT_READ_SIMDCSV_ERROR,
                    NOTIFY_EVENT_OUTPUT_SIMDCSV_ERROR,
                    NOTIFY_EVENT_WRITE_BATCH_ERROR:
                    if ses.IsTaeEngine() || !errorCanBeIgnored(ne.err) {
                        retErr = ne.err
                        quit = true
                    }
                    collectWriteBatchResult(handler, ne.wbh, ne.err)
                default:
                    logutil.Errorf("get unsupported notify event %d", ne.neType)
                    quit = true
                }
            }

            if quit {
                handler.simdCsvReader.Close()

                go func() {
                    for closechannel.IsOpened() {
                        select {
                        case <-handler.simdCsvNotiyEventChan:
                        default:
                        }
                    }
                }()
                break
            }
        }
    }()

    close := CloseFlag{}
    var a = time.Duration(pu.SV.PrintLogInterVal)
    go func() {
        PrintThreadInfo(handler, &close, a)
    }()

    //until now, the last writer has been counted.
    //No more new threads can be spawned.
    //wait for the csvReader and the rowConverter to quit.
    wg.Wait()

    //until now, the csvReader and the rowConverter have quit.
    //wait for the writers to quit
    handler.simdCsvWaitWriteRoutineToQuit.Wait()

    //until now, all writers have quit.
    //tell stats to quit. NOTIFY_EVENT_END must be the last event in the queue.
    handler.simdCsvNotiyEventChan <- newNotifyEvent(NOTIFY_EVENT_END, nil, nil)

    //wait for stats to quit
    statsWg.Wait()
    close.Close()
    closechannel.Close()
    return result, retErr
}
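// Usage sketch (hypothetical, for illustration only): the frontend executor is
// expected to drive LoadLoop for a statement such as
//
//	LOAD DATA INFILE '/tmp/t1.csv' INTO TABLE db1.t1 FIELDS TERMINATED BY ',';
//
// passing the session, the parsed *tree.Import, and the database/relation
// handles resolved for the target table; the returned LoadResult carries the
// records/skipped/warnings counters reported back to the client.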