github.com/matrixorigin/matrixone@v0.7.0/pkg/sql/colexec/external/external.go

// Copyright 2022 Matrix Origin
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//	http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package external

import (
	"bufio"
	"bytes"
	"compress/bzip2"
	"compress/flate"
	"compress/gzip"
	"compress/zlib"
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"math"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"sync/atomic"
	"time"

	"github.com/matrixorigin/matrixone/pkg/util/errutil"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/index"

	"github.com/matrixorigin/matrixone/pkg/catalog"
	"github.com/matrixorigin/matrixone/pkg/common/moerr"
	"github.com/matrixorigin/matrixone/pkg/common/morpc"
	"github.com/matrixorigin/matrixone/pkg/common/mpool"
	"github.com/matrixorigin/matrixone/pkg/container/batch"
	"github.com/matrixorigin/matrixone/pkg/container/bytejson"
	"github.com/matrixorigin/matrixone/pkg/container/nulls"
	"github.com/matrixorigin/matrixone/pkg/container/types"
	"github.com/matrixorigin/matrixone/pkg/container/vector"
	"github.com/matrixorigin/matrixone/pkg/fileservice"
	"github.com/matrixorigin/matrixone/pkg/logutil"
	"github.com/matrixorigin/matrixone/pkg/objectio"
	"github.com/matrixorigin/matrixone/pkg/pb/plan"
	"github.com/matrixorigin/matrixone/pkg/sql/colexec"
	"github.com/matrixorigin/matrixone/pkg/sql/parsers/tree"
	plan2 "github.com/matrixorigin/matrixone/pkg/sql/plan"
	"github.com/matrixorigin/matrixone/pkg/vm/process"
	"github.com/matrixorigin/simdcsv"
	"github.com/pierrec/lz4"
)

var (
	ONE_BATCH_MAX_ROW  = 40000
	S3_PARALLEL_MAXNUM = 10
)

var (
	STATEMENT_ACCOUNT = "account"
)

func String(arg any, buf *bytes.Buffer) {
	buf.WriteString("external output")
}

func Prepare(proc *process.Process, arg any) error {
	param := arg.(*Argument).Es
	if proc.Lim.MaxMsgSize == 0 {
		param.maxBatchSize = uint64(morpc.GetMessageSize())
	} else {
		param.maxBatchSize = proc.Lim.MaxMsgSize
	}
	param.maxBatchSize = uint64(float64(param.maxBatchSize) * 0.6)
	if param.Extern.Format == tree.JSONLINE {
		if param.Extern.JsonData != tree.OBJECT && param.Extern.JsonData != tree.ARRAY {
			param.Fileparam.End = true
			return moerr.NewNotSupported(proc.Ctx, "the jsonline format '%s' is not supported now", param.Extern.JsonData)
		}
	}
	param.Extern.FileService = proc.FileService
	param.Extern.Ctx = proc.Ctx
	param.IgnoreLineTag = int(param.Extern.Tail.IgnoredLines)
	param.IgnoreLine = param.IgnoreLineTag
	if len(param.FileList) == 0 {
		logutil.Warnf("no such file '%s'", param.Extern.Filepath)
		param.Fileparam.End = true
	}
	param.Fileparam.FileCnt = len(param.FileList)
	param.Ctx = proc.Ctx
	param.Zoneparam = &ZonemapFileparam{}
	name2ColIndex := make(map[string]int32, len(param.Cols))
	for i := 0; i < len(param.Cols); i++ {
		name2ColIndex[param.Cols[i].Name] = int32(i)
	}
	param.tableDef = &plan.TableDef{
		Name2ColIndex: name2ColIndex,
	}
	var columns []int
	param.Filter.columnMap, columns, param.Filter.maxCol = plan2.GetColumnsByExpr(param.Filter.FilterExpr, param.tableDef)
	param.Filter.columns = make([]uint16, len(columns))
	param.Filter.defColumns = make([]uint16, len(columns))
	for i := 0; i < len(columns); i++ {
		col := param.Cols[columns[i]]
		param.Filter.columns[i] = uint16(param.Name2ColIndex[col.Name])
		param.Filter.defColumns[i] = uint16(columns[i])
	}

	param.Filter.exprMono = plan2.CheckExprIsMonotonic(proc.Ctx, param.Filter.FilterExpr)
	param.Filter.File2Size = make(map[string]int64)
	return nil
}
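// Illustrative sketch (not part of the original file): the external scan is a
// pull-based operator, driven as String/Prepare/Call by the pipeline. A driver
// would look roughly like the following, where proc.InputBatch() is an assumed
// accessor for the batch that Call publishes via proc.SetInputBatch, and
// consume is a placeholder for downstream handling:
//
//	if err := Prepare(proc, arg); err != nil {
//		return err
//	}
//	for {
//		end, err := Call(0, proc, arg, false, false)
//		if err != nil {
//			return err
//		}
//		if end { // every file has been consumed
//			break
//		}
//		consume(proc.InputBatch())
//	}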
func Call(idx int, proc *process.Process, arg any, isFirst bool, isLast bool) (bool, error) {
	select {
	case <-proc.Ctx.Done():
		proc.SetInputBatch(nil)
		return true, nil
	default:
	}
	t1 := time.Now()
	anal := proc.GetAnalyze(idx)
	anal.Start()
	defer func() {
		anal.Stop()
		anal.AddScanTime(t1)
	}()
	anal.Input(nil, isFirst)
	param := arg.(*Argument).Es
	if param.Fileparam.End {
		proc.SetInputBatch(nil)
		return true, nil
	}
	if param.plh == nil {
		if param.Fileparam.FileIndex >= len(param.FileList) {
			proc.SetInputBatch(nil)
			return true, nil
		}
		param.Fileparam.Filepath = param.FileList[param.Fileparam.FileIndex]
		param.Fileparam.FileIndex++
	}
	bat, err := ScanFileData(param, proc)
	if err != nil {
		param.Fileparam.End = true
		return false, err
	}
	proc.SetInputBatch(bat)
	if bat != nil {
		anal.Output(bat, isLast)
		anal.Alloc(int64(bat.Size()))
	}
	return false, nil
}

func containColname(col string) bool {
	return strings.Contains(col, STATEMENT_ACCOUNT) || strings.Contains(col, catalog.ExternalFilePath)
}

func judgeContainColname(expr *plan.Expr) bool {
	expr_F, ok := expr.Expr.(*plan.Expr_F)
	if !ok {
		return false
	}
	if expr_F.F.Func.ObjName == "or" {
		flag := true
		for i := 0; i < len(expr_F.F.Args); i++ {
			flag = flag && judgeContainColname(expr_F.F.Args[i])
		}
		return flag
	}
	expr_Col, ok := expr_F.F.Args[0].Expr.(*plan.Expr_Col)
	if ok && containColname(expr_Col.Col.Name) {
		return true
	}
	for _, arg := range expr_F.F.Args {
		if judgeContainColname(arg) {
			return true
		}
	}
	return false
}

func getAccountCol(filepath string) string {
	pathDir := strings.Split(filepath, "/")
	if len(pathDir) < 2 {
		return ""
	}
	return pathDir[1]
}

func makeFilepathBatch(node *plan.Node, proc *process.Process, filterList []*plan.Expr, fileList []string) *batch.Batch {
	num := len(node.TableDef.Cols)
	bat := &batch.Batch{
		Attrs: make([]string, num),
		Vecs:  make([]*vector.Vector, num),
		Zs:    make([]int64, len(fileList)),
	}
	for i := 0; i < num; i++ {
		bat.Attrs[i] = node.TableDef.Cols[i].Name
		if bat.Attrs[i] == STATEMENT_ACCOUNT {
			typ := types.Type{
				Oid:   types.T(node.TableDef.Cols[i].Typ.Id),
				Width: node.TableDef.Cols[i].Typ.Width,
				Scale: node.TableDef.Cols[i].Typ.Scale,
			}
			vec := vector.NewOriginal(typ)
			vector.PreAlloc(vec, len(fileList), len(fileList), proc.Mp())
			vec.SetOriginal(false)
			for j := 0; j < len(fileList); j++ {
				vector.SetStringAt(vec, j, getAccountCol(fileList[j]), proc.Mp())
			}
			bat.Vecs[i] = vec
		} else if bat.Attrs[i] == catalog.ExternalFilePath {
			typ := types.Type{
				Oid:   types.T_varchar,
				Width: types.MaxVarcharLen,
				Scale: 0,
			}
			vec := vector.NewOriginal(typ)
			vector.PreAlloc(vec, len(fileList), len(fileList), proc.Mp())
			vec.SetOriginal(false)
			for j := 0; j < len(fileList); j++ {
				vector.SetStringAt(vec, j, fileList[j], proc.Mp())
			}
			bat.Vecs[i] = vec
		}
	}
	for k := 0; k < len(fileList); k++ {
		bat.Zs[k] = 1
	}
	return bat
}
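// Illustrative note (not part of the original file): judgeContainColname treats
// "or" specially because a disjunction may only be evaluated against the file
// list when every disjunct refers to the account/filepath pseudo-columns. For
// example, `account = 'a' OR account = 'b'` can prune files up front, while
// `account = 'a' OR user_id = 1` cannot, since its second disjunct needs row
// data; filterByAccountAndFilename below keeps the latter in node.FilterList.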
func filterByAccountAndFilename(node *plan.Node, proc *process.Process, fileList []string, fileSize []int64) ([]string, []int64, error) {
	filterList := make([]*plan.Expr, 0)
	filterList2 := make([]*plan.Expr, 0)
	for i := 0; i < len(node.FilterList); i++ {
		if judgeContainColname(node.FilterList[i]) {
			filterList = append(filterList, node.FilterList[i])
		} else {
			filterList2 = append(filterList2, node.FilterList[i])
		}
	}
	if len(filterList) == 0 {
		return fileList, fileSize, nil
	}
	bat := makeFilepathBatch(node, proc, filterList, fileList)
	filter := colexec.RewriteFilterExprList(filterList)
	vec, err := colexec.EvalExpr(bat, proc, filter)
	if err != nil {
		return nil, fileSize, err
	}
	fileListTmp := make([]string, 0)
	fileSizeTmp := make([]int64, 0)
	bs := vector.GetColumn[bool](vec)
	for i := 0; i < len(bs); i++ {
		if bs[i] {
			fileListTmp = append(fileListTmp, fileList[i])
			fileSizeTmp = append(fileSizeTmp, fileSize[i])
		}
	}
	node.FilterList = filterList2
	return fileListTmp, fileSizeTmp, nil
}

func FilterFileList(node *plan.Node, proc *process.Process, fileList []string, fileSize []int64) ([]string, []int64, error) {
	return filterByAccountAndFilename(node, proc, fileList, fileSize)
}

func IsSysTable(dbName string, tableName string) bool {
	if dbName == "system" {
		return tableName == "statement_info" || tableName == "rawlog"
	} else if dbName == "system_metrics" {
		return tableName == "metric"
	}
	return false
}

func ReadFile(param *ExternalParam, proc *process.Process) (io.ReadCloser, error) {
	if param.Extern.Local {
		return io.NopCloser(proc.LoadLocalReader), nil
	}
	fs, readPath, err := plan2.GetForETLWithType(param.Extern, param.Fileparam.Filepath)
	if err != nil {
		return nil, err
	}
	var r io.ReadCloser
	vec := fileservice.IOVector{
		FilePath: readPath,
		Entries: []fileservice.IOEntry{
			0: {
				Offset:            0,
				Size:              -1,
				ReadCloserForRead: &r,
			},
		},
	}
	if param.Extern.Parallel {
		vec.Entries[0].Offset = int64(param.FileOffset[param.Fileparam.FileIndex-1][0])
		vec.Entries[0].Size = int64(param.FileOffset[param.Fileparam.FileIndex-1][1] - param.FileOffset[param.Fileparam.FileIndex-1][0])
	}
	if vec.Entries[0].Size == 0 || vec.Entries[0].Offset >= param.FileSize[param.Fileparam.FileIndex-1] {
		return nil, nil
	}
	err = fs.Read(param.Ctx, &vec)
	if err != nil {
		return nil, err
	}
	return r, nil
}

func ReadFileOffset(param *tree.ExternParam, proc *process.Process, mcpu int, fileSize int64) ([][2]int, error) {
	arr := make([][2]int, 0)

	fs, readPath, err := plan2.GetForETLWithType(param, param.Filepath)
	if err != nil {
		return nil, err
	}
	var r io.ReadCloser
	vec := fileservice.IOVector{
		FilePath: readPath,
		Entries: []fileservice.IOEntry{
			0: {
				Offset:            0,
				Size:              -1,
				ReadCloserForRead: &r,
			},
		},
	}
	var tailSize []int64
	var offset []int64
	for i := 0; i < mcpu; i++ {
		vec.Entries[0].Offset = int64(i) * (fileSize / int64(mcpu))
		if err = fs.Read(param.Ctx, &vec); err != nil {
			return nil, err
		}
		r2 := bufio.NewReader(r)
		line, _ := r2.ReadString('\n')
		tailSize = append(tailSize, int64(len(line)))
		offset = append(offset, vec.Entries[0].Offset)
	}

	start := 0
	for i := 0; i < mcpu; i++ {
		if i+1 < mcpu {
			arr = append(arr, [2]int{start, int(offset[i+1] + tailSize[i+1])})
			start = int(offset[i+1] + tailSize[i+1])
		} else {
			arr = append(arr, [2]int{start, -1})
		}
	}
	return arr, nil
}
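// Illustrative sketch (not part of the original file): ReadFileOffset above
// splits a file into mcpu byte ranges aligned to line boundaries. Probe i seeks
// to i*(fileSize/mcpu) and records the distance to the next '\n'; chunk i then
// ends where chunk i+1's aligned start begins. For fileSize = 100, mcpu = 2,
// and a newline 7 bytes past the midpoint, the result would be:
//
//	[2]int{0, 57}  // 50 + 7, ends just past the newline
//	[2]int{57, -1} // -1 means "read to end of file"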
func getCompressType(param *tree.ExternParam, filepath string) string {
	if param.CompressType != "" && param.CompressType != tree.AUTO {
		return param.CompressType
	}
	index := strings.LastIndex(filepath, ".")
	if index == -1 {
		return tree.NOCOMPRESS
	}
	tail := string([]byte(filepath)[index+1:])
	switch tail {
	case "gz", "gzip":
		return tree.GZIP
	case "bz2", "bzip2":
		return tree.BZIP2
	case "lz4":
		return tree.LZ4
	default:
		return tree.NOCOMPRESS
	}
}

func getUnCompressReader(param *tree.ExternParam, filepath string, r io.ReadCloser) (io.ReadCloser, error) {
	switch strings.ToLower(getCompressType(param, filepath)) {
	case tree.NOCOMPRESS:
		return r, nil
	case tree.GZIP, tree.GZ:
		r, err := gzip.NewReader(r)
		if err != nil {
			return nil, err
		}
		return r, nil
	case tree.BZIP2, tree.BZ2:
		return io.NopCloser(bzip2.NewReader(r)), nil
	case tree.FLATE:
		r = flate.NewReader(r)
		return r, nil
	case tree.ZLIB:
		r, err := zlib.NewReader(r)
		if err != nil {
			return nil, err
		}
		return r, nil
	case tree.LZ4:
		return io.NopCloser(lz4.NewReader(r)), nil
	case tree.LZW:
		return nil, moerr.NewInternalError(param.Ctx, "the compress type '%s' is not supported now", param.CompressType)
	default:
		return nil, moerr.NewInternalError(param.Ctx, "the compress type '%s' is not supported now", param.CompressType)
	}
}

func makeType(Cols []*plan.ColDef, index int) types.Type {
	return types.New(types.T(Cols[index].Typ.Id), Cols[index].Typ.Width, Cols[index].Typ.Scale, Cols[index].Typ.Precision)
}

func makeBatch(param *ExternalParam, batchSize int, mp *mpool.MPool) *batch.Batch {
	batchData := batch.New(true, param.Attrs)
	// allocate space for the vectors
	for i := 0; i < len(param.Attrs); i++ {
		typ := makeType(param.Cols, i)
		vec := vector.NewOriginal(typ)
		vector.PreAlloc(vec, batchSize, batchSize, mp)
		vec.SetOriginal(false)
		batchData.Vecs[i] = vec
	}
	return batchData
}

func deleteEnclosed(param *ExternalParam, plh *ParseLineHandler) {
	close := param.Extern.Tail.Fields.EnclosedBy
	if close == '"' || close == 0 {
		return
	}
	for rowIdx := 0; rowIdx < plh.batchSize; rowIdx++ {
		Line := plh.simdCsvLineArray[rowIdx]
		for i := 0; i < len(Line); i++ {
			fieldLen := len(Line[i])
			if fieldLen < 2 {
				continue
			}
			if Line[i][0] == close && Line[i][fieldLen-1] == close {
				Line[i] = Line[i][1 : fieldLen-1]
			}
		}
	}
}

func getRealAttrCnt(attrs []string) int {
	cnt := 0
	for i := 0; i < len(attrs); i++ {
		if catalog.ContainExternalHidenCol(attrs[i]) {
			cnt++
		}
	}
	return len(attrs) - cnt
}
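// Illustrative examples (not part of the original file) of getCompressType's
// extension sniffing when CompressType is empty or tree.AUTO:
//
//	getCompressType(param, "data.csv.gz")  // tree.GZIP
//	getCompressType(param, "data.csv.bz2") // tree.BZIP2
//	getCompressType(param, "data.csv.lz4") // tree.LZ4
//	getCompressType(param, "data.csv")     // tree.NOCOMPRESS (unknown suffix)
//	getCompressType(param, "data")         // tree.NOCOMPRESS (no '.')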
func GetBatchData(param *ExternalParam, plh *ParseLineHandler, proc *process.Process) (*batch.Batch, error) {
	bat := makeBatch(param, plh.batchSize, proc.Mp())
	var (
		Line []string
		err  error
	)
	deleteEnclosed(param, plh)
	unexpectEOF := false
	for rowIdx := 0; rowIdx < plh.batchSize; rowIdx++ {
		Line = plh.simdCsvLineArray[rowIdx]
		if param.Extern.Format == tree.JSONLINE {
			Line, err = transJson2Lines(proc.Ctx, Line[0], param.Attrs, param.Cols, param.Extern.JsonData, param)
			if err != nil {
				if errors.Is(err, io.ErrUnexpectedEOF) {
					logutil.Infof("unexpected EOF, wait for next batch")
					unexpectEOF = true
					continue
				}
				return nil, err
			}
			plh.simdCsvLineArray[rowIdx] = Line
		}
		if param.ClusterTable != nil && param.ClusterTable.GetIsClusterTable() {
			// for a cluster table, the column account_id is not in the input line,
			// so the line is allowed to be one column short
			if len(Line)+1 < getRealAttrCnt(param.Attrs) {
				return nil, moerr.NewInternalError(proc.Ctx, ColumnCntLargerErrorInfo())
			}
		} else {
			if !param.Extern.SysTable && len(Line) < getRealAttrCnt(param.Attrs) {
				return nil, moerr.NewInternalError(proc.Ctx, ColumnCntLargerErrorInfo())
			}
		}
		err = getOneRowData(bat, Line, rowIdx, param, proc.Mp())
		if err != nil {
			return nil, err
		}
	}

	n := vector.Length(bat.Vecs[0])
	if unexpectEOF && n > 0 {
		n--
		for i := 0; i < len(bat.Vecs); i++ {
			newVec := vector.NewOriginal(bat.Vecs[i].Typ)
			vector.PreAlloc(newVec, n, n, proc.Mp())
			newVec.Nsp = bat.Vecs[i].Nsp
			for j := int64(0); j < int64(n); j++ {
				if newVec.Nsp.Contains(uint64(j)) {
					continue
				}
				err := vector.Copy(newVec, bat.Vecs[i], j, j, proc.Mp())
				if err != nil {
					return nil, err
				}
			}
			bat.Vecs[i].Free(proc.Mp())
			bat.Vecs[i] = newVec
		}
	}
	sels := proc.Mp().GetSels()
	if n > cap(sels) {
		proc.Mp().PutSels(sels)
		sels = make([]int64, n)
	}
	bat.Zs = sels[:n]
	for k := 0; k < n; k++ {
		bat.Zs[k] = 1
	}
	return bat, nil
}

// GetSimdcsvReader gets the file reader for the current external file
func GetSimdcsvReader(param *ExternalParam, proc *process.Process) (*ParseLineHandler, error) {
	var err error
	param.reader, err = ReadFile(param, proc)
	if err != nil || param.reader == nil {
		return nil, err
	}
	param.reader, err = getUnCompressReader(param.Extern, param.Fileparam.Filepath, param.reader)
	if err != nil {
		return nil, err
	}

	channelSize := 100
	plh := &ParseLineHandler{}
	plh.simdCsvGetParsedLinesChan = atomic.Value{}
	plh.simdCsvGetParsedLinesChan.Store(make(chan simdcsv.LineOut, channelSize))
	if param.Extern.Tail.Fields == nil {
		param.Extern.Tail.Fields = &tree.Fields{Terminated: ","}
	}
	if param.Extern.Format == tree.JSONLINE {
		param.Extern.Tail.Fields.Terminated = "\t"
	}
	plh.simdCsvReader = simdcsv.NewReaderWithOptions(param.reader,
		rune(param.Extern.Tail.Fields.Terminated[0]),
		'#',
		true,
		false)

	return plh, nil
}
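// Illustrative note (not part of the original file): ScanCsvFile below composes
// three readers. ReadFile yields the raw (possibly range-limited) stream from
// the file service, getUnCompressReader wraps it for the detected compression,
// and the simdcsv reader consumes the result in chunks of at most
// ONE_BATCH_MAX_ROW lines and roughly param.maxBatchSize bytes.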
func ScanCsvFile(param *ExternalParam, proc *process.Process) (*batch.Batch, error) {
	var bat *batch.Batch
	var err error
	var cnt int
	if param.plh == nil {
		param.IgnoreLine = param.IgnoreLineTag
		param.plh, err = GetSimdcsvReader(param, proc)
		if err != nil || param.plh == nil {
			return nil, err
		}
	}
	plh := param.plh
	plh.simdCsvLineArray = make([][]string, ONE_BATCH_MAX_ROW)
	finish := false
	plh.simdCsvLineArray, cnt, finish, err = plh.simdCsvReader.ReadLimitSize(ONE_BATCH_MAX_ROW, proc.Ctx, param.maxBatchSize, plh.simdCsvLineArray)
	if err != nil {
		return nil, err
	}

	if finish {
		err := param.reader.Close()
		if err != nil {
			logutil.Errorf("close file failed. err:%v", err)
		}
		plh.simdCsvReader.Close()
		param.plh = nil
		param.Fileparam.FileFin++
		if param.Fileparam.FileFin >= param.Fileparam.FileCnt {
			param.Fileparam.End = true
		}
	}
	if param.IgnoreLine != 0 {
		if !param.Extern.Parallel || param.FileOffset[param.Fileparam.FileIndex-1][0] == 0 {
			if cnt >= param.IgnoreLine {
				plh.simdCsvLineArray = plh.simdCsvLineArray[param.IgnoreLine:cnt]
				cnt -= param.IgnoreLine
			} else {
				plh.simdCsvLineArray = nil
				cnt = 0
			}
			param.IgnoreLine = 0
		}
	}
	plh.batchSize = cnt
	bat, err = GetBatchData(param, plh, proc)
	if err != nil {
		return nil, err
	}
	bat.Cnt = 1
	return bat, nil
}

func getBatchFromZonemapFile(param *ExternalParam, proc *process.Process, objectReader objectio.Reader) (*batch.Batch, error) {
	bat := makeBatch(param, 0, proc.Mp())
	if param.Zoneparam.offset >= len(param.Zoneparam.bs) {
		return bat, nil
	}

	rows := 0

	idxs := make([]uint16, len(param.Attrs))
	meta := param.Zoneparam.bs[param.Zoneparam.offset].GetMeta()
	header := meta.GetHeader()
	colCnt := header.GetColumnCount()
	for i := 0; i < len(param.Attrs); i++ {
		idxs[i] = uint16(param.Name2ColIndex[param.Attrs[i]])
		if param.Extern.SysTable && idxs[i] >= colCnt {
			idxs[i] = 0
		}
	}

	vec, err := objectReader.Read(param.Ctx, param.Zoneparam.bs[param.Zoneparam.offset].GetExtent(), idxs, proc.GetMPool())
	if err != nil {
		return nil, err
	}
	for i := 0; i < len(param.Attrs); i++ {
		var vecTmp *vector.Vector
		if param.Extern.SysTable && uint16(param.Name2ColIndex[param.Attrs[i]]) >= colCnt {
			vecTmp = vector.New(makeType(param.Cols, i))
			vector.PreAlloc(vecTmp, rows, rows, proc.GetMPool())
			for j := 0; j < rows; j++ {
				nulls.Add(vecTmp.Nsp, uint64(j))
			}
		} else if catalog.ContainExternalHidenCol(param.Attrs[i]) {
			if rows == 0 {
				vecTmp = vector.New(makeType(param.OriginCols, 0))
				err = vecTmp.Read(vec.Entries[i].Object.([]byte))
				if err != nil {
					return nil, err
				}
				rows = vecTmp.Length()
			}
			vecTmp = vector.New(makeType(param.Cols, i))
			vector.PreAlloc(vecTmp, rows, rows, proc.GetMPool())
			for j := 0; j < rows; j++ {
				err := vector.SetStringAt(vecTmp, j, param.Fileparam.Filepath, proc.GetMPool())
				if err != nil {
					return nil, err
				}
			}
		} else {
			vecTmp = vector.New(bat.Vecs[i].Typ)
			err = vecTmp.Read(vec.Entries[i].Object.([]byte))
			if err != nil {
				return nil, err
			}
			rows = vecTmp.Length()
		}
		sels := make([]int64, vecTmp.Length())
		for j := 0; j < len(sels); j++ {
			sels[j] = int64(j)
		}
		vector.Union(bat.Vecs[i], vecTmp, sels, true, proc.GetMPool())
	}

	n := vector.Length(bat.Vecs[0])
	sels := proc.Mp().GetSels()
	if n > cap(sels) {
		proc.Mp().PutSels(sels)
		sels = make([]int64, n)
	}
	bat.Zs = sels[:n]
	for k := 0; k < n; k++ {
		bat.Zs[k] = 1
	}
	if !param.Extern.QueryResult {
		param.Zoneparam.offset++
	}
	return bat, nil
}
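// Illustrative note (not part of the original file): needRead below is the
// zonemap pruning step. For a monotonic filter such as `a > 100`, a block whose
// zonemap records min(a)=1 and max(a)=50 can be skipped: the filter is
// evaluated once over a tiny batch built from each column's [min, max] pair,
// and if no combination of bounds satisfies it, no row in the block can. Every
// error path degrades to "read the block", never to a wrong skip.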
func needRead(param *ExternalParam, proc *process.Process, objectReader objectio.Reader) bool {
	if param.Zoneparam.offset >= len(param.Zoneparam.bs) {
		return true
	}
	indexes, err := objectReader.ReadIndex(context.Background(), param.Zoneparam.bs[param.Zoneparam.offset].GetExtent(),
		param.Filter.columns, objectio.ZoneMapType, proc.GetMPool())
	if err != nil {
		return true
	}

	notReportErrCtx := errutil.ContextWithNoReport(proc.Ctx, true)
	// if the expr matches no columns, just evaluate it once
	if len(param.Filter.columns) == 0 {
		bat := batch.NewWithSize(0)
		defer bat.Clean(proc.Mp())
		ifNeed, err := plan2.EvalFilterExpr(notReportErrCtx, param.Filter.FilterExpr, bat, proc)
		if err != nil {
			return true
		}
		return ifNeed
	}

	dataLength := len(param.Filter.columns)
	datas := make([][2]any, dataLength)
	dataTypes := make([]uint8, dataLength)
	for i := 0; i < dataLength; i++ {
		idx := param.Filter.defColumns[i]
		dataTypes[i] = uint8(param.Cols[idx].Typ.Id)
		typ := types.T(dataTypes[i]).ToType()

		zm := index.NewZoneMap(typ)
		err = zm.Unmarshal(indexes[i].(*objectio.ZoneMap).GetData())
		if err != nil {
			return true
		}
		min := zm.GetMin()
		max := zm.GetMax()
		if min == nil || max == nil {
			return true
		}
		datas[i] = [2]any{min, max}
	}
	// use all the min/max data to build the vectors
	buildVectors := plan2.BuildVectorsByData(datas, dataTypes, proc.Mp())
	bat := batch.NewWithSize(param.Filter.maxCol + 1)
	defer bat.Clean(proc.Mp())
	for k, v := range param.Filter.columnMap {
		for i, realIdx := range param.Filter.defColumns {
			if int(realIdx) == v {
				bat.SetVector(int32(k), buildVectors[i])
				break
			}
		}
	}
	bat.SetZs(buildVectors[0].Length(), proc.Mp())

	ifNeed, err := plan2.EvalFilterExpr(notReportErrCtx, param.Filter.FilterExpr, bat, proc)
	if err != nil {
		return true
	}
	return ifNeed
}

func getZonemapBatch(param *ExternalParam, proc *process.Process, size int64, objectReader objectio.Reader) (*batch.Batch, error) {
	var err error
	if param.Extern.QueryResult {
		param.Zoneparam.bs, err = objectReader.ReadAllMeta(param.Ctx, size, proc.GetMPool())
		if err != nil {
			return nil, err
		}
	} else if param.Zoneparam.bs == nil {
		param.plh = &ParseLineHandler{}
		param.Zoneparam.bs, err = objectReader.ReadAllMeta(param.Ctx, size, proc.GetMPool())
		if err != nil {
			return nil, err
		}
	}
	if param.Zoneparam.offset >= len(param.Zoneparam.bs) {
		bat := makeBatch(param, 0, proc.Mp())
		return bat, nil
	}

	if param.Filter.exprMono {
		for !needRead(param, proc, objectReader) {
			param.Zoneparam.offset++
		}
	}
	return getBatchFromZonemapFile(param, proc, objectReader)
}

func ScanZonemapFile(param *ExternalParam, proc *process.Process) (*batch.Batch, error) {
	if param.Filter.objectReader == nil || param.Extern.QueryResult {
		dir, _ := filepath.Split(param.Fileparam.Filepath)
		var service fileservice.FileService
		var err error
		var p fileservice.Path

		if param.Extern.QueryResult {
			service = param.Extern.FileService
		} else {
			// format the filepath for a local file
			fp := param.Extern.Filepath
			if p, err = fileservice.ParsePath(param.Extern.Filepath); err != nil {
				return nil, err
			} else if p.Service == "" {
				if os.IsPathSeparator(filepath.Clean(param.Extern.Filepath)[0]) {
					// absolute path
					fp = "/"
				} else {
					// relative path
					// NB: this branch never triggers, because ReadDir() only supports local files with absolute paths
					fp = "."
				}
			}

			service, _, err = plan2.GetForETLWithType(param.Extern, fp)
			if err != nil {
				return nil, err
			}
		}
		_, ok := param.Filter.File2Size[param.Fileparam.Filepath]
		if !ok {
			fs := objectio.NewObjectFS(service, dir)
			dirs, err := fs.ListDir(dir)
			if err != nil {
				return nil, err
			}
			for i := 0; i < len(dirs); i++ {
				param.Filter.File2Size[dir+dirs[i].Name] = dirs[i].Size
			}
		}

		param.Filter.objectReader, err = objectio.NewObjectReader(param.Fileparam.Filepath, service)
		if err != nil {
			return nil, err
		}
	}

	size, ok := param.Filter.File2Size[param.Fileparam.Filepath]
	if !ok {
		return nil, moerr.NewInternalErrorNoCtx("can't find the filepath %s", param.Fileparam.Filepath)
	}
	bat, err := getZonemapBatch(param, proc, size, param.Filter.objectReader)
	if err != nil {
		return nil, err
	}

	if param.Zoneparam.offset >= len(param.Zoneparam.bs) {
		param.Filter.objectReader = nil
		param.Zoneparam.bs = nil
		param.plh = nil
		param.Fileparam.FileFin++
		if param.Fileparam.FileFin >= param.Fileparam.FileCnt {
			param.Fileparam.End = true
		}
		param.Zoneparam.offset = 0
	}
	return bat, nil
}
// ScanFileData reads batch data from the external file
func ScanFileData(param *ExternalParam, proc *process.Process) (*batch.Batch, error) {
	if strings.HasSuffix(param.Fileparam.Filepath, ".tae") || param.Extern.QueryResult {
		return ScanZonemapFile(param, proc)
	}
	return ScanCsvFile(param, proc)
}

func transJson2Lines(ctx context.Context, str string, attrs []string, cols []*plan.ColDef, jsonData string, param *ExternalParam) ([]string, error) {
	switch jsonData {
	case tree.OBJECT:
		return transJsonObject2Lines(ctx, str, attrs, cols, param)
	case tree.ARRAY:
		return transJsonArray2Lines(ctx, str, attrs, cols, param)
	default:
		return nil, moerr.NewNotSupported(ctx, "the jsonline format '%s' is not supported now", jsonData)
	}
}

func transJsonObject2Lines(ctx context.Context, str string, attrs []string, cols []*plan.ColDef, param *ExternalParam) ([]string, error) {
	var (
		err error
		res = make([]string, 0, len(attrs))
	)
	if param.prevStr != "" {
		str = param.prevStr + str
		param.prevStr = ""
	}
	var jsonMap map[string]interface{}
	var decoder = json.NewDecoder(bytes.NewReader([]byte(str)))
	decoder.UseNumber()
	err = decoder.Decode(&jsonMap)
	if err != nil {
		logutil.Errorf("json unmarshal err:%v", err)
		param.prevStr = str
		return nil, err
	}
	if len(jsonMap) < len(attrs) {
		return nil, moerr.NewInternalError(ctx, ColumnCntLargerErrorInfo())
	}
	for idx, attr := range attrs {
		if val, ok := jsonMap[attr]; ok {
			if val == nil {
				res = append(res, NULL_FLAG)
				continue
			}
			tp := cols[idx].Typ.Id
			if tp != int32(types.T_json) {
				res = append(res, fmt.Sprintf("%v", val))
				continue
			}
			var bj bytejson.ByteJson
			err = bj.UnmarshalObject(val)
			if err != nil {
				return nil, err
			}
			dt, err := bj.Marshal()
			if err != nil {
				return nil, err
			}
			res = append(res, string(dt))
		} else {
			return nil, moerr.NewInvalidInput(ctx, "the attr %s is not in json", attr)
		}
	}
	return res, nil
}

func transJsonArray2Lines(ctx context.Context, str string, attrs []string, cols []*plan.ColDef, param *ExternalParam) ([]string, error) {
	var (
		err error
		res = make([]string, 0, len(attrs))
	)
	if param.prevStr != "" {
		str = param.prevStr + str
		param.prevStr = ""
	}
	var jsonArray []interface{}
	var decoder = json.NewDecoder(bytes.NewReader([]byte(str)))
	decoder.UseNumber()
	err = decoder.Decode(&jsonArray)
	if err != nil {
		param.prevStr = str
		return nil, err
	}
	if len(jsonArray) < len(attrs) {
		return nil, moerr.NewInternalError(ctx, ColumnCntLargerErrorInfo())
	}
	for idx, val := range jsonArray {
		if val == nil {
			res = append(res, NULL_FLAG)
			continue
		}
		tp := cols[idx].Typ.Id
		if tp != int32(types.T_json) {
			res = append(res, fmt.Sprintf("%v", val))
			continue
		}
		var bj bytejson.ByteJson
		err = bj.UnmarshalObject(val)
		if err != nil {
			return nil, err
		}
		dt, err := bj.Marshal()
		if err != nil {
			return nil, err
		}
		res = append(res, string(dt))
	}
	return res, nil
}
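// Illustrative examples (not part of the original file) of the two JSONLINE
// shapes, for a table with attrs ["a", "b"]:
//
//	object: {"a": 1, "b": "x"}   -> fields are looked up by attribute name
//	array:  [1, "x"]             -> fields are taken by position
//
// A line truncated at a read boundary fails to decode with an unexpected EOF;
// it is stashed in param.prevStr and re-parsed with the next chunk appended,
// which is how GetBatchData reassembles rows split across reads.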
func getNullFlag(param *ExternalParam, attr, field string) bool {
	list := param.Extern.NullMap[attr]
	for i := 0; i < len(list); i++ {
		field = strings.ToLower(field)
		if list[i] == field {
			return true
		}
	}
	return false
}

const NULL_FLAG = "\\N"

func judgeInteger(field string) bool {
	for i := 0; i < len(field); i++ {
		if field[i] == '-' || field[i] == '+' {
			continue
		}
		if field[i] > '9' || field[i] < '0' {
			return false
		}
	}
	return true
}

func getStrFromLine(Line []string, colIdx int, param *ExternalParam) string {
	if catalog.ContainExternalHidenCol(param.Attrs[colIdx]) {
		return param.Fileparam.Filepath
	}
	var str string
	if param.Extern.SysTable && int(param.Name2ColIndex[param.Attrs[colIdx]]) >= len(Line) {
		str = "\\N"
	} else {
		str = Line[param.Name2ColIndex[param.Attrs[colIdx]]]
	}
	if param.Extern.Tail.Fields.EnclosedBy != 0 {
		tmp := strings.TrimSpace(str)
		if len(tmp) >= 2 && tmp[0] == param.Extern.Tail.Fields.EnclosedBy && tmp[len(tmp)-1] == param.Extern.Tail.Fields.EnclosedBy {
			return tmp[1 : len(tmp)-1]
		}
	}
	return str
}

func getOneRowData(bat *batch.Batch, Line []string, rowIdx int, param *ExternalParam, mp *mpool.MPool) error {
	for colIdx := range param.Attrs {
		// for a cluster table, the column account_id need not be filled here
		if param.ClusterTable.GetIsClusterTable() && int(param.ClusterTable.GetColumnIndexOfAccountId()) == colIdx {
			continue
		}
		field := getStrFromLine(Line, colIdx, param)
		id := types.T(param.Cols[colIdx].Typ.Id)
		if id != types.T_char && id != types.T_varchar && id != types.T_json && id != types.T_blob && id != types.T_text {
			field = strings.TrimSpace(field)
		}
		vec := bat.Vecs[colIdx]
		isNullOrEmpty := field == NULL_FLAG
		if id != types.T_char && id != types.T_varchar && id != types.T_json && id != types.T_blob && id != types.T_text {
			isNullOrEmpty = isNullOrEmpty || len(field) == 0
		}
		isNullOrEmpty = isNullOrEmpty || getNullFlag(param, param.Attrs[colIdx], field)
		switch id {
		case types.T_bool:
			cols := vector.MustTCols[bool](vec)
			if isNullOrEmpty {
				nulls.Add(vec.Nsp, uint64(rowIdx))
			} else {
				if field == "true" || field == "1" {
					cols[rowIdx] = true
				} else if field == "false" || field == "0" {
					cols[rowIdx] = false
				} else {
					return moerr.NewInternalError(param.Ctx, "the input value '%s' is not bool type for column %d", field, colIdx)
				}
			}
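		// Illustrative note (not part of the original file): every integer case
		// below follows the same two-step parse. A field that looks like an
		// integer goes through strconv.ParseInt/ParseUint directly; anything
		// else (e.g. "1e2" or "3.0") is parsed as a float64 and accepted only
		// if it fits the column's range, so "300" still fails for int8 while
		// "1e2" succeeds.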
		case types.T_int8:
			cols := vector.MustTCols[int8](vec)
			if isNullOrEmpty {
				nulls.Add(vec.Nsp, uint64(rowIdx))
			} else {
				if judgeInteger(field) {
					d, err := strconv.ParseInt(field, 10, 8)
					if err != nil {
						logutil.Errorf("parse field[%v] err:%v", field, err)
						return moerr.NewInternalError(param.Ctx, "the input value '%v' is not int8 type for column %d", field, colIdx)
					}
					cols[rowIdx] = int8(d)
				} else {
					d, err := strconv.ParseFloat(field, 64)
					if err != nil || d < math.MinInt8 || d > math.MaxInt8 {
						logutil.Errorf("parse field[%v] err:%v", field, err)
						return moerr.NewInternalError(param.Ctx, "the input value '%v' is not int8 type for column %d", field, colIdx)
					}
					cols[rowIdx] = int8(d)
				}
			}
		case types.T_int16:
			cols := vector.MustTCols[int16](vec)
			if isNullOrEmpty {
				nulls.Add(vec.Nsp, uint64(rowIdx))
			} else {
				if judgeInteger(field) {
					d, err := strconv.ParseInt(field, 10, 16)
					if err != nil {
						logutil.Errorf("parse field[%v] err:%v", field, err)
						return moerr.NewInternalError(param.Ctx, "the input value '%v' is not int16 type for column %d", field, colIdx)
					}
					cols[rowIdx] = int16(d)
				} else {
					d, err := strconv.ParseFloat(field, 64)
					if err != nil || d < math.MinInt16 || d > math.MaxInt16 {
						logutil.Errorf("parse field[%v] err:%v", field, err)
						return moerr.NewInternalError(param.Ctx, "the input value '%v' is not int16 type for column %d", field, colIdx)
					}
					cols[rowIdx] = int16(d)
				}
			}
		case types.T_int32:
			cols := vector.MustTCols[int32](vec)
			if isNullOrEmpty {
				nulls.Add(vec.Nsp, uint64(rowIdx))
			} else {
				if judgeInteger(field) {
					d, err := strconv.ParseInt(field, 10, 32)
					if err != nil {
						logutil.Errorf("parse field[%v] err:%v", field, err)
						return moerr.NewInternalError(param.Ctx, "the input value '%v' is not int32 type for column %d", field, colIdx)
					}
					cols[rowIdx] = int32(d)
				} else {
					d, err := strconv.ParseFloat(field, 64)
					if err != nil || d < math.MinInt32 || d > math.MaxInt32 {
						logutil.Errorf("parse field[%v] err:%v", field, err)
						return moerr.NewInternalError(param.Ctx, "the input value '%v' is not int32 type for column %d", field, colIdx)
					}
					cols[rowIdx] = int32(d)
				}
			}
		case types.T_int64:
			cols := vector.MustTCols[int64](vec)
			if isNullOrEmpty {
				nulls.Add(vec.Nsp, uint64(rowIdx))
			} else {
				if judgeInteger(field) {
					d, err := strconv.ParseInt(field, 10, 64)
					if err != nil {
						logutil.Errorf("parse field[%v] err:%v", field, err)
						return moerr.NewInternalError(param.Ctx, "the input value '%v' is not int64 type for column %d", field, colIdx)
					}
					cols[rowIdx] = d
				} else {
					d, err := strconv.ParseFloat(field, 64)
					if err != nil || d < math.MinInt64 || d > math.MaxInt64 {
						logutil.Errorf("parse field[%v] err:%v", field, err)
						return moerr.NewInternalError(param.Ctx, "the input value '%v' is not int64 type for column %d", field, colIdx)
					}
					cols[rowIdx] = int64(d)
				}
			}
		case types.T_uint8:
			cols := vector.MustTCols[uint8](vec)
			if isNullOrEmpty {
				nulls.Add(vec.Nsp, uint64(rowIdx))
			} else {
				if judgeInteger(field) {
					d, err := strconv.ParseUint(field, 10, 8)
					if err != nil {
						logutil.Errorf("parse field[%v] err:%v", field, err)
						return moerr.NewInternalError(param.Ctx, "the input value '%v' is not uint8 type for column %d", field, colIdx)
					}
					cols[rowIdx] = uint8(d)
				} else {
					d, err := strconv.ParseFloat(field, 64)
					if err != nil || d < 0 || d > math.MaxUint8 {
						logutil.Errorf("parse field[%v] err:%v", field, err)
						return moerr.NewInternalError(param.Ctx, "the input value '%v' is not uint8 type for column %d", field, colIdx)
					}
					cols[rowIdx] = uint8(d)
				}
			}
		case types.T_uint16:
			cols := vector.MustTCols[uint16](vec)
			if isNullOrEmpty {
				nulls.Add(vec.Nsp, uint64(rowIdx))
			} else {
				if judgeInteger(field) {
					d, err := strconv.ParseUint(field, 10, 16)
					if err != nil {
						logutil.Errorf("parse field[%v] err:%v", field, err)
						return moerr.NewInternalError(param.Ctx, "the input value '%v' is not uint16 type for column %d", field, colIdx)
					}
					cols[rowIdx] = uint16(d)
				} else {
					d, err := strconv.ParseFloat(field, 64)
					if err != nil || d < 0 || d > math.MaxUint16 {
						logutil.Errorf("parse field[%v] err:%v", field, err)
						return moerr.NewInternalError(param.Ctx, "the input value '%v' is not uint16 type for column %d", field, colIdx)
					}
					cols[rowIdx] = uint16(d)
				}
			}
		case types.T_uint32:
			cols := vector.MustTCols[uint32](vec)
			if isNullOrEmpty {
				nulls.Add(vec.Nsp, uint64(rowIdx))
			} else {
				if judgeInteger(field) {
					d, err := strconv.ParseUint(field, 10, 32)
					if err != nil {
						logutil.Errorf("parse field[%v] err:%v", field, err)
						return moerr.NewInternalError(param.Ctx, "the input value '%v' is not uint32 type for column %d", field, colIdx)
					}
					cols[rowIdx] = uint32(d)
				} else {
					d, err := strconv.ParseFloat(field, 64)
					if err != nil || d < 0 || d > math.MaxUint32 {
						logutil.Errorf("parse field[%v] err:%v", field, err)
						return moerr.NewInternalError(param.Ctx, "the input value '%v' is not uint32 type for column %d", field, colIdx)
					}
					cols[rowIdx] = uint32(d)
				}
			}
		case types.T_uint64:
			cols := vector.MustTCols[uint64](vec)
			if isNullOrEmpty {
				nulls.Add(vec.Nsp, uint64(rowIdx))
			} else {
				if judgeInteger(field) {
					d, err := strconv.ParseUint(field, 10, 64)
					if err != nil {
						logutil.Errorf("parse field[%v] err:%v", field, err)
						return moerr.NewInternalError(param.Ctx, "the input value '%v' is not uint64 type for column %d", field, colIdx)
					}
					cols[rowIdx] = d
				} else {
					d, err := strconv.ParseFloat(field, 64)
					if err != nil || d < 0 || d > math.MaxUint64 {
						logutil.Errorf("parse field[%v] err:%v", field, err)
						return moerr.NewInternalError(param.Ctx, "the input value '%v' is not uint64 type for column %d", field, colIdx)
					}
					cols[rowIdx] = uint64(d)
				}
			}
		case types.T_float32:
			cols := vector.MustTCols[float32](vec)
			if isNullOrEmpty {
				nulls.Add(vec.Nsp, uint64(rowIdx))
			} else {
				// plain float32 data type (no precision specified)
				if vec.Typ.Precision < 0 {
					d, err := strconv.ParseFloat(field, 32)
					if err != nil {
						logutil.Errorf("parse field[%v] err:%v", field, err)
						return moerr.NewInternalError(param.Ctx, "the input value '%v' is not float32 type for column %d", field, colIdx)
					}
					cols[rowIdx] = float32(d)
					continue
				}
				d, err := types.Decimal128_FromStringWithScale(field, vec.Typ.Width, vec.Typ.Precision)
				if err != nil {
					logutil.Errorf("parse field[%v] err:%v", field, err)
					return moerr.NewInternalError(param.Ctx, "the input value '%v' is not float32 type for column %d", field, colIdx)
				}
				cols[rowIdx] = float32(d.ToFloat64())
			}
		case types.T_float64:
			cols := vector.MustTCols[float64](vec)
			if isNullOrEmpty {
				nulls.Add(vec.Nsp, uint64(rowIdx))
			} else {
				// plain float64 data type (no precision specified)
				if vec.Typ.Precision < 0 {
					d, err := strconv.ParseFloat(field, 64)
					if err != nil {
						logutil.Errorf("parse field[%v] err:%v", field, err)
						return moerr.NewInternalError(param.Ctx, "the input value '%v' is not float64 type for column %d", field, colIdx)
					}
					cols[rowIdx] = d
					continue
				}
				d, err := types.Decimal128_FromStringWithScale(field, vec.Typ.Width, vec.Typ.Precision)
				if err != nil {
					logutil.Errorf("parse field[%v] err:%v", field, err)
					return moerr.NewInternalError(param.Ctx, "the input value '%v' is not float64 type for column %d", field, colIdx)
				}
				cols[rowIdx] = d.ToFloat64()
			}
		case types.T_char, types.T_varchar, types.T_blob, types.T_text:
			if isNullOrEmpty {
				nulls.Add(vec.Nsp, uint64(rowIdx))
			} else {
				// XXX Memory accounting?
				err := vector.SetStringAt(vec, rowIdx, field, mp)
				if err != nil {
					return err
				}
			}
		case types.T_json:
			if isNullOrEmpty {
				nulls.Add(vec.Nsp, uint64(rowIdx))
			} else {
				var (
					byteJson  bytejson.ByteJson
					err       error
					jsonBytes []byte
				)
				if param.Extern.Format == tree.CSV {
					byteJson, err = types.ParseStringToByteJson(field)
					if err != nil {
						logutil.Errorf("parse field[%v] err:%v", field, err)
						return moerr.NewInternalError(param.Ctx, "the input value '%v' is not json type for column %d", field, colIdx)
					}
					jsonBytes, err = types.EncodeJson(byteJson)
					if err != nil {
						logutil.Errorf("encode json[%v] err:%v", field, err)
						return moerr.NewInternalError(param.Ctx, "the input value '%v' is not json type for column %d", field, colIdx)
					}
				} else { // jsonline
					jsonBytes = []byte(field)
				}
				err = vector.SetBytesAt(vec, rowIdx, jsonBytes, mp)
				if err != nil {
					return err
				}
			}
		case types.T_date:
			cols := vector.MustTCols[types.Date](vec)
			if isNullOrEmpty {
				nulls.Add(vec.Nsp, uint64(rowIdx))
			} else {
				d, err := types.ParseDateCast(field)
				if err != nil {
					logutil.Errorf("parse field[%v] err:%v", field, err)
					return moerr.NewInternalError(param.Ctx, "the input value '%v' is not Date type for column %d", field, colIdx)
				}
				cols[rowIdx] = d
			}
		case types.T_time:
			cols := vector.MustTCols[types.Time](vec)
			if isNullOrEmpty {
				nulls.Add(vec.Nsp, uint64(rowIdx))
			} else {
				d, err := types.ParseTime(field, vec.Typ.Precision)
				if err != nil {
					logutil.Errorf("parse field[%v] err:%v", field, err)
					return moerr.NewInternalError(param.Ctx, "the input value '%v' is not Time type for column %d", field, colIdx)
				}
				cols[rowIdx] = d
			}
		case types.T_datetime:
			cols := vector.MustTCols[types.Datetime](vec)
			if isNullOrEmpty {
				nulls.Add(vec.Nsp, uint64(rowIdx))
			} else {
				d, err := types.ParseDatetime(field, vec.Typ.Precision)
				if err != nil {
					logutil.Errorf("parse field[%v] err:%v", field, err)
					return moerr.NewInternalError(param.Ctx, "the input value '%v' is not Datetime type for column %d", field, colIdx)
				}
				cols[rowIdx] = d
			}
		case types.T_decimal64:
			cols := vector.MustTCols[types.Decimal64](vec)
			if isNullOrEmpty {
				nulls.Add(vec.Nsp, uint64(rowIdx))
			} else {
				d, err := types.Decimal64_FromStringWithScale(field, vec.Typ.Width, vec.Typ.Scale)
				if err != nil {
					// we tolerate loss of digits
					if !moerr.IsMoErrCode(err, moerr.ErrDataTruncated) {
						logutil.Errorf("parse field[%v] err:%v", field, err)
						return moerr.NewInternalError(param.Ctx, "the input value '%v' is invalid Decimal64 type for column %d", field, colIdx)
					}
				}
				cols[rowIdx] = d
			}
		case types.T_decimal128:
			cols := vector.MustTCols[types.Decimal128](vec)
			if isNullOrEmpty {
				nulls.Add(vec.Nsp, uint64(rowIdx))
			} else {
				d, err := types.Decimal128_FromStringWithScale(field, vec.Typ.Width, vec.Typ.Scale)
				if err != nil {
					// we tolerate loss of digits
					if !moerr.IsMoErrCode(err, moerr.ErrDataTruncated) {
						logutil.Errorf("parse field[%v] err:%v", field, err)
						return moerr.NewInternalError(param.Ctx, "the input value '%v' is invalid Decimal128 type for column %d", field, colIdx)
					}
				}
				cols[rowIdx] = d
			}
		case types.T_timestamp:
			cols := vector.MustTCols[types.Timestamp](vec)
			if isNullOrEmpty {
				nulls.Add(vec.Nsp, uint64(rowIdx))
			} else {
				t := time.Local
				d, err := types.ParseTimestamp(t, field, vec.Typ.Precision)
				if err != nil {
					logutil.Errorf("parse field[%v] err:%v", field, err)
					return moerr.NewInternalError(param.Ctx, "the input value '%v' is not Timestamp type for column %d", field, colIdx)
				}
				cols[rowIdx] = d
			}
		case types.T_uuid:
			cols := vector.MustTCols[types.Uuid](vec)
			if isNullOrEmpty {
				nulls.Add(vec.Nsp, uint64(rowIdx))
			} else {
				d, err := types.ParseUuid(field)
				if err != nil {
					logutil.Errorf("parse field[%v] err:%v", field, err)
					return moerr.NewInternalError(param.Ctx, "the input value '%v' is not uuid type for column %d", field, colIdx)
				}
				cols[rowIdx] = d
			}
		default:
			return moerr.NewInternalError(param.Ctx, "the value type %d is not supported now", param.Cols[colIdx].Typ.Id)
		}
	}
	return nil
}