github.com/matrixorigin/matrixone@v0.7.0/pkg/sql/colexec/external/external.go (about)

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package external
    16  
    17  import (
    18  	"bufio"
    19  	"bytes"
    20  	"compress/bzip2"
    21  	"compress/flate"
    22  	"compress/gzip"
    23  	"compress/zlib"
    24  	"context"
    25  	"encoding/json"
    26  	"errors"
    27  	"fmt"
    28  	"io"
    29  	"math"
    30  	"os"
    31  	"path/filepath"
    32  	"strconv"
    33  	"strings"
    34  	"sync/atomic"
    35  	"time"
    36  
    37  	"github.com/matrixorigin/matrixone/pkg/util/errutil"
    38  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/index"
    39  
    40  	"github.com/matrixorigin/matrixone/pkg/catalog"
    41  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    42  	"github.com/matrixorigin/matrixone/pkg/common/morpc"
    43  	"github.com/matrixorigin/matrixone/pkg/common/mpool"
    44  	"github.com/matrixorigin/matrixone/pkg/container/batch"
    45  	"github.com/matrixorigin/matrixone/pkg/container/bytejson"
    46  	"github.com/matrixorigin/matrixone/pkg/container/nulls"
    47  	"github.com/matrixorigin/matrixone/pkg/container/types"
    48  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    49  	"github.com/matrixorigin/matrixone/pkg/fileservice"
    50  	"github.com/matrixorigin/matrixone/pkg/logutil"
    51  	"github.com/matrixorigin/matrixone/pkg/objectio"
    52  	"github.com/matrixorigin/matrixone/pkg/pb/plan"
    53  	"github.com/matrixorigin/matrixone/pkg/sql/colexec"
    54  	"github.com/matrixorigin/matrixone/pkg/sql/parsers/tree"
    55  	plan2 "github.com/matrixorigin/matrixone/pkg/sql/plan"
    56  	"github.com/matrixorigin/matrixone/pkg/vm/process"
    57  	"github.com/matrixorigin/simdcsv"
    58  	"github.com/pierrec/lz4"
    59  )
    60  
    61  var (
    62  	ONE_BATCH_MAX_ROW  = 40000
    63  	S3_PARALLEL_MAXNUM = 10
    64  )
    65  
    66  var (
    67  	STATEMENT_ACCOUNT = "account"
    68  )
    69  
    70  func String(arg any, buf *bytes.Buffer) {
    71  	buf.WriteString("external output")
    72  }
    73  
    74  func Prepare(proc *process.Process, arg any) error {
    75  	param := arg.(*Argument).Es
    76  	if proc.Lim.MaxMsgSize == 0 {
    77  		param.maxBatchSize = uint64(morpc.GetMessageSize())
    78  	} else {
    79  		param.maxBatchSize = proc.Lim.MaxMsgSize
    80  	}
    81  	param.maxBatchSize = uint64(float64(param.maxBatchSize) * 0.6)
    82  	if param.Extern.Format == tree.JSONLINE {
    83  		if param.Extern.JsonData != tree.OBJECT && param.Extern.JsonData != tree.ARRAY {
    84  			param.Fileparam.End = true
    85  			return moerr.NewNotSupported(proc.Ctx, "the jsonline format '%s' is not supported now", param.Extern.JsonData)
    86  		}
    87  	}
    88  	param.Extern.FileService = proc.FileService
    89  	param.Extern.Ctx = proc.Ctx
    90  	param.IgnoreLineTag = int(param.Extern.Tail.IgnoredLines)
    91  	param.IgnoreLine = param.IgnoreLineTag
    92  	if len(param.FileList) == 0 {
    93  		logutil.Warnf("no such file '%s'", param.Extern.Filepath)
    94  		param.Fileparam.End = true
    95  	}
    96  	param.Fileparam.FileCnt = len(param.FileList)
    97  	param.Ctx = proc.Ctx
    98  	param.Zoneparam = &ZonemapFileparam{}
    99  	name2ColIndex := make(map[string]int32, len(param.Cols))
   100  	for i := 0; i < len(param.Cols); i++ {
   101  		name2ColIndex[param.Cols[i].Name] = int32(i)
   102  	}
   103  	param.tableDef = &plan.TableDef{
   104  		Name2ColIndex: name2ColIndex,
   105  	}
   106  	var columns []int
   107  	param.Filter.columnMap, columns, param.Filter.maxCol = plan2.GetColumnsByExpr(param.Filter.FilterExpr, param.tableDef)
   108  	param.Filter.columns = make([]uint16, len(columns))
   109  	param.Filter.defColumns = make([]uint16, len(columns))
   110  	for i := 0; i < len(columns); i++ {
   111  		col := param.Cols[columns[i]]
   112  		param.Filter.columns[i] = uint16(param.Name2ColIndex[col.Name])
   113  		param.Filter.defColumns[i] = uint16(columns[i])
   114  	}
   115  
   116  	param.Filter.exprMono = plan2.CheckExprIsMonotonic(proc.Ctx, param.Filter.FilterExpr)
   117  	param.Filter.File2Size = make(map[string]int64)
   118  	return nil
   119  }
   120  
   121  func Call(idx int, proc *process.Process, arg any, isFirst bool, isLast bool) (bool, error) {
   122  	select {
   123  	case <-proc.Ctx.Done():
   124  		proc.SetInputBatch(nil)
   125  		return true, nil
   126  	default:
   127  	}
   128  	t1 := time.Now()
   129  	anal := proc.GetAnalyze(idx)
   130  	anal.Start()
   131  	defer func() {
   132  		anal.Stop()
   133  		anal.AddScanTime(t1)
   134  	}()
   135  	anal.Input(nil, isFirst)
   136  	param := arg.(*Argument).Es
   137  	if param.Fileparam.End {
   138  		proc.SetInputBatch(nil)
   139  		return true, nil
   140  	}
   141  	if param.plh == nil {
   142  		if param.Fileparam.FileIndex >= len(param.FileList) {
   143  			proc.SetInputBatch(nil)
   144  			return true, nil
   145  		}
   146  		param.Fileparam.Filepath = param.FileList[param.Fileparam.FileIndex]
   147  		param.Fileparam.FileIndex++
   148  	}
   149  	bat, err := ScanFileData(param, proc)
   150  	if err != nil {
   151  		param.Fileparam.End = true
   152  		return false, err
   153  	}
   154  	proc.SetInputBatch(bat)
   155  	if bat != nil {
   156  		anal.Output(bat, isLast)
   157  		anal.Alloc(int64(bat.Size()))
   158  	}
   159  	return false, nil
   160  }
   161  
   162  func containColname(col string) bool {
   163  	return strings.Contains(col, STATEMENT_ACCOUNT) || strings.Contains(col, catalog.ExternalFilePath)
   164  }
   165  
   166  func judgeContainColname(expr *plan.Expr) bool {
   167  	expr_F, ok := expr.Expr.(*plan.Expr_F)
   168  	if !ok {
   169  		return false
   170  	}
   171  	if expr_F.F.Func.ObjName == "or" {
   172  		flag := true
   173  		for i := 0; i < len(expr_F.F.Args); i++ {
   174  			flag = flag && judgeContainColname(expr_F.F.Args[i])
   175  		}
   176  		return flag
   177  	}
   178  	expr_Col, ok := expr_F.F.Args[0].Expr.(*plan.Expr_Col)
   179  	if ok && containColname(expr_Col.Col.Name) {
   180  		return true
   181  	}
   182  	for _, arg := range expr_F.F.Args {
   183  		if judgeContainColname(arg) {
   184  			return true
   185  		}
   186  	}
   187  	return false
   188  }
   189  
   190  func getAccountCol(filepath string) string {
   191  	pathDir := strings.Split(filepath, "/")
   192  	if len(pathDir) < 2 {
   193  		return ""
   194  	}
   195  	return pathDir[1]
   196  }
   197  
   198  func makeFilepathBatch(node *plan.Node, proc *process.Process, filterList []*plan.Expr, fileList []string) *batch.Batch {
   199  	num := len(node.TableDef.Cols)
   200  	bat := &batch.Batch{
   201  		Attrs: make([]string, num),
   202  		Vecs:  make([]*vector.Vector, num),
   203  		Zs:    make([]int64, len(fileList)),
   204  	}
   205  	for i := 0; i < num; i++ {
   206  		bat.Attrs[i] = node.TableDef.Cols[i].Name
   207  		if bat.Attrs[i] == STATEMENT_ACCOUNT {
   208  			typ := types.Type{
   209  				Oid:   types.T(node.TableDef.Cols[i].Typ.Id),
   210  				Width: node.TableDef.Cols[i].Typ.Width,
   211  				Scale: node.TableDef.Cols[i].Typ.Scale,
   212  			}
   213  			vec := vector.NewOriginal(typ)
   214  			vector.PreAlloc(vec, len(fileList), len(fileList), proc.Mp())
   215  			vec.SetOriginal(false)
   216  			for j := 0; j < len(fileList); j++ {
   217  				vector.SetStringAt(vec, j, getAccountCol(fileList[j]), proc.Mp())
   218  			}
   219  			bat.Vecs[i] = vec
   220  		} else if bat.Attrs[i] == catalog.ExternalFilePath {
   221  			typ := types.Type{
   222  				Oid:   types.T_varchar,
   223  				Width: types.MaxVarcharLen,
   224  				Scale: 0,
   225  			}
   226  			vec := vector.NewOriginal(typ)
   227  			vector.PreAlloc(vec, len(fileList), len(fileList), proc.Mp())
   228  			vec.SetOriginal(false)
   229  			for j := 0; j < len(fileList); j++ {
   230  				vector.SetStringAt(vec, j, fileList[j], proc.Mp())
   231  			}
   232  			bat.Vecs[i] = vec
   233  		}
   234  	}
   235  	for k := 0; k < len(fileList); k++ {
   236  		bat.Zs[k] = 1
   237  	}
   238  	return bat
   239  }
   240  
   241  func filterByAccountAndFilename(node *plan.Node, proc *process.Process, fileList []string, fileSize []int64) ([]string, []int64, error) {
   242  	filterList := make([]*plan.Expr, 0)
   243  	filterList2 := make([]*plan.Expr, 0)
   244  	for i := 0; i < len(node.FilterList); i++ {
   245  		if judgeContainColname(node.FilterList[i]) {
   246  			filterList = append(filterList, node.FilterList[i])
   247  		} else {
   248  			filterList2 = append(filterList2, node.FilterList[i])
   249  		}
   250  	}
   251  	if len(filterList) == 0 {
   252  		return fileList, fileSize, nil
   253  	}
   254  	bat := makeFilepathBatch(node, proc, filterList, fileList)
   255  	filter := colexec.RewriteFilterExprList(filterList)
   256  	vec, err := colexec.EvalExpr(bat, proc, filter)
   257  	if err != nil {
   258  		return nil, fileSize, err
   259  	}
   260  	fileListTmp := make([]string, 0)
   261  	fileSizeTmp := make([]int64, 0)
   262  	bs := vector.GetColumn[bool](vec)
   263  	for i := 0; i < len(bs); i++ {
   264  		if bs[i] {
   265  			fileListTmp = append(fileListTmp, fileList[i])
   266  			fileSizeTmp = append(fileSizeTmp, fileSize[i])
   267  		}
   268  	}
   269  	node.FilterList = filterList2
   270  	return fileListTmp, fileSizeTmp, nil
   271  }
   272  
   273  func FilterFileList(node *plan.Node, proc *process.Process, fileList []string, fileSize []int64) ([]string, []int64, error) {
   274  	return filterByAccountAndFilename(node, proc, fileList, fileSize)
   275  }
   276  
   277  func IsSysTable(dbName string, tableName string) bool {
   278  	if dbName == "system" {
   279  		return tableName == "statement_info" || tableName == "rawlog"
   280  	} else if dbName == "system_metrics" {
   281  		return tableName == "metric"
   282  	}
   283  	return false
   284  }
   285  
   286  func ReadFile(param *ExternalParam, proc *process.Process) (io.ReadCloser, error) {
   287  	if param.Extern.Local {
   288  		return io.NopCloser(proc.LoadLocalReader), nil
   289  	}
   290  	fs, readPath, err := plan2.GetForETLWithType(param.Extern, param.Fileparam.Filepath)
   291  	if err != nil {
   292  		return nil, err
   293  	}
   294  	var r io.ReadCloser
   295  	vec := fileservice.IOVector{
   296  		FilePath: readPath,
   297  		Entries: []fileservice.IOEntry{
   298  			0: {
   299  				Offset:            0,
   300  				Size:              -1,
   301  				ReadCloserForRead: &r,
   302  			},
   303  		},
   304  	}
   305  	if param.Extern.Parallel {
   306  		vec.Entries[0].Offset = int64(param.FileOffset[param.Fileparam.FileIndex-1][0])
   307  		vec.Entries[0].Size = int64(param.FileOffset[param.Fileparam.FileIndex-1][1] - param.FileOffset[param.Fileparam.FileIndex-1][0])
   308  	}
   309  	if vec.Entries[0].Size == 0 || vec.Entries[0].Offset >= param.FileSize[param.Fileparam.FileIndex-1] {
   310  		return nil, nil
   311  	}
   312  	err = fs.Read(param.Ctx, &vec)
   313  	if err != nil {
   314  		return nil, err
   315  	}
   316  	return r, nil
   317  }
   318  
   319  func ReadFileOffset(param *tree.ExternParam, proc *process.Process, mcpu int, fileSize int64) ([][2]int, error) {
   320  	arr := make([][2]int, 0)
   321  
   322  	fs, readPath, err := plan2.GetForETLWithType(param, param.Filepath)
   323  	if err != nil {
   324  		return nil, err
   325  	}
   326  	var r io.ReadCloser
   327  	vec := fileservice.IOVector{
   328  		FilePath: readPath,
   329  		Entries: []fileservice.IOEntry{
   330  			0: {
   331  				Offset:            0,
   332  				Size:              -1,
   333  				ReadCloserForRead: &r,
   334  			},
   335  		},
   336  	}
   337  	var tailSize []int64
   338  	var offset []int64
   339  	for i := 0; i < mcpu; i++ {
   340  		vec.Entries[0].Offset = int64(i) * (fileSize / int64(mcpu))
   341  		if err = fs.Read(param.Ctx, &vec); err != nil {
   342  			return nil, err
   343  		}
   344  		r2 := bufio.NewReader(r)
   345  		line, _ := r2.ReadString('\n')
   346  		tailSize = append(tailSize, int64(len(line)))
   347  		offset = append(offset, vec.Entries[0].Offset)
   348  	}
   349  
   350  	start := 0
   351  	for i := 0; i < mcpu; i++ {
   352  		if i+1 < mcpu {
   353  			arr = append(arr, [2]int{start, int(offset[i+1] + tailSize[i+1])})
   354  			start = int(offset[i+1] + tailSize[i+1])
   355  		} else {
   356  			arr = append(arr, [2]int{start, -1})
   357  		}
   358  	}
   359  	return arr, nil
   360  }
   361  
   362  func getCompressType(param *tree.ExternParam, filepath string) string {
   363  	if param.CompressType != "" && param.CompressType != tree.AUTO {
   364  		return param.CompressType
   365  	}
   366  	index := strings.LastIndex(filepath, ".")
   367  	if index == -1 {
   368  		return tree.NOCOMPRESS
   369  	}
   370  	tail := string([]byte(filepath)[index+1:])
   371  	switch tail {
   372  	case "gz", "gzip":
   373  		return tree.GZIP
   374  	case "bz2", "bzip2":
   375  		return tree.BZIP2
   376  	case "lz4":
   377  		return tree.LZ4
   378  	default:
   379  		return tree.NOCOMPRESS
   380  	}
   381  }
   382  
   383  func getUnCompressReader(param *tree.ExternParam, filepath string, r io.ReadCloser) (io.ReadCloser, error) {
   384  	switch strings.ToLower(getCompressType(param, filepath)) {
   385  	case tree.NOCOMPRESS:
   386  		return r, nil
   387  	case tree.GZIP, tree.GZ:
   388  		r, err := gzip.NewReader(r)
   389  		if err != nil {
   390  			return nil, err
   391  		}
   392  		return r, nil
   393  	case tree.BZIP2, tree.BZ2:
   394  		return io.NopCloser(bzip2.NewReader(r)), nil
   395  	case tree.FLATE:
   396  		r = flate.NewReader(r)
   397  		return r, nil
   398  	case tree.ZLIB:
   399  		r, err := zlib.NewReader(r)
   400  		if err != nil {
   401  			return nil, err
   402  		}
   403  		return r, nil
   404  	case tree.LZ4:
   405  		return io.NopCloser(lz4.NewReader(r)), nil
   406  	case tree.LZW:
   407  		return nil, moerr.NewInternalError(param.Ctx, "the compress type '%s' is not support now", param.CompressType)
   408  	default:
   409  		return nil, moerr.NewInternalError(param.Ctx, "the compress type '%s' is not support now", param.CompressType)
   410  	}
   411  }
   412  
   413  func makeType(Cols []*plan.ColDef, index int) types.Type {
   414  	return types.New(types.T(Cols[index].Typ.Id), Cols[index].Typ.Width, Cols[index].Typ.Scale, Cols[index].Typ.Precision)
   415  }
   416  
   417  func makeBatch(param *ExternalParam, batchSize int, mp *mpool.MPool) *batch.Batch {
   418  	batchData := batch.New(true, param.Attrs)
   419  	//alloc space for vector
   420  	for i := 0; i < len(param.Attrs); i++ {
   421  		typ := makeType(param.Cols, i)
   422  		vec := vector.NewOriginal(typ)
   423  		vector.PreAlloc(vec, batchSize, batchSize, mp)
   424  		vec.SetOriginal(false)
   425  		batchData.Vecs[i] = vec
   426  	}
   427  	return batchData
   428  }
   429  
   430  func deleteEnclosed(param *ExternalParam, plh *ParseLineHandler) {
   431  	close := param.Extern.Tail.Fields.EnclosedBy
   432  	if close == '"' || close == 0 {
   433  		return
   434  	}
   435  	for rowIdx := 0; rowIdx < plh.batchSize; rowIdx++ {
   436  		Line := plh.simdCsvLineArray[rowIdx]
   437  		for i := 0; i < len(Line); i++ {
   438  			len := len(Line[i])
   439  			if len < 2 {
   440  				continue
   441  			}
   442  			if Line[i][0] == close && Line[i][len-1] == close {
   443  				Line[i] = Line[i][1 : len-1]
   444  			}
   445  		}
   446  	}
   447  }
   448  
   449  func getRealAttrCnt(attrs []string) int {
   450  	cnt := 0
   451  	for i := 0; i < len(attrs); i++ {
   452  		if catalog.ContainExternalHidenCol(attrs[i]) {
   453  			cnt++
   454  		}
   455  	}
   456  	return len(attrs) - cnt
   457  }
   458  
   459  func GetBatchData(param *ExternalParam, plh *ParseLineHandler, proc *process.Process) (*batch.Batch, error) {
   460  	bat := makeBatch(param, plh.batchSize, proc.Mp())
   461  	var (
   462  		Line []string
   463  		err  error
   464  	)
   465  	deleteEnclosed(param, plh)
   466  	unexpectEOF := false
   467  	for rowIdx := 0; rowIdx < plh.batchSize; rowIdx++ {
   468  		Line = plh.simdCsvLineArray[rowIdx]
   469  		if param.Extern.Format == tree.JSONLINE {
   470  			Line, err = transJson2Lines(proc.Ctx, Line[0], param.Attrs, param.Cols, param.Extern.JsonData, param)
   471  			if err != nil {
   472  				if errors.Is(err, io.ErrUnexpectedEOF) {
   473  					logutil.Infof("unexpected EOF, wait for next batch")
   474  					unexpectEOF = true
   475  					continue
   476  				}
   477  				return nil, err
   478  			}
   479  			plh.simdCsvLineArray[rowIdx] = Line
   480  		}
   481  		if param.ClusterTable != nil && param.ClusterTable.GetIsClusterTable() {
   482  			//the column account_id of the cluster table do need to be filled here
   483  			if len(Line)+1 < getRealAttrCnt(param.Attrs) {
   484  				return nil, moerr.NewInternalError(proc.Ctx, ColumnCntLargerErrorInfo())
   485  			}
   486  		} else {
   487  			if !param.Extern.SysTable && len(Line) < getRealAttrCnt(param.Attrs) {
   488  				return nil, moerr.NewInternalError(proc.Ctx, ColumnCntLargerErrorInfo())
   489  			}
   490  		}
   491  		err = getOneRowData(bat, Line, rowIdx, param, proc.Mp())
   492  		if err != nil {
   493  			return nil, err
   494  		}
   495  	}
   496  
   497  	n := vector.Length(bat.Vecs[0])
   498  	if unexpectEOF && n > 0 {
   499  		n--
   500  		for i := 0; i < len(bat.Vecs); i++ {
   501  			newVec := vector.NewOriginal(bat.Vecs[i].Typ)
   502  			vector.PreAlloc(newVec, n, n, proc.Mp())
   503  			newVec.Nsp = bat.Vecs[i].Nsp
   504  			for j := int64(0); j < int64(n); j++ {
   505  				if newVec.Nsp.Contains(uint64(j)) {
   506  					continue
   507  				}
   508  				err := vector.Copy(newVec, bat.Vecs[i], j, j, proc.Mp())
   509  				if err != nil {
   510  					return nil, err
   511  				}
   512  			}
   513  			bat.Vecs[i].Free(proc.Mp())
   514  			bat.Vecs[i] = newVec
   515  		}
   516  	}
   517  	sels := proc.Mp().GetSels()
   518  	if n > cap(sels) {
   519  		proc.Mp().PutSels(sels)
   520  		sels = make([]int64, n)
   521  	}
   522  	bat.Zs = sels[:n]
   523  	for k := 0; k < n; k++ {
   524  		bat.Zs[k] = 1
   525  	}
   526  	return bat, nil
   527  }
   528  
   529  // GetSimdcsvReader get file reader from external file
   530  func GetSimdcsvReader(param *ExternalParam, proc *process.Process) (*ParseLineHandler, error) {
   531  	var err error
   532  	param.reader, err = ReadFile(param, proc)
   533  	if err != nil || param.reader == nil {
   534  		return nil, err
   535  	}
   536  	param.reader, err = getUnCompressReader(param.Extern, param.Fileparam.Filepath, param.reader)
   537  	if err != nil {
   538  		return nil, err
   539  	}
   540  
   541  	channelSize := 100
   542  	plh := &ParseLineHandler{}
   543  	plh.simdCsvGetParsedLinesChan = atomic.Value{}
   544  	plh.simdCsvGetParsedLinesChan.Store(make(chan simdcsv.LineOut, channelSize))
   545  	if param.Extern.Tail.Fields == nil {
   546  		param.Extern.Tail.Fields = &tree.Fields{Terminated: ","}
   547  	}
   548  	if param.Extern.Format == tree.JSONLINE {
   549  		param.Extern.Tail.Fields.Terminated = "\t"
   550  	}
   551  	plh.simdCsvReader = simdcsv.NewReaderWithOptions(param.reader,
   552  		rune(param.Extern.Tail.Fields.Terminated[0]),
   553  		'#',
   554  		true,
   555  		false)
   556  
   557  	return plh, nil
   558  }
   559  
   560  func ScanCsvFile(param *ExternalParam, proc *process.Process) (*batch.Batch, error) {
   561  	var bat *batch.Batch
   562  	var err error
   563  	var cnt int
   564  	if param.plh == nil {
   565  		param.IgnoreLine = param.IgnoreLineTag
   566  		param.plh, err = GetSimdcsvReader(param, proc)
   567  		if err != nil || param.plh == nil {
   568  			return nil, err
   569  		}
   570  	}
   571  	plh := param.plh
   572  	plh.simdCsvLineArray = make([][]string, ONE_BATCH_MAX_ROW)
   573  	finish := false
   574  	plh.simdCsvLineArray, cnt, finish, err = plh.simdCsvReader.ReadLimitSize(ONE_BATCH_MAX_ROW, proc.Ctx, param.maxBatchSize, plh.simdCsvLineArray)
   575  	if err != nil {
   576  		return nil, err
   577  	}
   578  
   579  	if finish {
   580  		err := param.reader.Close()
   581  		if err != nil {
   582  			logutil.Errorf("close file failed. err:%v", err)
   583  		}
   584  		plh.simdCsvReader.Close()
   585  		param.plh = nil
   586  		param.Fileparam.FileFin++
   587  		if param.Fileparam.FileFin >= param.Fileparam.FileCnt {
   588  			param.Fileparam.End = true
   589  		}
   590  	}
   591  	if param.IgnoreLine != 0 {
   592  		if !param.Extern.Parallel || param.FileOffset[param.Fileparam.FileIndex-1][0] == 0 {
   593  			if cnt >= param.IgnoreLine {
   594  				plh.simdCsvLineArray = plh.simdCsvLineArray[param.IgnoreLine:cnt]
   595  				cnt -= param.IgnoreLine
   596  			} else {
   597  				plh.simdCsvLineArray = nil
   598  				cnt = 0
   599  			}
   600  			param.IgnoreLine = 0
   601  		}
   602  	}
   603  	plh.batchSize = cnt
   604  	bat, err = GetBatchData(param, plh, proc)
   605  	if err != nil {
   606  		return nil, err
   607  	}
   608  	bat.Cnt = 1
   609  	return bat, nil
   610  }
   611  
   612  func getBatchFromZonemapFile(param *ExternalParam, proc *process.Process, objectReader objectio.Reader) (*batch.Batch, error) {
   613  	bat := makeBatch(param, 0, proc.Mp())
   614  	if param.Zoneparam.offset >= len(param.Zoneparam.bs) {
   615  		return bat, nil
   616  	}
   617  
   618  	rows := 0
   619  
   620  	idxs := make([]uint16, len(param.Attrs))
   621  	meta := param.Zoneparam.bs[param.Zoneparam.offset].GetMeta()
   622  	header := meta.GetHeader()
   623  	colCnt := header.GetColumnCount()
   624  	for i := 0; i < len(param.Attrs); i++ {
   625  		idxs[i] = uint16(param.Name2ColIndex[param.Attrs[i]])
   626  		if param.Extern.SysTable && idxs[i] >= colCnt {
   627  			idxs[i] = 0
   628  		}
   629  	}
   630  
   631  	vec, err := objectReader.Read(param.Ctx, param.Zoneparam.bs[param.Zoneparam.offset].GetExtent(), idxs, proc.GetMPool())
   632  	if err != nil {
   633  		return nil, err
   634  	}
   635  	for i := 0; i < len(param.Attrs); i++ {
   636  		var vecTmp *vector.Vector
   637  		if param.Extern.SysTable && uint16(param.Name2ColIndex[param.Attrs[i]]) >= colCnt {
   638  			vecTmp = vector.New(makeType(param.Cols, i))
   639  			vector.PreAlloc(vecTmp, rows, rows, proc.GetMPool())
   640  			for j := 0; j < rows; j++ {
   641  				nulls.Add(vecTmp.Nsp, uint64(j))
   642  			}
   643  		} else if catalog.ContainExternalHidenCol(param.Attrs[i]) {
   644  			if rows == 0 {
   645  				vecTmp = vector.New(makeType(param.OriginCols, 0))
   646  				err = vecTmp.Read(vec.Entries[i].Object.([]byte))
   647  				if err != nil {
   648  					return nil, err
   649  				}
   650  				rows = vecTmp.Length()
   651  			}
   652  			vecTmp = vector.New(makeType(param.Cols, i))
   653  			vector.PreAlloc(vecTmp, rows, rows, proc.GetMPool())
   654  			for j := 0; j < rows; j++ {
   655  				err := vector.SetStringAt(vecTmp, j, param.Fileparam.Filepath, proc.GetMPool())
   656  				if err != nil {
   657  					return nil, err
   658  				}
   659  			}
   660  		} else {
   661  			vecTmp = vector.New(bat.Vecs[i].Typ)
   662  			err = vecTmp.Read(vec.Entries[i].Object.([]byte))
   663  			if err != nil {
   664  				return nil, err
   665  			}
   666  			rows = vecTmp.Length()
   667  		}
   668  		sels := make([]int64, vecTmp.Length())
   669  		for j := 0; j < len(sels); j++ {
   670  			sels[j] = int64(j)
   671  		}
   672  		vector.Union(bat.Vecs[i], vecTmp, sels, true, proc.GetMPool())
   673  	}
   674  
   675  	n := vector.Length(bat.Vecs[0])
   676  	sels := proc.Mp().GetSels()
   677  	if n > cap(sels) {
   678  		proc.Mp().PutSels(sels)
   679  		sels = make([]int64, n)
   680  	}
   681  	bat.Zs = sels[:n]
   682  	for k := 0; k < n; k++ {
   683  		bat.Zs[k] = 1
   684  	}
   685  	if !param.Extern.QueryResult {
   686  		param.Zoneparam.offset++
   687  	}
   688  	return bat, nil
   689  }
   690  
   691  func needRead(param *ExternalParam, proc *process.Process, objectReader objectio.Reader) bool {
   692  	if param.Zoneparam.offset >= len(param.Zoneparam.bs) {
   693  		return true
   694  	}
   695  	indexes, err := objectReader.ReadIndex(context.Background(), param.Zoneparam.bs[param.Zoneparam.offset].GetExtent(),
   696  		param.Filter.columns, objectio.ZoneMapType, proc.GetMPool())
   697  	if err != nil {
   698  		return true
   699  	}
   700  
   701  	notReportErrCtx := errutil.ContextWithNoReport(proc.Ctx, true)
   702  	// if expr match no columns, just eval expr
   703  	if len(param.Filter.columns) == 0 {
   704  		bat := batch.NewWithSize(0)
   705  		defer bat.Clean(proc.Mp())
   706  		ifNeed, err := plan2.EvalFilterExpr(notReportErrCtx, param.Filter.FilterExpr, bat, proc)
   707  		if err != nil {
   708  			return true
   709  		}
   710  		return ifNeed
   711  	}
   712  
   713  	dataLength := len(param.Filter.columns)
   714  	datas := make([][2]any, dataLength)
   715  	dataTypes := make([]uint8, dataLength)
   716  	for i := 0; i < dataLength; i++ {
   717  		idx := param.Filter.defColumns[i]
   718  		dataTypes[i] = uint8(param.Cols[idx].Typ.Id)
   719  		typ := types.T(dataTypes[i]).ToType()
   720  
   721  		zm := index.NewZoneMap(typ)
   722  		err = zm.Unmarshal(indexes[i].(*objectio.ZoneMap).GetData())
   723  		if err != nil {
   724  			return true
   725  		}
   726  		min := zm.GetMin()
   727  		max := zm.GetMax()
   728  		if min == nil || max == nil {
   729  			return true
   730  		}
   731  		datas[i] = [2]any{min, max}
   732  	}
   733  	// use all min/max data to build []vectors.
   734  	buildVectors := plan2.BuildVectorsByData(datas, dataTypes, proc.Mp())
   735  	bat := batch.NewWithSize(param.Filter.maxCol + 1)
   736  	defer bat.Clean(proc.Mp())
   737  	for k, v := range param.Filter.columnMap {
   738  		for i, realIdx := range param.Filter.defColumns {
   739  			if int(realIdx) == v {
   740  				bat.SetVector(int32(k), buildVectors[i])
   741  				break
   742  			}
   743  		}
   744  	}
   745  	bat.SetZs(buildVectors[0].Length(), proc.Mp())
   746  
   747  	ifNeed, err := plan2.EvalFilterExpr(notReportErrCtx, param.Filter.FilterExpr, bat, proc)
   748  	if err != nil {
   749  		return true
   750  	}
   751  	return ifNeed
   752  }
   753  
   754  func getZonemapBatch(param *ExternalParam, proc *process.Process, size int64, objectReader objectio.Reader) (*batch.Batch, error) {
   755  	var err error
   756  	if param.Extern.QueryResult {
   757  		param.Zoneparam.bs, err = objectReader.ReadAllMeta(param.Ctx, size, proc.GetMPool())
   758  		if err != nil {
   759  			return nil, err
   760  		}
   761  	} else if param.Zoneparam.bs == nil {
   762  		param.plh = &ParseLineHandler{}
   763  		var err error
   764  		param.Zoneparam.bs, err = objectReader.ReadAllMeta(param.Ctx, size, proc.GetMPool())
   765  		if err != nil {
   766  			return nil, err
   767  		}
   768  	}
   769  	if param.Zoneparam.offset >= len(param.Zoneparam.bs) {
   770  		bat := makeBatch(param, 0, proc.Mp())
   771  		return bat, nil
   772  	}
   773  
   774  	if param.Filter.exprMono {
   775  		for !needRead(param, proc, objectReader) {
   776  			param.Zoneparam.offset++
   777  		}
   778  		return getBatchFromZonemapFile(param, proc, objectReader)
   779  	} else {
   780  		return getBatchFromZonemapFile(param, proc, objectReader)
   781  	}
   782  }
   783  
   784  func ScanZonemapFile(param *ExternalParam, proc *process.Process) (*batch.Batch, error) {
   785  	if param.Filter.objectReader == nil || param.Extern.QueryResult {
   786  		dir, _ := filepath.Split(param.Fileparam.Filepath)
   787  		var service fileservice.FileService
   788  		var err error
   789  		var p fileservice.Path
   790  
   791  		if param.Extern.QueryResult {
   792  			service = param.Extern.FileService
   793  		} else {
   794  
   795  			// format filepath for local file
   796  			fp := param.Extern.Filepath
   797  			if p, err = fileservice.ParsePath(param.Extern.Filepath); err != nil {
   798  				return nil, err
   799  			} else if p.Service == "" {
   800  				if os.IsPathSeparator(filepath.Clean(param.Extern.Filepath)[0]) {
   801  					// absolute path
   802  					fp = "/"
   803  				} else {
   804  					// relative path.
   805  					// PS: this loop never trigger, caused by ReadDir() only support local file with absolute path
   806  					fp = "."
   807  				}
   808  			}
   809  
   810  			service, _, err = plan2.GetForETLWithType(param.Extern, fp)
   811  			if err != nil {
   812  				return nil, err
   813  			}
   814  		}
   815  		_, ok := param.Filter.File2Size[param.Fileparam.Filepath]
   816  		if !ok {
   817  			fs := objectio.NewObjectFS(service, dir)
   818  			dirs, err := fs.ListDir(dir)
   819  			if err != nil {
   820  				return nil, err
   821  			}
   822  			for i := 0; i < len(dirs); i++ {
   823  				param.Filter.File2Size[dir+dirs[i].Name] = dirs[i].Size
   824  			}
   825  		}
   826  
   827  		param.Filter.objectReader, err = objectio.NewObjectReader(param.Fileparam.Filepath, service)
   828  		if err != nil {
   829  			return nil, err
   830  		}
   831  	}
   832  
   833  	size, ok := param.Filter.File2Size[param.Fileparam.Filepath]
   834  	if !ok {
   835  		return nil, moerr.NewInternalErrorNoCtx("can' t find the filepath %s", param.Fileparam.Filepath)
   836  	}
   837  	bat, err := getZonemapBatch(param, proc, size, param.Filter.objectReader)
   838  	if err != nil {
   839  		return nil, err
   840  	}
   841  
   842  	if param.Zoneparam.offset >= len(param.Zoneparam.bs) {
   843  		param.Filter.objectReader = nil
   844  		param.Zoneparam.bs = nil
   845  		param.plh = nil
   846  		param.Fileparam.FileFin++
   847  		if param.Fileparam.FileFin >= param.Fileparam.FileCnt {
   848  			param.Fileparam.End = true
   849  		}
   850  		param.Zoneparam.offset = 0
   851  	}
   852  	return bat, nil
   853  }
   854  
   855  // ScanFileData read batch data from external file
   856  func ScanFileData(param *ExternalParam, proc *process.Process) (*batch.Batch, error) {
   857  	if strings.HasSuffix(param.Fileparam.Filepath, ".tae") || param.Extern.QueryResult {
   858  		return ScanZonemapFile(param, proc)
   859  	} else {
   860  		return ScanCsvFile(param, proc)
   861  	}
   862  }
   863  
   864  func transJson2Lines(ctx context.Context, str string, attrs []string, cols []*plan.ColDef, jsonData string, param *ExternalParam) ([]string, error) {
   865  	switch jsonData {
   866  	case tree.OBJECT:
   867  		return transJsonObject2Lines(ctx, str, attrs, cols, param)
   868  	case tree.ARRAY:
   869  		return transJsonArray2Lines(ctx, str, attrs, cols, param)
   870  	default:
   871  		return nil, moerr.NewNotSupported(ctx, "the jsonline format '%s' is not support now", jsonData)
   872  	}
   873  }
   874  
   875  func transJsonObject2Lines(ctx context.Context, str string, attrs []string, cols []*plan.ColDef, param *ExternalParam) ([]string, error) {
   876  	var (
   877  		err error
   878  		res = make([]string, 0, len(attrs))
   879  	)
   880  	if param.prevStr != "" {
   881  		str = param.prevStr + str
   882  		param.prevStr = ""
   883  	}
   884  	var jsonMap map[string]interface{}
   885  	var decoder = json.NewDecoder(bytes.NewReader([]byte(str)))
   886  	decoder.UseNumber()
   887  	err = decoder.Decode(&jsonMap)
   888  	if err != nil {
   889  		logutil.Errorf("json unmarshal err:%v", err)
   890  		param.prevStr = str
   891  		return nil, err
   892  	}
   893  	if len(jsonMap) < len(attrs) {
   894  		return nil, moerr.NewInternalError(ctx, ColumnCntLargerErrorInfo())
   895  	}
   896  	for idx, attr := range attrs {
   897  		if val, ok := jsonMap[attr]; ok {
   898  			if val == nil {
   899  				res = append(res, NULL_FLAG)
   900  				continue
   901  			}
   902  			tp := cols[idx].Typ.Id
   903  			if tp != int32(types.T_json) {
   904  				res = append(res, fmt.Sprintf("%v", val))
   905  				continue
   906  			}
   907  			var bj bytejson.ByteJson
   908  			err = bj.UnmarshalObject(val)
   909  			if err != nil {
   910  				return nil, err
   911  			}
   912  			dt, err := bj.Marshal()
   913  			if err != nil {
   914  				return nil, err
   915  			}
   916  			res = append(res, string(dt))
   917  		} else {
   918  			return nil, moerr.NewInvalidInput(ctx, "the attr %s is not in json", attr)
   919  		}
   920  	}
   921  	return res, nil
   922  }
   923  
   924  func transJsonArray2Lines(ctx context.Context, str string, attrs []string, cols []*plan.ColDef, param *ExternalParam) ([]string, error) {
   925  	var (
   926  		err error
   927  		res = make([]string, 0, len(attrs))
   928  	)
   929  	if param.prevStr != "" {
   930  		str = param.prevStr + str
   931  		param.prevStr = ""
   932  	}
   933  	var jsonArray []interface{}
   934  	var decoder = json.NewDecoder(bytes.NewReader([]byte(str)))
   935  	decoder.UseNumber()
   936  	err = decoder.Decode(&jsonArray)
   937  	if err != nil {
   938  		param.prevStr = str
   939  		return nil, err
   940  	}
   941  	if len(jsonArray) < len(attrs) {
   942  		return nil, moerr.NewInternalError(ctx, ColumnCntLargerErrorInfo())
   943  	}
   944  	for idx, val := range jsonArray {
   945  		if val == nil {
   946  			res = append(res, NULL_FLAG)
   947  			continue
   948  		}
   949  		tp := cols[idx].Typ.Id
   950  		if tp != int32(types.T_json) {
   951  			res = append(res, fmt.Sprintf("%v", val))
   952  			continue
   953  		}
   954  		var bj bytejson.ByteJson
   955  		err = bj.UnmarshalObject(val)
   956  		if err != nil {
   957  			return nil, err
   958  		}
   959  		dt, err := bj.Marshal()
   960  		if err != nil {
   961  			return nil, err
   962  		}
   963  		res = append(res, string(dt))
   964  	}
   965  	return res, nil
   966  }
   967  
   968  func getNullFlag(param *ExternalParam, attr, field string) bool {
   969  	list := param.Extern.NullMap[attr]
   970  	for i := 0; i < len(list); i++ {
   971  		field = strings.ToLower(field)
   972  		if list[i] == field {
   973  			return true
   974  		}
   975  	}
   976  	return false
   977  }
   978  
// NULL_FLAG is the two-character text `\N` (a backslash followed by N). In this
// file the jsonline converters emit it for JSON null values, and getOneRowData
// treats a field equal to it as SQL NULL.
const NULL_FLAG = "\\N"
   980  
   981  func judgeInteger(field string) bool {
   982  	for i := 0; i < len(field); i++ {
   983  		if field[i] == '-' || field[i] == '+' {
   984  			continue
   985  		}
   986  		if field[i] > '9' || field[i] < '0' {
   987  			return false
   988  		}
   989  	}
   990  	return true
   991  }
   992  
   993  func getStrFromLine(Line []string, colIdx int, param *ExternalParam) string {
   994  	if catalog.ContainExternalHidenCol(param.Attrs[colIdx]) {
   995  		return param.Fileparam.Filepath
   996  	} else {
   997  		var str string
   998  		if param.Extern.SysTable && int(param.Name2ColIndex[param.Attrs[colIdx]]) >= len(Line) {
   999  			str = "\\N"
  1000  		} else {
  1001  			str = Line[param.Name2ColIndex[param.Attrs[colIdx]]]
  1002  		}
  1003  		if param.Extern.Tail.Fields.EnclosedBy != 0 {
  1004  			tmp := strings.TrimSpace(str)
  1005  			if len(tmp) >= 2 && tmp[0] == param.Extern.Tail.Fields.EnclosedBy && tmp[len(tmp)-1] == param.Extern.Tail.Fields.EnclosedBy {
  1006  				return tmp[1 : len(tmp)-1]
  1007  			}
  1008  		}
  1009  		return str
  1010  	}
  1011  }
  1012  
  1013  func getOneRowData(bat *batch.Batch, Line []string, rowIdx int, param *ExternalParam, mp *mpool.MPool) error {
  1014  	for colIdx := range param.Attrs {
  1015  		//for cluster table, the column account_id need not be filled here
  1016  		if param.ClusterTable.GetIsClusterTable() && int(param.ClusterTable.GetColumnIndexOfAccountId()) == colIdx {
  1017  			continue
  1018  		}
  1019  		field := getStrFromLine(Line, colIdx, param)
  1020  		id := types.T(param.Cols[colIdx].Typ.Id)
  1021  		if id != types.T_char && id != types.T_varchar && id != types.T_json && id != types.T_blob && id != types.T_text {
  1022  			field = strings.TrimSpace(field)
  1023  		}
  1024  		vec := bat.Vecs[colIdx]
  1025  		isNullOrEmpty := field == NULL_FLAG
  1026  		if id != types.T_char && id != types.T_varchar && id != types.T_json && id != types.T_blob && id != types.T_text {
  1027  			isNullOrEmpty = isNullOrEmpty || len(field) == 0
  1028  		}
  1029  		isNullOrEmpty = isNullOrEmpty || (getNullFlag(param, param.Attrs[colIdx], field))
  1030  		switch id {
  1031  		case types.T_bool:
  1032  			cols := vector.MustTCols[bool](vec)
  1033  			if isNullOrEmpty {
  1034  				nulls.Add(vec.Nsp, uint64(rowIdx))
  1035  			} else {
  1036  				if field == "true" || field == "1" {
  1037  					cols[rowIdx] = true
  1038  				} else if field == "false" || field == "0" {
  1039  					cols[rowIdx] = false
  1040  				} else {
  1041  					return moerr.NewInternalError(param.Ctx, "the input value '%s' is not bool type for column %d", field, colIdx)
  1042  				}
  1043  			}
  1044  		case types.T_int8:
  1045  			cols := vector.MustTCols[int8](vec)
  1046  			if isNullOrEmpty {
  1047  				nulls.Add(vec.Nsp, uint64(rowIdx))
  1048  			} else {
  1049  				if judgeInteger(field) {
  1050  					d, err := strconv.ParseInt(field, 10, 8)
  1051  					if err != nil {
  1052  						logutil.Errorf("parse field[%v] err:%v", field, err)
  1053  						return moerr.NewInternalError(param.Ctx, "the input value '%v' is not int8 type for column %d", field, colIdx)
  1054  					}
  1055  					cols[rowIdx] = int8(d)
  1056  				} else {
  1057  					d, err := strconv.ParseFloat(field, 64)
  1058  					if err != nil || d < math.MinInt8 || d > math.MaxInt8 {
  1059  						logutil.Errorf("parse field[%v] err:%v", field, err)
  1060  						return moerr.NewInternalError(param.Ctx, "the input value '%v' is not int8 type for column %d", field, colIdx)
  1061  					}
  1062  					cols[rowIdx] = int8(d)
  1063  				}
  1064  			}
  1065  		case types.T_int16:
  1066  			cols := vector.MustTCols[int16](vec)
  1067  			if isNullOrEmpty {
  1068  				nulls.Add(vec.Nsp, uint64(rowIdx))
  1069  			} else {
  1070  				if judgeInteger(field) {
  1071  					d, err := strconv.ParseInt(field, 10, 16)
  1072  					if err != nil {
  1073  						logutil.Errorf("parse field[%v] err:%v", field, err)
  1074  						return moerr.NewInternalError(param.Ctx, "the input value '%v' is not int16 type for column %d", field, colIdx)
  1075  					}
  1076  					cols[rowIdx] = int16(d)
  1077  				} else {
  1078  					d, err := strconv.ParseFloat(field, 64)
  1079  					if err != nil || d < math.MinInt16 || d > math.MaxInt16 {
  1080  						logutil.Errorf("parse field[%v] err:%v", field, err)
  1081  						return moerr.NewInternalError(param.Ctx, "the input value '%v' is not int16 type for column %d", field, colIdx)
  1082  					}
  1083  					cols[rowIdx] = int16(d)
  1084  				}
  1085  			}
  1086  		case types.T_int32:
  1087  			cols := vector.MustTCols[int32](vec)
  1088  			if isNullOrEmpty {
  1089  				nulls.Add(vec.Nsp, uint64(rowIdx))
  1090  			} else {
  1091  				if judgeInteger(field) {
  1092  					d, err := strconv.ParseInt(field, 10, 32)
  1093  					if err != nil {
  1094  						logutil.Errorf("parse field[%v] err:%v", field, err)
  1095  						return moerr.NewInternalError(param.Ctx, "the input value '%v' is not int32 type for column %d", field, colIdx)
  1096  					}
  1097  					cols[rowIdx] = int32(d)
  1098  				} else {
  1099  					d, err := strconv.ParseFloat(field, 64)
  1100  					if err != nil || d < math.MinInt32 || d > math.MaxInt32 {
  1101  						logutil.Errorf("parse field[%v] err:%v", field, err)
  1102  						return moerr.NewInternalError(param.Ctx, "the input value '%v' is not int32 type for column %d", field, colIdx)
  1103  					}
  1104  					cols[rowIdx] = int32(d)
  1105  				}
  1106  			}
  1107  		case types.T_int64:
  1108  			cols := vector.MustTCols[int64](vec)
  1109  			if isNullOrEmpty {
  1110  				nulls.Add(vec.Nsp, uint64(rowIdx))
  1111  			} else {
  1112  				if judgeInteger(field) {
  1113  					d, err := strconv.ParseInt(field, 10, 64)
  1114  					if err != nil {
  1115  						logutil.Errorf("parse field[%v] err:%v", field, err)
  1116  						return moerr.NewInternalError(param.Ctx, "the input value '%v' is not int64 type for column %d", field, colIdx)
  1117  					}
  1118  					cols[rowIdx] = d
  1119  				} else {
  1120  					d, err := strconv.ParseFloat(field, 64)
  1121  					if err != nil || d < math.MinInt64 || d > math.MaxInt64 {
  1122  						logutil.Errorf("parse field[%v] err:%v", field, err)
  1123  						return moerr.NewInternalError(param.Ctx, "the input value '%v' is not int64 type for column %d", field, colIdx)
  1124  					}
  1125  					cols[rowIdx] = int64(d)
  1126  				}
  1127  			}
  1128  		case types.T_uint8:
  1129  			cols := vector.MustTCols[uint8](vec)
  1130  			if isNullOrEmpty {
  1131  				nulls.Add(vec.Nsp, uint64(rowIdx))
  1132  			} else {
  1133  				if judgeInteger(field) {
  1134  					d, err := strconv.ParseUint(field, 10, 8)
  1135  					if err != nil {
  1136  						logutil.Errorf("parse field[%v] err:%v", field, err)
  1137  						return moerr.NewInternalError(param.Ctx, "the input value '%v' is not uint8 type for column %d", field, colIdx)
  1138  					}
  1139  					cols[rowIdx] = uint8(d)
  1140  				} else {
  1141  					d, err := strconv.ParseFloat(field, 64)
  1142  					if err != nil || d < 0 || d > math.MaxUint8 {
  1143  						logutil.Errorf("parse field[%v] err:%v", field, err)
  1144  						return moerr.NewInternalError(param.Ctx, "the input value '%v' is not uint8 type for column %d", field, colIdx)
  1145  					}
  1146  					cols[rowIdx] = uint8(d)
  1147  				}
  1148  			}
  1149  		case types.T_uint16:
  1150  			cols := vector.MustTCols[uint16](vec)
  1151  			if isNullOrEmpty {
  1152  				nulls.Add(vec.Nsp, uint64(rowIdx))
  1153  			} else {
  1154  				if judgeInteger(field) {
  1155  					d, err := strconv.ParseUint(field, 10, 16)
  1156  					if err != nil {
  1157  						logutil.Errorf("parse field[%v] err:%v", field, err)
  1158  						return moerr.NewInternalError(param.Ctx, "the input value '%v' is not uint16 type for column %d", field, colIdx)
  1159  					}
  1160  					cols[rowIdx] = uint16(d)
  1161  				} else {
  1162  					d, err := strconv.ParseFloat(field, 64)
  1163  					if err != nil || d < 0 || d > math.MaxUint16 {
  1164  						logutil.Errorf("parse field[%v] err:%v", field, err)
  1165  						return moerr.NewInternalError(param.Ctx, "the input value '%v' is not uint16 type for column %d", field, colIdx)
  1166  					}
  1167  					cols[rowIdx] = uint16(d)
  1168  				}
  1169  			}
  1170  		case types.T_uint32:
  1171  			cols := vector.MustTCols[uint32](vec)
  1172  			if isNullOrEmpty {
  1173  				nulls.Add(vec.Nsp, uint64(rowIdx))
  1174  			} else {
  1175  				if judgeInteger(field) {
  1176  					d, err := strconv.ParseUint(field, 10, 32)
  1177  					if err != nil {
  1178  						logutil.Errorf("parse field[%v] err:%v", field, err)
  1179  						return moerr.NewInternalError(param.Ctx, "the input value '%v' is not uint32 type for column %d", field, colIdx)
  1180  					}
  1181  					cols[rowIdx] = uint32(d)
  1182  				} else {
  1183  					d, err := strconv.ParseFloat(field, 64)
  1184  					if err != nil || d < 0 || d > math.MaxUint32 {
  1185  						logutil.Errorf("parse field[%v] err:%v", field, err)
  1186  						return moerr.NewInternalError(param.Ctx, "the input value '%v' is not uint32 type for column %d", field, colIdx)
  1187  					}
  1188  					cols[rowIdx] = uint32(d)
  1189  				}
  1190  			}
  1191  		case types.T_uint64:
  1192  			cols := vector.MustTCols[uint64](vec)
  1193  			if isNullOrEmpty {
  1194  				nulls.Add(vec.Nsp, uint64(rowIdx))
  1195  			} else {
  1196  				if judgeInteger(field) {
  1197  					d, err := strconv.ParseUint(field, 10, 64)
  1198  					if err != nil {
  1199  						logutil.Errorf("parse field[%v] err:%v", field, err)
  1200  						return moerr.NewInternalError(param.Ctx, "the input value '%v' is not uint64 type for column %d", field, colIdx)
  1201  					}
  1202  					cols[rowIdx] = d
  1203  				} else {
  1204  					d, err := strconv.ParseFloat(field, 64)
  1205  					if err != nil || d < 0 || d > math.MaxUint64 {
  1206  						logutil.Errorf("parse field[%v] err:%v", field, err)
  1207  						return moerr.NewInternalError(param.Ctx, "the input value '%v' is not uint64 type for column %d", field, colIdx)
  1208  					}
  1209  					cols[rowIdx] = uint64(d)
  1210  				}
  1211  			}
  1212  		case types.T_float32:
  1213  			cols := vector.MustTCols[float32](vec)
  1214  			if isNullOrEmpty {
  1215  				nulls.Add(vec.Nsp, uint64(rowIdx))
  1216  			} else {
  1217  				// origin float32 data type
  1218  				if vec.Typ.Precision < 0 {
  1219  					d, err := strconv.ParseFloat(field, 32)
  1220  					if err != nil {
  1221  						logutil.Errorf("parse field[%v] err:%v", field, err)
  1222  						return moerr.NewInternalError(param.Ctx, "the input value '%v' is not float32 type for column %d", field, colIdx)
  1223  					}
  1224  					cols[rowIdx] = float32(d)
  1225  					continue
  1226  				}
  1227  				d, err := types.Decimal128_FromStringWithScale(field, vec.Typ.Width, vec.Typ.Precision)
  1228  				if err != nil {
  1229  					logutil.Errorf("parse field[%v] err:%v", field, err)
  1230  					return moerr.NewInternalError(param.Ctx, "the input value '%v' is not float32 type for column %d", field, colIdx)
  1231  				}
  1232  				cols[rowIdx] = float32(d.ToFloat64())
  1233  			}
  1234  		case types.T_float64:
  1235  			cols := vector.MustTCols[float64](vec)
  1236  			if isNullOrEmpty {
  1237  				nulls.Add(vec.Nsp, uint64(rowIdx))
  1238  			} else {
  1239  				// origin float64 data type
  1240  				if vec.Typ.Precision < 0 {
  1241  					d, err := strconv.ParseFloat(field, 64)
  1242  					if err != nil {
  1243  						logutil.Errorf("parse field[%v] err:%v", field, err)
  1244  						return moerr.NewInternalError(param.Ctx, "the input value '%v' is not float64 type for column %d", field, colIdx)
  1245  					}
  1246  					cols[rowIdx] = d
  1247  					continue
  1248  				}
  1249  				d, err := types.Decimal128_FromStringWithScale(field, vec.Typ.Width, vec.Typ.Precision)
  1250  				if err != nil {
  1251  					logutil.Errorf("parse field[%v] err:%v", field, err)
  1252  					return moerr.NewInternalError(param.Ctx, "the input value '%v' is not float64 type for column %d", field, colIdx)
  1253  				}
  1254  				cols[rowIdx] = d.ToFloat64()
  1255  			}
  1256  		case types.T_char, types.T_varchar, types.T_blob, types.T_text:
  1257  			if isNullOrEmpty {
  1258  				nulls.Add(vec.Nsp, uint64(rowIdx))
  1259  			} else {
  1260  				// XXX Memory accounting?
  1261  				err := vector.SetStringAt(vec, rowIdx, field, mp)
  1262  				if err != nil {
  1263  					return err
  1264  				}
  1265  			}
  1266  		case types.T_json:
  1267  			if isNullOrEmpty {
  1268  				nulls.Add(vec.Nsp, uint64(rowIdx))
  1269  			} else {
  1270  				var (
  1271  					byteJson  bytejson.ByteJson
  1272  					err       error
  1273  					jsonBytes []byte
  1274  				)
  1275  				if param.Extern.Format == tree.CSV {
  1276  					byteJson, err = types.ParseStringToByteJson(field)
  1277  					if err != nil {
  1278  						logutil.Errorf("parse field[%v] err:%v", field, err)
  1279  						return moerr.NewInternalError(param.Ctx, "the input value '%v' is not json type for column %d", field, colIdx)
  1280  					}
  1281  					jsonBytes, err = types.EncodeJson(byteJson)
  1282  					if err != nil {
  1283  						logutil.Errorf("encode json[%v] err:%v", field, err)
  1284  						return moerr.NewInternalError(param.Ctx, "the input value '%v' is not json type for column %d", field, colIdx)
  1285  					}
  1286  				} else { //jsonline
  1287  					jsonBytes = []byte(field)
  1288  				}
  1289  				err = vector.SetBytesAt(vec, rowIdx, jsonBytes, mp)
  1290  				if err != nil {
  1291  					return err
  1292  				}
  1293  			}
  1294  		case types.T_date:
  1295  			cols := vector.MustTCols[types.Date](vec)
  1296  			if isNullOrEmpty {
  1297  				nulls.Add(vec.Nsp, uint64(rowIdx))
  1298  			} else {
  1299  				d, err := types.ParseDateCast(field)
  1300  				if err != nil {
  1301  					logutil.Errorf("parse field[%v] err:%v", field, err)
  1302  					return moerr.NewInternalError(param.Ctx, "the input value '%v' is not Date type for column %d", field, colIdx)
  1303  				}
  1304  				cols[rowIdx] = d
  1305  			}
  1306  		case types.T_time:
  1307  			cols := vector.MustTCols[types.Time](vec)
  1308  			if isNullOrEmpty {
  1309  				nulls.Add(vec.Nsp, uint64(rowIdx))
  1310  			} else {
  1311  				d, err := types.ParseTime(field, vec.Typ.Precision)
  1312  				if err != nil {
  1313  					logutil.Errorf("parse field[%v] err:%v", field, err)
  1314  					return moerr.NewInternalError(param.Ctx, "the input value '%v' is not Time type for column %d", field, colIdx)
  1315  				}
  1316  				cols[rowIdx] = d
  1317  			}
  1318  		case types.T_datetime:
  1319  			cols := vector.MustTCols[types.Datetime](vec)
  1320  			if isNullOrEmpty {
  1321  				nulls.Add(vec.Nsp, uint64(rowIdx))
  1322  			} else {
  1323  				d, err := types.ParseDatetime(field, vec.Typ.Precision)
  1324  				if err != nil {
  1325  					logutil.Errorf("parse field[%v] err:%v", field, err)
  1326  					return moerr.NewInternalError(param.Ctx, "the input value '%v' is not Datetime type for column %d", field, colIdx)
  1327  				}
  1328  				cols[rowIdx] = d
  1329  			}
  1330  		case types.T_decimal64:
  1331  			cols := vector.MustTCols[types.Decimal64](vec)
  1332  			if isNullOrEmpty {
  1333  				nulls.Add(vec.Nsp, uint64(rowIdx))
  1334  			} else {
  1335  				d, err := types.Decimal64_FromStringWithScale(field, vec.Typ.Width, vec.Typ.Scale)
  1336  				if err != nil {
  1337  					// we tolerate loss of digits.
  1338  					if !moerr.IsMoErrCode(err, moerr.ErrDataTruncated) {
  1339  						logutil.Errorf("parse field[%v] err:%v", field, err)
  1340  						return moerr.NewInternalError(param.Ctx, "the input value '%v' is invalid Decimal64 type for column %d", field, colIdx)
  1341  					}
  1342  				}
  1343  				cols[rowIdx] = d
  1344  			}
  1345  		case types.T_decimal128:
  1346  			cols := vector.MustTCols[types.Decimal128](vec)
  1347  			if isNullOrEmpty {
  1348  				nulls.Add(vec.Nsp, uint64(rowIdx))
  1349  			} else {
  1350  				d, err := types.Decimal128_FromStringWithScale(field, vec.Typ.Width, vec.Typ.Scale)
  1351  				if err != nil {
  1352  					// we tolerate loss of digits.
  1353  					if !moerr.IsMoErrCode(err, moerr.ErrDataTruncated) {
  1354  						logutil.Errorf("parse field[%v] err:%v", field, err)
  1355  						return moerr.NewInternalError(param.Ctx, "the input value '%v' is invalid Decimal128 type for column %d", field, colIdx)
  1356  					}
  1357  				}
  1358  				cols[rowIdx] = d
  1359  			}
  1360  		case types.T_timestamp:
  1361  			cols := vector.MustTCols[types.Timestamp](vec)
  1362  			if isNullOrEmpty {
  1363  				nulls.Add(vec.Nsp, uint64(rowIdx))
  1364  			} else {
  1365  				t := time.Local
  1366  				d, err := types.ParseTimestamp(t, field, vec.Typ.Precision)
  1367  				if err != nil {
  1368  					logutil.Errorf("parse field[%v] err:%v", field, err)
  1369  					return moerr.NewInternalError(param.Ctx, "the input value '%v' is not Timestamp type for column %d", field, colIdx)
  1370  				}
  1371  				cols[rowIdx] = d
  1372  			}
  1373  		case types.T_uuid:
  1374  			cols := vector.MustTCols[types.Uuid](vec)
  1375  			if isNullOrEmpty {
  1376  				nulls.Add(vec.Nsp, uint64(rowIdx))
  1377  			} else {
  1378  				d, err := types.ParseUuid(field)
  1379  				if err != nil {
  1380  					logutil.Errorf("parse field[%v] err:%v", field, err)
  1381  					return moerr.NewInternalError(param.Ctx, "the input value '%v' is not uuid type for column %d", field, colIdx)
  1382  				}
  1383  				cols[rowIdx] = d
  1384  			}
  1385  		default:
  1386  			return moerr.NewInternalError(param.Ctx, "the value type %d is not support now", param.Cols[rowIdx].Typ.Id)
  1387  		}
  1388  	}
  1389  	return nil
  1390  }