github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/colexec/external/external.go (about)

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package external
    16  
    17  import (
    18  	"archive/tar"
    19  	"bufio"
    20  	"bytes"
    21  	"compress/bzip2"
    22  	"compress/flate"
    23  	"compress/gzip"
    24  	"compress/zlib"
    25  	"context"
    26  	"encoding/json"
    27  	"errors"
    28  	"fmt"
    29  	"io"
    30  	"math"
    31  	"strconv"
    32  	"strings"
    33  	"time"
    34  
    35  	"github.com/matrixorigin/matrixone/pkg/catalog"
    36  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    37  	"github.com/matrixorigin/matrixone/pkg/common/morpc"
    38  	"github.com/matrixorigin/matrixone/pkg/common/mpool"
    39  	"github.com/matrixorigin/matrixone/pkg/common/util"
    40  	"github.com/matrixorigin/matrixone/pkg/container/batch"
    41  	"github.com/matrixorigin/matrixone/pkg/container/bytejson"
    42  	"github.com/matrixorigin/matrixone/pkg/container/nulls"
    43  	"github.com/matrixorigin/matrixone/pkg/container/types"
    44  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    45  	"github.com/matrixorigin/matrixone/pkg/fileservice"
    46  	"github.com/matrixorigin/matrixone/pkg/logutil"
    47  	"github.com/matrixorigin/matrixone/pkg/objectio"
    48  	"github.com/matrixorigin/matrixone/pkg/pb/plan"
    49  	"github.com/matrixorigin/matrixone/pkg/sql/colexec"
    50  	"github.com/matrixorigin/matrixone/pkg/sql/parsers/tree"
    51  	plan2 "github.com/matrixorigin/matrixone/pkg/sql/plan"
    52  	"github.com/matrixorigin/matrixone/pkg/sql/util/csvparser"
    53  	"github.com/matrixorigin/matrixone/pkg/util/errutil"
    54  	v2 "github.com/matrixorigin/matrixone/pkg/util/metric/v2"
    55  	"github.com/matrixorigin/matrixone/pkg/util/trace"
    56  	"github.com/matrixorigin/matrixone/pkg/vm"
    57  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/blockio"
    58  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/options"
    59  	"github.com/matrixorigin/matrixone/pkg/vm/process"
    60  	"github.com/pierrec/lz4/v4"
    61  )
    62  
// Tunables of the external scan operator.
//   - OneBatchMaxRow is the row capacity of the reusable CSV line buffer that
//     Prepare allocates; it is initialised from options.DefaultBlockMaxRows.
//   - S3ParallelMaxnum is not referenced in this part of the file; presumably
//     it caps the number of parallel S3 readers (TODO confirm against callers).
var (
	OneBatchMaxRow   = int(options.DefaultBlockMaxRows)
	S3ParallelMaxnum = 10
)

// STATEMENT_ACCOUNT is the name of the account column. containColname matches
// it as a substring of column names, and makeFilepathBatch fills a column with
// exactly this name from the second "/"-separated segment of each file path.
var (
	STATEMENT_ACCOUNT = "account"
)

// argName is the operator name written by Argument.String.
const argName = "external"
    73  
    74  func (arg *Argument) String(buf *bytes.Buffer) {
    75  	buf.WriteString(argName)
    76  	buf.WriteString(": external output")
    77  }
    78  
    79  func (arg *Argument) Prepare(proc *process.Process) error {
    80  	_, span := trace.Start(proc.Ctx, "ExternalPrepare")
    81  	defer span.End()
    82  	param := arg.Es
    83  	if proc.Lim.MaxMsgSize == 0 {
    84  		param.maxBatchSize = uint64(morpc.GetMessageSize())
    85  	} else {
    86  		param.maxBatchSize = proc.Lim.MaxMsgSize
    87  	}
    88  	param.maxBatchSize = uint64(float64(param.maxBatchSize) * 0.6)
    89  	if param.Extern == nil {
    90  		param.Extern = &tree.ExternParam{}
    91  		if err := json.Unmarshal([]byte(param.CreateSql), param.Extern); err != nil {
    92  			return err
    93  		}
    94  		if err := plan2.InitS3Param(param.Extern); err != nil {
    95  			return err
    96  		}
    97  		param.Extern.FileService = proc.FileService
    98  	}
    99  	if !loadFormatIsValid(param.Extern) {
   100  		return moerr.NewNYI(proc.Ctx, "load format '%s'", param.Extern.Format)
   101  	}
   102  
   103  	if param.Extern.Format != tree.PARQUET {
   104  		if param.Extern.Format == tree.JSONLINE {
   105  			if param.Extern.JsonData != tree.OBJECT && param.Extern.JsonData != tree.ARRAY {
   106  				param.Fileparam.End = true
   107  				return moerr.NewNotSupported(proc.Ctx, "the jsonline format '%s' is not supported now", param.Extern.JsonData)
   108  			}
   109  		}
   110  		param.IgnoreLineTag = int(param.Extern.Tail.IgnoredLines)
   111  		param.IgnoreLine = param.IgnoreLineTag
   112  		param.MoCsvLineArray = make([][]csvparser.Field, OneBatchMaxRow)
   113  	}
   114  
   115  	if len(param.FileList) == 0 && param.Extern.ScanType != tree.INLINE {
   116  		logutil.Warnf("no such file '%s'", param.Extern.Filepath)
   117  		param.Fileparam.End = true
   118  	}
   119  	param.Fileparam.FileCnt = len(param.FileList)
   120  	param.Ctx = proc.Ctx
   121  	param.Zoneparam = &ZonemapFileparam{}
   122  	name2ColIndex := make(map[string]int32, len(param.Cols))
   123  	for i, col := range param.Cols {
   124  		name2ColIndex[col.Name] = int32(i)
   125  	}
   126  	param.tableDef = &plan.TableDef{
   127  		Name2ColIndex: name2ColIndex,
   128  	}
   129  	param.Filter.columnMap, _, _, _ = plan2.GetColumnsByExpr(param.Filter.FilterExpr, param.tableDef)
   130  	param.Filter.zonemappable = plan2.ExprIsZonemappable(proc.Ctx, param.Filter.FilterExpr)
   131  	return nil
   132  }
   133  
   134  func (arg *Argument) Call(proc *process.Process) (vm.CallResult, error) {
   135  	if err, isCancel := vm.CancelCheck(proc); isCancel {
   136  		return vm.CancelResult, err
   137  	}
   138  
   139  	t := time.Now()
   140  	ctx, span := trace.Start(proc.Ctx, "ExternalCall")
   141  	t1 := time.Now()
   142  	anal := proc.GetAnalyze(arg.GetIdx(), arg.GetParallelIdx(), arg.GetParallelMajor())
   143  	anal.Start()
   144  	defer func() {
   145  		anal.Stop()
   146  		anal.AddScanTime(t1)
   147  		span.End()
   148  		v2.TxnStatementExternalScanDurationHistogram.Observe(time.Since(t).Seconds())
   149  	}()
   150  	anal.Input(nil, arg.GetIsFirst())
   151  
   152  	var err error
   153  	result := vm.NewCallResult()
   154  	param := arg.Es
   155  	if param.Fileparam.End {
   156  		result.Status = vm.ExecStop
   157  		return result, nil
   158  	}
   159  	if (param.plh == nil && param.parqh == nil) && param.Extern.ScanType != tree.INLINE {
   160  		if param.Fileparam.FileIndex >= len(param.FileList) {
   161  			result.Status = vm.ExecStop
   162  			return result, nil
   163  		}
   164  		param.Fileparam.Filepath = param.FileList[param.Fileparam.FileIndex]
   165  		param.Fileparam.FileIndex++
   166  	}
   167  	if arg.buf != nil {
   168  		proc.PutBatch(arg.buf)
   169  		arg.buf = nil
   170  	}
   171  	arg.buf, err = scanFileData(ctx, param, proc)
   172  	if err != nil {
   173  		param.Fileparam.End = true
   174  		return result, err
   175  	}
   176  
   177  	if arg.buf != nil {
   178  		anal.Output(arg.buf, arg.GetIsLast())
   179  		arg.maxAllocSize = max(arg.maxAllocSize, arg.buf.Size())
   180  	}
   181  	result.Batch = arg.buf
   182  	if result.Batch != nil {
   183  		result.Batch.ShuffleIDX = param.Idx
   184  	}
   185  	return result, nil
   186  }
   187  
   188  func containColname(col string) bool {
   189  	return strings.Contains(col, STATEMENT_ACCOUNT) || strings.Contains(col, catalog.ExternalFilePath)
   190  }
   191  
   192  func judgeContainColname(expr *plan.Expr) bool {
   193  	expr_F, ok := expr.Expr.(*plan.Expr_F)
   194  	if !ok {
   195  		return false
   196  	}
   197  	if expr_F.F.Func.ObjName == "or" {
   198  		flag := true
   199  		for i := 0; i < len(expr_F.F.Args); i++ {
   200  			flag = flag && judgeContainColname(expr_F.F.Args[i])
   201  		}
   202  		return flag
   203  	}
   204  	expr_Col, ok := expr_F.F.Args[0].Expr.(*plan.Expr_Col)
   205  	if ok && containColname(expr_Col.Col.Name) {
   206  		return true
   207  	}
   208  	for _, arg := range expr_F.F.Args {
   209  		if judgeContainColname(arg) {
   210  			return true
   211  		}
   212  	}
   213  	return false
   214  }
   215  
   216  func getAccountCol(filepath string) string {
   217  	pathDir := strings.Split(filepath, "/")
   218  	if len(pathDir) < 2 {
   219  		return ""
   220  	}
   221  	return pathDir[1]
   222  }
   223  
   224  func makeFilepathBatch(node *plan.Node, proc *process.Process, fileList []string) (bat *batch.Batch, err error) {
   225  	num := len(node.TableDef.Cols)
   226  	bat = &batch.Batch{
   227  		Attrs: make([]string, num),
   228  		Vecs:  make([]*vector.Vector, num),
   229  		Cnt:   1,
   230  	}
   231  
   232  	var buf bytes.Buffer
   233  	mp := proc.GetMPool()
   234  	for i := 0; i < num; i++ {
   235  		bat.Attrs[i] = node.TableDef.Cols[i].Name
   236  		if bat.Attrs[i] == STATEMENT_ACCOUNT {
   237  			typ := types.New(types.T(node.TableDef.Cols[i].Typ.Id), node.TableDef.Cols[i].Typ.Width, node.TableDef.Cols[i].Typ.Scale)
   238  			bat.Vecs[i], err = proc.AllocVectorOfRows(typ, len(fileList), nil)
   239  			if err != nil {
   240  				bat.Clean(mp)
   241  				return nil, err
   242  			}
   243  
   244  			for j := 0; j < len(fileList); j++ {
   245  				buf.WriteString(getAccountCol(fileList[j]))
   246  				bs := buf.Bytes()
   247  				if err = vector.SetBytesAt(bat.Vecs[i], j, bs, mp); err != nil {
   248  					bat.Clean(mp)
   249  					return nil, err
   250  				}
   251  				buf.Reset()
   252  			}
   253  		} else if bat.Attrs[i] == catalog.ExternalFilePath {
   254  			typ := types.T_varchar.ToType()
   255  			bat.Vecs[i], err = proc.AllocVectorOfRows(typ, len(fileList), nil)
   256  			if err != nil {
   257  				bat.Clean(mp)
   258  				return nil, err
   259  			}
   260  
   261  			for j := 0; j < len(fileList); j++ {
   262  				buf.WriteString(fileList[j])
   263  				bs := buf.Bytes()
   264  				if err = vector.SetBytesAt(bat.Vecs[i], j, bs, mp); err != nil {
   265  					bat.Clean(mp)
   266  					return nil, err
   267  				}
   268  				buf.Reset()
   269  			}
   270  		}
   271  	}
   272  	bat.SetRowCount(len(fileList))
   273  	return bat, nil
   274  }
   275  
   276  func filterByAccountAndFilename(ctx context.Context, node *plan.Node, proc *process.Process, fileList []string, fileSize []int64) ([]string, []int64, error) {
   277  	_, span := trace.Start(ctx, "filterByAccountAndFilename")
   278  	defer span.End()
   279  	filterList := make([]*plan.Expr, 0)
   280  	filterList2 := make([]*plan.Expr, 0)
   281  	for i := 0; i < len(node.FilterList); i++ {
   282  		if judgeContainColname(node.FilterList[i]) {
   283  			filterList = append(filterList, node.FilterList[i])
   284  		} else {
   285  			filterList2 = append(filterList2, node.FilterList[i])
   286  		}
   287  	}
   288  	if len(filterList) == 0 {
   289  		return fileList, fileSize, nil
   290  	}
   291  	bat, err := makeFilepathBatch(node, proc, fileList)
   292  	if err != nil {
   293  		return nil, nil, err
   294  	}
   295  	filter := colexec.RewriteFilterExprList(filterList)
   296  
   297  	executor, err := colexec.NewExpressionExecutor(proc, filter)
   298  	if err != nil {
   299  		return nil, nil, err
   300  	}
   301  	vec, err := executor.Eval(proc, []*batch.Batch{bat})
   302  	if err != nil {
   303  		executor.Free()
   304  		return nil, nil, err
   305  	}
   306  
   307  	fileListTmp := make([]string, 0)
   308  	fileSizeTmp := make([]int64, 0)
   309  	bs := vector.MustFixedCol[bool](vec)
   310  	for i := 0; i < len(bs); i++ {
   311  		if bs[i] {
   312  			fileListTmp = append(fileListTmp, fileList[i])
   313  			fileSizeTmp = append(fileSizeTmp, fileSize[i])
   314  		}
   315  	}
   316  	executor.Free()
   317  	node.FilterList = filterList2
   318  	return fileListTmp, fileSizeTmp, nil
   319  }
   320  
// FilterFileList is the exported entry point for file pruning. It forwards all
// arguments unchanged to filterByAccountAndFilename and returns its results.
func FilterFileList(ctx context.Context, node *plan.Node, proc *process.Process, fileList []string, fileSize []int64) ([]string, []int64, error) {
	return filterByAccountAndFilename(ctx, node, proc, fileList, fileSize)
}
   324  
   325  func readFile(param *ExternalParam, proc *process.Process) (io.ReadCloser, error) {
   326  	if param.Extern.ScanType == tree.INLINE {
   327  		return io.NopCloser(bytes.NewReader(util.UnsafeStringToBytes(param.Extern.Data))), nil
   328  	}
   329  	if param.Extern.Local {
   330  		return io.NopCloser(proc.LoadLocalReader), nil
   331  	}
   332  	fs, readPath, err := plan2.GetForETLWithType(param.Extern, param.Fileparam.Filepath)
   333  	if err != nil {
   334  		return nil, err
   335  	}
   336  	var r io.ReadCloser
   337  	vec := fileservice.IOVector{
   338  		FilePath: readPath,
   339  		Entries: []fileservice.IOEntry{
   340  			0: {
   341  				Offset:            0,
   342  				Size:              -1,
   343  				ReadCloserForRead: &r,
   344  			},
   345  		},
   346  	}
   347  	if 2*param.Idx >= len(param.FileOffsetTotal[param.Fileparam.FileIndex-1].Offset) {
   348  		return nil, nil
   349  	}
   350  	param.FileOffset = param.FileOffsetTotal[param.Fileparam.FileIndex-1].Offset[2*param.Idx : 2*param.Idx+2]
   351  	if param.Extern.Parallel {
   352  		vec.Entries[0].Offset = param.FileOffset[0]
   353  		vec.Entries[0].Size = param.FileOffset[1] - param.FileOffset[0]
   354  	}
   355  	if vec.Entries[0].Size == 0 || vec.Entries[0].Offset >= param.FileSize[param.Fileparam.FileIndex-1] {
   356  		return nil, nil
   357  	}
   358  	err = fs.Read(param.Ctx, &vec)
   359  	if err != nil {
   360  		return nil, err
   361  	}
   362  	return r, nil
   363  }
   364  
   365  // TODO : merge below two functions
   366  func ReadFileOffsetNoStrict(param *tree.ExternParam, mcpu int, fileSize int64) ([]int64, error) {
   367  	arr := make([]int64, 0)
   368  
   369  	fs, readPath, err := plan2.GetForETLWithType(param, param.Filepath)
   370  	if err != nil {
   371  		return nil, err
   372  	}
   373  	var r io.ReadCloser
   374  	vec := fileservice.IOVector{
   375  		FilePath: readPath,
   376  		Entries: []fileservice.IOEntry{
   377  			0: {
   378  				Offset:            0,
   379  				Size:              -1,
   380  				ReadCloserForRead: &r,
   381  			},
   382  		},
   383  	}
   384  	var tailSize []int64
   385  	var offset []int64
   386  	for i := 0; i < mcpu; i++ {
   387  		vec.Entries[0].Offset = int64(i) * (fileSize / int64(mcpu))
   388  		if err = fs.Read(param.Ctx, &vec); err != nil {
   389  			return nil, err
   390  		}
   391  		r2 := bufio.NewReader(r)
   392  		line, _ := r2.ReadString('\n')
   393  		tailSize = append(tailSize, int64(len(line)))
   394  		offset = append(offset, vec.Entries[0].Offset)
   395  	}
   396  
   397  	start := int64(0)
   398  	for i := 0; i < mcpu; i++ {
   399  		if i+1 < mcpu {
   400  			arr = append(arr, start)
   401  			arr = append(arr, offset[i+1]+tailSize[i+1])
   402  			start = offset[i+1] + tailSize[i+1]
   403  		} else {
   404  			arr = append(arr, start)
   405  			arr = append(arr, -1)
   406  		}
   407  	}
   408  	return arr, nil
   409  }
   410  
   411  func ReadFileOffsetStrict(param *tree.ExternParam, mcpu int, fileSize int64, visibleCols []*plan.ColDef) ([]int64, error) {
   412  	arr := make([]int64, 0)
   413  
   414  	fs, readPath, err := plan2.GetForETLWithType(param, param.Filepath)
   415  	if err != nil {
   416  		return nil, err
   417  	}
   418  	var r io.ReadCloser
   419  	vec := fileservice.IOVector{
   420  		FilePath: readPath,
   421  		Entries: []fileservice.IOEntry{
   422  			0: {
   423  				Offset:            0,
   424  				Size:              -1,
   425  				ReadCloserForRead: &r,
   426  			},
   427  		},
   428  	}
   429  
   430  	var offset []int64
   431  	batchSize := fileSize / int64(mcpu)
   432  
   433  	offset = append(offset, 0)
   434  
   435  	for i := 1; i < mcpu; i++ {
   436  		vec.Entries[0].Offset = offset[i-1] + batchSize
   437  		if vec.Entries[0].Offset >= fileSize {
   438  			break
   439  		}
   440  		if err = fs.Read(param.Ctx, &vec); err != nil {
   441  			return nil, err
   442  		}
   443  		tailSize, err := getTailSize(param, visibleCols, r)
   444  		if err != nil {
   445  			break
   446  		}
   447  		offset = append(offset, vec.Entries[0].Offset+tailSize)
   448  	}
   449  
   450  	for i := 0; i < len(offset); i++ {
   451  		if i+1 < len(offset) {
   452  			arr = append(arr, offset[i])
   453  			arr = append(arr, offset[i+1])
   454  		} else {
   455  			arr = append(arr, offset[i])
   456  			arr = append(arr, -1)
   457  		}
   458  	}
   459  	return arr, nil
   460  }
   461  
   462  func getTailSize(param *tree.ExternParam, cols []*plan.ColDef, r io.ReadCloser) (int64, error) {
   463  	bufR := bufio.NewReader(r)
   464  	// ensure the first character is not field quote symbol
   465  	quoteByte := byte('"')
   466  	if param.Tail.Fields != nil {
   467  		if enclosed := param.Tail.Fields.EnclosedBy; enclosed != nil && enclosed.Value != 0 {
   468  			quoteByte = enclosed.Value
   469  		}
   470  	}
   471  	skipCount := int64(0)
   472  	for {
   473  		ch, err := bufR.ReadByte()
   474  		if err != nil {
   475  			return 0, err
   476  		}
   477  		if ch != quoteByte {
   478  			err = bufR.UnreadByte()
   479  			if err != nil {
   480  				return 0, err
   481  			}
   482  			break
   483  		}
   484  		skipCount++
   485  	}
   486  	csvReader, err := newReaderWithParam(&ExternalParam{
   487  		ExParamConst: ExParamConst{Extern: param},
   488  		ExParam:      ExParam{reader: io.NopCloser(bufR)},
   489  	}, true)
   490  	if err != nil {
   491  		return 0, err
   492  	}
   493  	var fields []csvparser.Field
   494  	for {
   495  		fields, err = csvReader.Read()
   496  		if err != nil {
   497  			return 0, err
   498  		}
   499  		if len(fields) < len(cols) {
   500  			continue
   501  		}
   502  		if isLegalLine(param, cols, fields) {
   503  			return csvReader.Pos() + skipCount, nil
   504  		}
   505  	}
   506  }
   507  
   508  func isLegalLine(param *tree.ExternParam, cols []*plan.ColDef, fields []csvparser.Field) bool {
   509  	for idx, col := range cols {
   510  		field := fields[idx]
   511  		id := types.T(col.Typ.Id)
   512  		if id != types.T_char && id != types.T_varchar && id != types.T_json &&
   513  			id != types.T_binary && id != types.T_varbinary && id != types.T_blob && id != types.T_text {
   514  			field.Val = strings.TrimSpace(field.Val)
   515  		}
   516  		isNullOrEmpty := field.IsNull || (getNullFlag(param.NullMap, col.Name, field.Val))
   517  		if id != types.T_char && id != types.T_varchar &&
   518  			id != types.T_binary && id != types.T_varbinary && id != types.T_json && id != types.T_blob && id != types.T_text {
   519  			isNullOrEmpty = isNullOrEmpty || len(field.Val) == 0
   520  		}
   521  		if isNullOrEmpty {
   522  			continue
   523  		}
   524  		switch id {
   525  		case types.T_bool:
   526  			_, err := types.ParseBool(field.Val)
   527  			if err != nil {
   528  				return false
   529  			}
   530  		case types.T_bit:
   531  			if len(field.Val) > 8 {
   532  				return false
   533  			}
   534  			width := col.Typ.Width
   535  			var val uint64
   536  			for i := 0; i < len(field.Val); i++ {
   537  				val = (val << 8) | uint64(field.Val[i])
   538  			}
   539  			if val > uint64(1<<width-1) {
   540  				return false
   541  			}
   542  		case types.T_int8:
   543  			_, err := strconv.ParseInt(field.Val, 10, 8)
   544  			if err != nil {
   545  				if errors.Is(err, strconv.ErrRange) {
   546  					return false
   547  				}
   548  				f, err := strconv.ParseFloat(field.Val, 64)
   549  				if err != nil || f < math.MinInt8 || f > math.MaxInt8 {
   550  					return false
   551  				}
   552  			}
   553  		case types.T_int16:
   554  			_, err := strconv.ParseInt(field.Val, 10, 16)
   555  			if err != nil {
   556  				if errors.Is(err, strconv.ErrRange) {
   557  					return false
   558  				}
   559  				f, err := strconv.ParseFloat(field.Val, 64)
   560  				if err != nil || f < math.MinInt16 || f > math.MaxInt16 {
   561  					return false
   562  				}
   563  			}
   564  		case types.T_int32:
   565  			_, err := strconv.ParseInt(field.Val, 10, 32)
   566  			if err != nil {
   567  				if errors.Is(err, strconv.ErrRange) {
   568  					return false
   569  				}
   570  				f, err := strconv.ParseFloat(field.Val, 64)
   571  				if err != nil || f < math.MinInt32 || f > math.MaxInt32 {
   572  					return false
   573  				}
   574  			}
   575  		case types.T_int64:
   576  			_, err := strconv.ParseInt(field.Val, 10, 64)
   577  			if err != nil {
   578  				if errors.Is(err, strconv.ErrRange) {
   579  					return false
   580  				}
   581  				f, err := strconv.ParseFloat(field.Val, 64)
   582  				if err != nil || f < math.MinInt64 || f > math.MaxInt64 {
   583  					return false
   584  				}
   585  			}
   586  		case types.T_uint8:
   587  			_, err := strconv.ParseUint(field.Val, 10, 8)
   588  			if err != nil {
   589  				if errors.Is(err, strconv.ErrRange) {
   590  					return false
   591  				}
   592  				f, err := strconv.ParseFloat(field.Val, 64)
   593  				if err != nil || f < 0 || f > math.MaxUint8 {
   594  					return false
   595  				}
   596  			}
   597  		case types.T_uint16:
   598  			_, err := strconv.ParseUint(field.Val, 10, 16)
   599  			if err != nil {
   600  				if errors.Is(err, strconv.ErrRange) {
   601  					return false
   602  				}
   603  				f, err := strconv.ParseFloat(field.Val, 64)
   604  				if err != nil || f < 0 || f > math.MaxUint16 {
   605  					return false
   606  				}
   607  			}
   608  		case types.T_uint32:
   609  			_, err := strconv.ParseUint(field.Val, 10, 32)
   610  			if err != nil {
   611  				if errors.Is(err, strconv.ErrRange) {
   612  					return false
   613  				}
   614  				f, err := strconv.ParseFloat(field.Val, 64)
   615  				if err != nil || f < 0 || f > math.MaxUint32 {
   616  					return false
   617  				}
   618  			}
   619  		case types.T_uint64:
   620  			_, err := strconv.ParseUint(field.Val, 10, 64)
   621  			if err != nil {
   622  				if errors.Is(err, strconv.ErrRange) {
   623  					return false
   624  				}
   625  				f, err := strconv.ParseFloat(field.Val, 64)
   626  				if err != nil || f < 0 || f > math.MaxUint64 {
   627  					return false
   628  				}
   629  			}
   630  		case types.T_float32:
   631  			// origin float32 data type
   632  			if col.Typ.Scale < 0 || col.Typ.Width == 0 {
   633  				_, err := strconv.ParseFloat(field.Val, 32)
   634  				if err != nil {
   635  					return false
   636  				}
   637  			} else {
   638  				_, err := types.ParseDecimal128(field.Val, col.Typ.Width, col.Typ.Scale)
   639  				if err != nil {
   640  					return false
   641  				}
   642  			}
   643  		case types.T_float64:
   644  			// origin float64 data type
   645  			if col.Typ.Scale < 0 || col.Typ.Width == 0 {
   646  				_, err := strconv.ParseFloat(field.Val, 64)
   647  				if err != nil {
   648  					return false
   649  				}
   650  			} else {
   651  				_, err := types.ParseDecimal128(field.Val, col.Typ.Width, col.Typ.Scale)
   652  				if err != nil {
   653  					return false
   654  				}
   655  
   656  			}
   657  		case types.T_char, types.T_varchar, types.T_binary, types.T_varbinary, types.T_blob, types.T_text:
   658  			continue
   659  		case types.T_array_float32:
   660  			_, err := types.StringToArrayToBytes[float32](field.Val)
   661  			if err != nil {
   662  				return false
   663  			}
   664  		case types.T_array_float64:
   665  			_, err := types.StringToArrayToBytes[float64](field.Val)
   666  			if err != nil {
   667  				return false
   668  			}
   669  		case types.T_json:
   670  			if param.Format == tree.CSV {
   671  				field.Val = fmt.Sprintf("%v", strings.Trim(field.Val, "\""))
   672  				byteJson, err := types.ParseStringToByteJson(field.Val)
   673  				if err != nil {
   674  					return false
   675  				}
   676  				_, err = types.EncodeJson(byteJson)
   677  				if err != nil {
   678  					return false
   679  				}
   680  			}
   681  		case types.T_date:
   682  			_, err := types.ParseDateCast(field.Val)
   683  			if err != nil {
   684  				return false
   685  			}
   686  		case types.T_time:
   687  			_, err := types.ParseTime(field.Val, col.Typ.Scale)
   688  			if err != nil {
   689  				return false
   690  			}
   691  		case types.T_datetime:
   692  			_, err := types.ParseDatetime(field.Val, col.Typ.Scale)
   693  			if err != nil {
   694  				return false
   695  			}
   696  		case types.T_enum:
   697  			_, err := strconv.ParseUint(field.Val, 10, 16)
   698  			if err == nil {
   699  				continue
   700  			} else if errors.Is(err, strconv.ErrSyntax) {
   701  				_, err := types.ParseEnum(col.Typ.Enumvalues, field.Val)
   702  				if err != nil {
   703  					return false
   704  				}
   705  			} else {
   706  				if errors.Is(err, strconv.ErrRange) {
   707  					return false
   708  				}
   709  				f, err := strconv.ParseFloat(field.Val, 64)
   710  				if err != nil || f < 0 || f > math.MaxUint16 {
   711  					return false
   712  				}
   713  			}
   714  		case types.T_decimal64:
   715  			_, err := types.ParseDecimal64(field.Val, col.Typ.Width, col.Typ.Scale)
   716  			if err != nil {
   717  				// we tolerate loss of digits.
   718  				if !moerr.IsMoErrCode(err, moerr.ErrDataTruncated) {
   719  					return false
   720  				}
   721  			}
   722  		case types.T_decimal128:
   723  			_, err := types.ParseDecimal128(field.Val, col.Typ.Width, col.Typ.Scale)
   724  			if err != nil {
   725  				// we tolerate loss of digits.
   726  				if !moerr.IsMoErrCode(err, moerr.ErrDataTruncated) {
   727  					return false
   728  				}
   729  			}
   730  		case types.T_timestamp:
   731  			t := time.Local
   732  			_, err := types.ParseTimestamp(t, field.Val, col.Typ.Scale)
   733  			if err != nil {
   734  				return false
   735  			}
   736  		case types.T_uuid:
   737  			_, err := types.ParseUuid(field.Val)
   738  			if err != nil {
   739  				return false
   740  			}
   741  		default:
   742  			return false
   743  		}
   744  	}
   745  	return true
   746  }
   747  
   748  func GetCompressType(param *tree.ExternParam, filepath string) string {
   749  	if param.CompressType != "" && param.CompressType != tree.AUTO {
   750  		return param.CompressType
   751  	}
   752  
   753  	filepath = strings.ToLower(filepath)
   754  
   755  	switch {
   756  	case strings.HasSuffix(filepath, ".tar.gz") || strings.HasSuffix(filepath, ".tar.gzip"):
   757  		return tree.TAR_GZ
   758  	case strings.HasSuffix(filepath, ".tar.bz2") || strings.HasSuffix(filepath, ".tar.bzip2"):
   759  		return tree.TAR_BZ2
   760  	case strings.HasSuffix(filepath, ".gz") || strings.HasSuffix(filepath, ".gzip"):
   761  		return tree.GZIP
   762  	case strings.HasSuffix(filepath, ".bz2") || strings.HasSuffix(filepath, ".bzip2"):
   763  		return tree.BZIP2
   764  	case strings.HasSuffix(filepath, ".lz4"):
   765  		return tree.LZ4
   766  	default:
   767  		return tree.NOCOMPRESS
   768  	}
   769  }
   770  
   771  func getUnCompressReader(param *tree.ExternParam, filepath string, r io.ReadCloser) (io.ReadCloser, error) {
   772  	switch strings.ToLower(GetCompressType(param, filepath)) {
   773  	case tree.NOCOMPRESS:
   774  		return r, nil
   775  	case tree.GZIP, tree.GZ:
   776  		return gzip.NewReader(r)
   777  	case tree.BZIP2, tree.BZ2:
   778  		return io.NopCloser(bzip2.NewReader(r)), nil
   779  	case tree.FLATE:
   780  		return flate.NewReader(r), nil
   781  	case tree.ZLIB:
   782  		return zlib.NewReader(r)
   783  	case tree.LZ4:
   784  		return io.NopCloser(lz4.NewReader(r)), nil
   785  	case tree.LZW:
   786  		return nil, moerr.NewInternalError(param.Ctx, "the compress type '%s' is not support now", param.CompressType)
   787  	case tree.TAR_GZ:
   788  		gzipReader, err := gzip.NewReader(r)
   789  		if err != nil {
   790  			return nil, err
   791  		}
   792  		return getTarReader(param.Ctx, gzipReader)
   793  	case tree.TAR_BZ2:
   794  		return getTarReader(param.Ctx, bzip2.NewReader(r))
   795  	default:
   796  		return nil, moerr.NewInternalError(param.Ctx, "the compress type '%s' is not support now", param.CompressType)
   797  	}
   798  }
   799  
   800  func getTarReader(ctx context.Context, r io.Reader) (io.ReadCloser, error) {
   801  	tarReader := tar.NewReader(r)
   802  	// move to first file
   803  	for {
   804  		header, err := tarReader.Next()
   805  		if err == io.EOF {
   806  			return nil, moerr.NewInternalError(ctx, "failed to decompress the file, no available files found")
   807  		}
   808  		if err != nil {
   809  			return nil, err
   810  		}
   811  		if !header.FileInfo().IsDir() && !strings.HasPrefix(header.FileInfo().Name(), ".") {
   812  			break
   813  		}
   814  	}
   815  	return io.NopCloser(tarReader), nil
   816  }
   817  
   818  func makeType(typ *plan.Type, flag bool) types.Type {
   819  	if flag {
   820  		return types.New(types.T_varchar, 0, 0)
   821  	}
   822  	return types.New(types.T(typ.Id), typ.Width, typ.Scale)
   823  }
   824  
   825  func makeBatch(param *ExternalParam, batchSize int, proc *process.Process) (bat *batch.Batch, err error) {
   826  	bat = batch.New(false, param.Attrs)
   827  	//alloc space for vector
   828  	for i := range param.Attrs {
   829  		typ := makeType(&param.Cols[i].Typ, param.ParallelLoad)
   830  		bat.Vecs[i] = proc.GetVector(typ)
   831  	}
   832  	if err = bat.PreExtend(proc.GetMPool(), batchSize); err != nil {
   833  		bat.Clean(proc.GetMPool())
   834  		return nil, err
   835  	}
   836  	for i := range bat.Vecs {
   837  		bat.Vecs[i].SetLength(batchSize)
   838  	}
   839  	return bat, nil
   840  }
   841  
   842  func getRealAttrCnt(attrs []string, cols []*plan.ColDef) int {
   843  	cnt := 0
   844  	for i := 0; i < len(attrs); i++ {
   845  		if catalog.ContainExternalHidenCol(attrs[i]) || cols[i].Hidden {
   846  			cnt++
   847  		}
   848  	}
   849  	return len(attrs) - cnt
   850  }
   851  
   852  func getBatchData(param *ExternalParam, plh *ParseLineHandler, proc *process.Process) (*batch.Batch, error) {
   853  	bat, err := makeBatch(param, plh.batchSize, proc)
   854  	if err != nil {
   855  		return nil, err
   856  	}
   857  
   858  	unexpectEOF := false
   859  	for rowIdx := 0; rowIdx < plh.batchSize; rowIdx++ {
   860  		line := plh.moCsvLineArray[rowIdx]
   861  		if param.Extern.Format == tree.JSONLINE {
   862  			line, err = transJson2Lines(proc.Ctx, line[0].Val, param.Attrs, param.Cols, param.Extern.JsonData, param)
   863  			if err != nil {
   864  				if errors.Is(err, io.ErrUnexpectedEOF) {
   865  					logutil.Infof("unexpected EOF, wait for next batch")
   866  					unexpectEOF = true
   867  					continue
   868  				}
   869  				return nil, err
   870  			}
   871  			plh.moCsvLineArray[rowIdx] = line
   872  		}
   873  		if param.ClusterTable != nil && param.ClusterTable.GetIsClusterTable() {
   874  			//the column account_id of the cluster table do need to be filled here
   875  			if len(line)+1 < getRealAttrCnt(param.Attrs, param.Cols) {
   876  				return nil, moerr.NewInternalError(proc.Ctx, ColumnCntLargerErrorInfo)
   877  			}
   878  		} else {
   879  			if !param.Extern.SysTable && len(line) < getRealAttrCnt(param.Attrs, param.Cols) {
   880  				return nil, moerr.NewInternalError(proc.Ctx, ColumnCntLargerErrorInfo)
   881  			}
   882  		}
   883  		err = getOneRowData(bat, line, rowIdx, param, proc.GetMPool())
   884  		if err != nil {
   885  			return nil, err
   886  		}
   887  	}
   888  
   889  	n := bat.Vecs[0].Length()
   890  	if unexpectEOF && n > 0 {
   891  		n--
   892  		for i := 0; i < bat.VectorCount(); i++ {
   893  			vec := bat.GetVector(int32(i))
   894  			vec.SetLength(n)
   895  		}
   896  	}
   897  	bat.SetRowCount(n)
   898  	return bat, nil
   899  }
   900  
   901  // getMOCSVReader get file reader from external file
   902  func getMOCSVReader(param *ExternalParam, proc *process.Process) (*ParseLineHandler, error) {
   903  	var err error
   904  	param.reader, err = readFile(param, proc)
   905  	if err != nil || param.reader == nil {
   906  		return nil, err
   907  	}
   908  	param.reader, err = getUnCompressReader(param.Extern, param.Fileparam.Filepath, param.reader)
   909  	if err != nil {
   910  		return nil, err
   911  	}
   912  
   913  	csvReader, err := newReaderWithParam(param, false)
   914  	if err != nil {
   915  		return nil, err
   916  	}
   917  	plh := &ParseLineHandler{
   918  		csvReader:      csvReader,
   919  		moCsvLineArray: param.MoCsvLineArray,
   920  	}
   921  	return plh, nil
   922  }
   923  
   924  func scanCsvFile(ctx context.Context, param *ExternalParam, proc *process.Process) (*batch.Batch, error) {
   925  	var bat *batch.Batch
   926  	var err error
   927  	var cnt int
   928  	_, span := trace.Start(ctx, "scanCsvFile")
   929  	defer span.End()
   930  	if param.plh == nil {
   931  		param.IgnoreLine = param.IgnoreLineTag
   932  		param.plh, err = getMOCSVReader(param, proc)
   933  		if err != nil || param.plh == nil {
   934  			return nil, err
   935  		}
   936  	}
   937  	plh := param.plh
   938  	finish := false
   939  	cnt, finish, err = readCountStringLimitSize(plh.csvReader, proc.Ctx, param.maxBatchSize, plh.moCsvLineArray)
   940  	if err != nil {
   941  		logutil.Errorf("read external file meet error: %s", err.Error())
   942  		return nil, err
   943  	}
   944  
   945  	if finish {
   946  		err := param.reader.Close()
   947  		if err != nil {
   948  			logutil.Errorf("close file failed. err:%v", err)
   949  		}
   950  		param.plh = nil
   951  		param.Fileparam.FileFin++
   952  		if param.Fileparam.FileFin >= param.Fileparam.FileCnt {
   953  			param.Fileparam.End = true
   954  		}
   955  	}
   956  	if param.IgnoreLine != 0 {
   957  		if !param.Extern.Parallel || param.FileOffset[0] == 0 {
   958  			if cnt >= param.IgnoreLine {
   959  				plh.moCsvLineArray = plh.moCsvLineArray[param.IgnoreLine:cnt]
   960  				cnt -= param.IgnoreLine
   961  				plh.moCsvLineArray = append(plh.moCsvLineArray, make([]csvparser.Field, param.IgnoreLine))
   962  			} else {
   963  				plh.moCsvLineArray = nil
   964  				cnt = 0
   965  			}
   966  			param.IgnoreLine = 0
   967  		}
   968  	}
   969  	plh.batchSize = cnt
   970  	bat, err = getBatchData(param, plh, proc)
   971  	if err != nil {
   972  		return nil, err
   973  	}
   974  	return bat, nil
   975  }
   976  
   977  func getBatchFromZonemapFile(ctx context.Context, param *ExternalParam, proc *process.Process, objectReader *blockio.BlockReader) (bat *batch.Batch, err error) {
   978  	var tmpBat *batch.Batch
   979  	var vecTmp *vector.Vector
   980  	var release func()
   981  	mp := proc.Mp()
   982  
   983  	ctx, span := trace.Start(ctx, "getBatchFromZonemapFile")
   984  	defer func() {
   985  		span.End()
   986  		if tmpBat != nil {
   987  			for i, v := range tmpBat.Vecs {
   988  				if v == vecTmp {
   989  					tmpBat.Vecs[i] = nil
   990  				}
   991  			}
   992  			tmpBat.Clean(mp)
   993  		}
   994  		if vecTmp != nil {
   995  			vecTmp.Free(mp)
   996  		}
   997  		if release != nil {
   998  			release()
   999  		}
  1000  		if err != nil && bat != nil {
  1001  			bat.Clean(mp)
  1002  		}
  1003  	}()
  1004  
  1005  	bat, err = makeBatch(param, 0, proc)
  1006  	if err != nil {
  1007  		return nil, err
  1008  	}
  1009  	if param.Zoneparam.offset >= len(param.Zoneparam.bs) {
  1010  		return bat, nil
  1011  	}
  1012  
  1013  	rows := 0
  1014  
  1015  	idxs := make([]uint16, len(param.Attrs))
  1016  	meta := param.Zoneparam.bs[param.Zoneparam.offset].GetMeta()
  1017  	colCnt := meta.BlockHeader().ColumnCount()
  1018  	for i := 0; i < len(param.Attrs); i++ {
  1019  		idxs[i] = uint16(param.Name2ColIndex[param.Attrs[i]])
  1020  		if param.Extern.SysTable && idxs[i] >= colCnt {
  1021  			idxs[i] = 0
  1022  		}
  1023  	}
  1024  
  1025  	tmpBat, release, err = objectReader.LoadColumns(ctx, idxs, nil, param.Zoneparam.bs[param.Zoneparam.offset].BlockHeader().BlockID().Sequence(), mp)
  1026  	if err != nil {
  1027  		return nil, err
  1028  	}
  1029  	filepathBytes := []byte(param.Fileparam.Filepath)
  1030  
  1031  	var sels []int32
  1032  	for i := 0; i < len(param.Attrs); i++ {
  1033  		if param.Extern.SysTable && uint16(param.Name2ColIndex[param.Attrs[i]]) >= colCnt {
  1034  			vecTmp, err = proc.AllocVectorOfRows(makeType(&param.Cols[i].Typ, false), rows, nil)
  1035  			if err != nil {
  1036  				return nil, err
  1037  			}
  1038  			for j := 0; j < rows; j++ {
  1039  				nulls.Add(vecTmp.GetNulls(), uint64(j))
  1040  			}
  1041  		} else if catalog.ContainExternalHidenCol(param.Attrs[i]) {
  1042  			if rows == 0 {
  1043  				rows = tmpBat.Vecs[i].Length()
  1044  			}
  1045  			vecTmp, err = proc.AllocVectorOfRows(makeType(&param.Cols[i].Typ, false), rows, nil)
  1046  			if err != nil {
  1047  				return nil, err
  1048  			}
  1049  			for j := 0; j < rows; j++ {
  1050  				if err = vector.SetBytesAt(vecTmp, j, filepathBytes, mp); err != nil {
  1051  					return nil, err
  1052  				}
  1053  			}
  1054  		} else {
  1055  			vecTmp = tmpBat.Vecs[i]
  1056  			rows = vecTmp.Length()
  1057  		}
  1058  		if cap(sels) >= vecTmp.Length() {
  1059  			sels = sels[:vecTmp.Length()]
  1060  		} else {
  1061  			sels = make([]int32, vecTmp.Length())
  1062  
  1063  			for j, k := int32(0), int32(len(sels)); j < k; j++ {
  1064  				sels[j] = j
  1065  			}
  1066  		}
  1067  
  1068  		if err = bat.Vecs[i].Union(vecTmp, sels, proc.GetMPool()); err != nil {
  1069  			return nil, err
  1070  		}
  1071  	}
  1072  
  1073  	n := bat.Vecs[0].Length()
  1074  	bat.SetRowCount(n)
  1075  	return bat, nil
  1076  }
  1077  
  1078  func needRead(ctx context.Context, param *ExternalParam, proc *process.Process) bool {
  1079  	_, span := trace.Start(ctx, "needRead")
  1080  	defer span.End()
  1081  
  1082  	expr := param.Filter.FilterExpr
  1083  	if expr == nil {
  1084  		return true
  1085  	}
  1086  	if param.Zoneparam.offset >= len(param.Zoneparam.bs) {
  1087  		return true
  1088  	}
  1089  
  1090  	notReportErrCtx := errutil.ContextWithNoReport(proc.Ctx, true)
  1091  
  1092  	meta := param.Zoneparam.bs[param.Zoneparam.offset]
  1093  	columnMap := param.Filter.columnMap
  1094  	var (
  1095  		zms  []objectio.ZoneMap
  1096  		vecs []*vector.Vector
  1097  	)
  1098  
  1099  	if isMonoExpr := plan2.ExprIsZonemappable(proc.Ctx, expr); isMonoExpr {
  1100  		cnt := plan2.AssignAuxIdForExpr(expr, 0)
  1101  		zms = make([]objectio.ZoneMap, cnt)
  1102  		vecs = make([]*vector.Vector, cnt)
  1103  	}
  1104  
  1105  	return colexec.EvaluateFilterByZoneMap(
  1106  		notReportErrCtx, proc, expr, meta, columnMap, zms, vecs)
  1107  }
  1108  
  1109  func getZonemapBatch(ctx context.Context, param *ExternalParam, proc *process.Process, objectReader *blockio.BlockReader) (*batch.Batch, error) {
  1110  	var err error
  1111  	param.Zoneparam.bs, err = objectReader.LoadAllBlocks(param.Ctx, proc.GetMPool())
  1112  	if err != nil {
  1113  		return nil, err
  1114  	}
  1115  	if param.Zoneparam.offset >= len(param.Zoneparam.bs) {
  1116  		return makeBatch(param, 0, proc)
  1117  	}
  1118  
  1119  	if param.Filter.zonemappable {
  1120  		for !needRead(ctx, param, proc) {
  1121  			param.Zoneparam.offset++
  1122  		}
  1123  	}
  1124  	return getBatchFromZonemapFile(ctx, param, proc, objectReader)
  1125  }
  1126  
  1127  func scanZonemapFile(ctx context.Context, param *ExternalParam, proc *process.Process) (*batch.Batch, error) {
  1128  	var err error
  1129  	param.Filter.blockReader, err = blockio.NewFileReader(param.Extern.FileService, param.Fileparam.Filepath)
  1130  	if err != nil {
  1131  		return nil, err
  1132  	}
  1133  
  1134  	bat, err := getZonemapBatch(ctx, param, proc, param.Filter.blockReader)
  1135  	if err != nil {
  1136  		return nil, err
  1137  	}
  1138  
  1139  	if param.Zoneparam.offset >= len(param.Zoneparam.bs) {
  1140  		param.Filter.blockReader = nil
  1141  		param.Zoneparam.bs = nil
  1142  		param.plh = nil
  1143  		param.Fileparam.FileFin++
  1144  		if param.Fileparam.FileFin >= param.Fileparam.FileCnt {
  1145  			param.Fileparam.End = true
  1146  		}
  1147  		param.Zoneparam.offset = 0
  1148  	}
  1149  	return bat, nil
  1150  }
  1151  
  1152  // scanFileData read batch data from external file
  1153  func scanFileData(ctx context.Context, param *ExternalParam, proc *process.Process) (*batch.Batch, error) {
  1154  	if param.Extern.QueryResult {
  1155  		return scanZonemapFile(ctx, param, proc)
  1156  	}
  1157  	if param.Extern.Format == tree.PARQUET {
  1158  		return scanParquetFile(ctx, param, proc)
  1159  	}
  1160  	return scanCsvFile(ctx, param, proc)
  1161  }
  1162  
  1163  func transJson2Lines(ctx context.Context, str string, attrs []string, cols []*plan.ColDef, jsonData string, param *ExternalParam) ([]csvparser.Field, error) {
  1164  	switch jsonData {
  1165  	case tree.OBJECT:
  1166  		return transJsonObject2Lines(ctx, str, attrs, cols, param)
  1167  	case tree.ARRAY:
  1168  		return transJsonArray2Lines(ctx, str, attrs, cols, param)
  1169  	default:
  1170  		return nil, moerr.NewNotSupported(ctx, "the jsonline format '%s' is not support now", jsonData)
  1171  	}
  1172  }
  1173  
// JsonNull is the text (a backslash followed by N) that marks a JSONLINE value
// as NULL: transJsonObject2Lines and transJsonArray2Lines flag a non-JSON
// column value as IsNull when its formatted text equals this constant.
const JsonNull = "\\N"
  1175  
  1176  func transJsonObject2Lines(ctx context.Context, str string, attrs []string, cols []*plan.ColDef, param *ExternalParam) ([]csvparser.Field, error) {
  1177  	var (
  1178  		err error
  1179  		res = make([]csvparser.Field, 0, len(attrs))
  1180  	)
  1181  	if param.prevStr != "" {
  1182  		str = param.prevStr + str
  1183  		param.prevStr = ""
  1184  	}
  1185  	var jsonMap map[string]interface{}
  1186  	var decoder = json.NewDecoder(bytes.NewReader([]byte(str)))
  1187  	decoder.UseNumber()
  1188  	err = decoder.Decode(&jsonMap)
  1189  	if err != nil {
  1190  		logutil.Errorf("json unmarshal err:%v", err)
  1191  		param.prevStr = str
  1192  		return nil, err
  1193  	}
  1194  	if len(jsonMap) < getRealAttrCnt(attrs, cols) {
  1195  		return nil, moerr.NewInternalError(ctx, ColumnCntLargerErrorInfo)
  1196  	}
  1197  	for idx, attr := range attrs {
  1198  		if cols[idx].Hidden {
  1199  			continue
  1200  		}
  1201  		if val, ok := jsonMap[attr]; ok {
  1202  			if val == nil {
  1203  				res = append(res, csvparser.Field{IsNull: true})
  1204  				continue
  1205  			}
  1206  			tp := cols[idx].Typ.Id
  1207  			if tp != int32(types.T_json) {
  1208  				val = fmt.Sprintf("%v", val)
  1209  				res = append(res, csvparser.Field{Val: fmt.Sprintf("%v", val), IsNull: val == JsonNull})
  1210  				continue
  1211  			}
  1212  			var bj bytejson.ByteJson
  1213  			err = bj.UnmarshalObject(val)
  1214  			if err != nil {
  1215  				return nil, err
  1216  			}
  1217  			dt, err := bj.Marshal()
  1218  			if err != nil {
  1219  				return nil, err
  1220  			}
  1221  			res = append(res, csvparser.Field{Val: string(dt)})
  1222  		} else {
  1223  			return nil, moerr.NewInvalidInput(ctx, "the attr %s is not in json", attr)
  1224  		}
  1225  	}
  1226  	return res, nil
  1227  }
  1228  
  1229  func transJsonArray2Lines(ctx context.Context, str string, attrs []string, cols []*plan.ColDef, param *ExternalParam) ([]csvparser.Field, error) {
  1230  	var (
  1231  		err error
  1232  		res = make([]csvparser.Field, 0, len(attrs))
  1233  	)
  1234  	if param.prevStr != "" {
  1235  		str = param.prevStr + str
  1236  		param.prevStr = ""
  1237  	}
  1238  	var jsonArray []interface{}
  1239  	var decoder = json.NewDecoder(bytes.NewReader([]byte(str)))
  1240  	decoder.UseNumber()
  1241  	err = decoder.Decode(&jsonArray)
  1242  	if err != nil {
  1243  		param.prevStr = str
  1244  		return nil, err
  1245  	}
  1246  	if len(jsonArray) < getRealAttrCnt(attrs, cols) {
  1247  		return nil, moerr.NewInternalError(ctx, ColumnCntLargerErrorInfo)
  1248  	}
  1249  	for idx, val := range jsonArray {
  1250  		if val == nil {
  1251  			res = append(res, csvparser.Field{IsNull: true})
  1252  			continue
  1253  		}
  1254  		if idx >= len(cols) {
  1255  			return nil, moerr.NewInvalidInput(ctx, str+" , wrong number of colunms")
  1256  		}
  1257  		tp := cols[idx].Typ.Id
  1258  		if tp != int32(types.T_json) {
  1259  			val = fmt.Sprintf("%v", val)
  1260  			res = append(res, csvparser.Field{Val: fmt.Sprintf("%v", val), IsNull: val == JsonNull})
  1261  			continue
  1262  		}
  1263  		var bj bytejson.ByteJson
  1264  		err = bj.UnmarshalObject(val)
  1265  		if err != nil {
  1266  			return nil, err
  1267  		}
  1268  		dt, err := bj.Marshal()
  1269  		if err != nil {
  1270  			return nil, err
  1271  		}
  1272  		res = append(res, csvparser.Field{Val: string(dt)})
  1273  	}
  1274  	return res, nil
  1275  }
  1276  
  1277  func getNullFlag(nullMap map[string]([]string), attr, field string) bool {
  1278  	if nullMap == nil || len(nullMap[attr]) == 0 {
  1279  		return false
  1280  	}
  1281  	field = strings.ToLower(field)
  1282  	for _, v := range nullMap[attr] {
  1283  		if v == field {
  1284  			return true
  1285  		}
  1286  	}
  1287  	return false
  1288  }
  1289  
  1290  func getFieldFromLine(line []csvparser.Field, colIdx int, param *ExternalParam) csvparser.Field {
  1291  	if catalog.ContainExternalHidenCol(param.Attrs[colIdx]) {
  1292  		return csvparser.Field{Val: param.Fileparam.Filepath}
  1293  	}
  1294  	return line[param.Name2ColIndex[param.Attrs[colIdx]]]
  1295  }
  1296  
  1297  func getOneRowData(bat *batch.Batch, line []csvparser.Field, rowIdx int, param *ExternalParam, mp *mpool.MPool) error {
  1298  	var buf bytes.Buffer
  1299  	for colIdx := range param.Attrs {
  1300  		vec := bat.Vecs[colIdx]
  1301  		if param.Cols[colIdx].Hidden {
  1302  			nulls.Add(vec.GetNulls(), uint64(rowIdx))
  1303  			continue
  1304  		}
  1305  		field := getFieldFromLine(line, colIdx, param)
  1306  		id := types.T(param.Cols[colIdx].Typ.Id)
  1307  		if id != types.T_char && id != types.T_varchar && id != types.T_json &&
  1308  			id != types.T_binary && id != types.T_varbinary && id != types.T_blob && id != types.T_text {
  1309  			field.Val = strings.TrimSpace(field.Val)
  1310  		}
  1311  		isNullOrEmpty := field.IsNull || (getNullFlag(param.Extern.NullMap, param.Attrs[colIdx], field.Val))
  1312  		if id != types.T_char && id != types.T_varchar &&
  1313  			id != types.T_binary && id != types.T_varbinary && id != types.T_json && id != types.T_blob && id != types.T_text {
  1314  			isNullOrEmpty = isNullOrEmpty || len(field.Val) == 0
  1315  		}
  1316  		if isNullOrEmpty {
  1317  			nulls.Add(vec.GetNulls(), uint64(rowIdx))
  1318  			continue
  1319  		}
  1320  		if param.ParallelLoad {
  1321  			buf.WriteString(field.Val)
  1322  			bs := buf.Bytes()
  1323  			err := vector.SetBytesAt(vec, rowIdx, bs, mp)
  1324  			if err != nil {
  1325  				return err
  1326  			}
  1327  			buf.Reset()
  1328  			continue
  1329  		}
  1330  
  1331  		switch id {
  1332  		case types.T_bool:
  1333  			b, err := types.ParseBool(field.Val)
  1334  			if err != nil {
  1335  				return moerr.NewInternalError(param.Ctx, "the input value '%s' is not bool type for column %d", field.Val, colIdx)
  1336  			}
  1337  			if err := vector.SetFixedAt(vec, rowIdx, b); err != nil {
  1338  				return err
  1339  			}
  1340  		case types.T_bit:
  1341  			if len(field.Val) > 8 {
  1342  				return moerr.NewInternalError(param.Ctx, "data too long, len(val) = %v", len(field.Val))
  1343  			}
  1344  
  1345  			width := param.Cols[colIdx].Typ.Width
  1346  			var val uint64
  1347  			for i := 0; i < len(field.Val); i++ {
  1348  				val = (val << 8) | uint64(field.Val[i])
  1349  			}
  1350  			if val > uint64(1<<width-1) {
  1351  				return moerr.NewInternalError(param.Ctx, "data too long, type width = %d, val = %b", width, val)
  1352  			}
  1353  			if err := vector.SetFixedAt(vec, rowIdx, val); err != nil {
  1354  				return err
  1355  			}
  1356  			buf.Reset()
  1357  		case types.T_int8:
  1358  			d, err := strconv.ParseInt(field.Val, 10, 8)
  1359  			if err == nil {
  1360  				if err := vector.SetFixedAt(vec, rowIdx, int8(d)); err != nil {
  1361  					return err
  1362  				}
  1363  			} else {
  1364  				if errors.Is(err, strconv.ErrRange) {
  1365  					logutil.Errorf("parse field[%v] err:%v", field.Val, err)
  1366  					return moerr.NewInternalError(param.Ctx, "the input value '%v' is not int8 type for column %d", field.Val, colIdx)
  1367  				}
  1368  				f, err := strconv.ParseFloat(field.Val, 64)
  1369  				if err != nil || f < math.MinInt8 || f > math.MaxInt8 {
  1370  					logutil.Errorf("parse field[%v] err:%v", field.Val, err)
  1371  					return moerr.NewInternalError(param.Ctx, "the input value '%v' is not int8 type for column %d", field.Val, colIdx)
  1372  				}
  1373  				if err := vector.SetFixedAt(vec, rowIdx, int8(f)); err != nil {
  1374  					return err
  1375  				}
  1376  			}
  1377  		case types.T_int16:
  1378  			d, err := strconv.ParseInt(field.Val, 10, 16)
  1379  			if err == nil {
  1380  				if err := vector.SetFixedAt(vec, rowIdx, int16(d)); err != nil {
  1381  					return err
  1382  				}
  1383  			} else {
  1384  				if errors.Is(err, strconv.ErrRange) {
  1385  					logutil.Errorf("parse field[%v] err:%v", field.Val, err)
  1386  					return moerr.NewInternalError(param.Ctx, "the input value '%v' is not int16 type for column %d", field.Val, colIdx)
  1387  				}
  1388  				f, err := strconv.ParseFloat(field.Val, 64)
  1389  				if err != nil || f < math.MinInt16 || f > math.MaxInt16 {
  1390  					logutil.Errorf("parse field[%v] err:%v", field.Val, err)
  1391  					return moerr.NewInternalError(param.Ctx, "the input value '%v' is not int16 type for column %d", field.Val, colIdx)
  1392  				}
  1393  				if err := vector.SetFixedAt(vec, rowIdx, int16(f)); err != nil {
  1394  					return err
  1395  				}
  1396  			}
  1397  		case types.T_int32:
  1398  			d, err := strconv.ParseInt(field.Val, 10, 32)
  1399  			if err == nil {
  1400  				if err := vector.SetFixedAt(vec, rowIdx, int32(d)); err != nil {
  1401  					return err
  1402  				}
  1403  			} else {
  1404  				if errors.Is(err, strconv.ErrRange) {
  1405  					logutil.Errorf("parse field[%v] err:%v", field.Val, err)
  1406  					return moerr.NewInternalError(param.Ctx, "the input value '%v' is not int32 type for column %d", field.Val, colIdx)
  1407  				}
  1408  				f, err := strconv.ParseFloat(field.Val, 64)
  1409  				if err != nil || f < math.MinInt32 || f > math.MaxInt32 {
  1410  					logutil.Errorf("parse field[%v] err:%v", field.Val, err)
  1411  					return moerr.NewInternalError(param.Ctx, "the input value '%v' is not int32 type for column %d", field.Val, colIdx)
  1412  				}
  1413  				if err := vector.SetFixedAt(vec, rowIdx, int32(f)); err != nil {
  1414  					return err
  1415  				}
  1416  			}
  1417  		case types.T_int64:
  1418  			d, err := strconv.ParseInt(field.Val, 10, 64)
  1419  			if err == nil {
  1420  				if err := vector.SetFixedAt(vec, rowIdx, d); err != nil {
  1421  					return err
  1422  				}
  1423  			} else {
  1424  				if errors.Is(err, strconv.ErrRange) {
  1425  					logutil.Errorf("parse field[%v] err:%v", field.Val, err)
  1426  					return moerr.NewInternalError(param.Ctx, "the input value '%v' is not int64 type for column %d", field.Val, colIdx)
  1427  				}
  1428  				f, err := strconv.ParseFloat(field.Val, 64)
  1429  				if err != nil || f < math.MinInt64 || f > math.MaxInt64 {
  1430  					logutil.Errorf("parse field[%v] err:%v", field.Val, err)
  1431  					return moerr.NewInternalError(param.Ctx, "the input value '%v' is not int64 type for column %d", field.Val, colIdx)
  1432  				}
  1433  				if err := vector.SetFixedAt(vec, rowIdx, int64(f)); err != nil {
  1434  					return err
  1435  				}
  1436  			}
  1437  		case types.T_uint8:
  1438  			d, err := strconv.ParseUint(field.Val, 10, 8)
  1439  			if err == nil {
  1440  				if err := vector.SetFixedAt(vec, rowIdx, uint8(d)); err != nil {
  1441  					return err
  1442  				}
  1443  			} else {
  1444  				if errors.Is(err, strconv.ErrRange) {
  1445  					logutil.Errorf("parse field[%v] err:%v", field.Val, err)
  1446  					return moerr.NewInternalError(param.Ctx, "the input value '%v' is not uint8 type for column %d", field.Val, colIdx)
  1447  				}
  1448  				f, err := strconv.ParseFloat(field.Val, 64)
  1449  				if err != nil || f < 0 || f > math.MaxUint8 {
  1450  					logutil.Errorf("parse field[%v] err:%v", field.Val, err)
  1451  					return moerr.NewInternalError(param.Ctx, "the input value '%v' is not uint8 type for column %d", field.Val, colIdx)
  1452  				}
  1453  				if err := vector.SetFixedAt(vec, rowIdx, uint8(f)); err != nil {
  1454  					return err
  1455  				}
  1456  			}
  1457  		case types.T_uint16:
  1458  			d, err := strconv.ParseUint(field.Val, 10, 16)
  1459  			if err == nil {
  1460  				if err := vector.SetFixedAt(vec, rowIdx, uint16(d)); err != nil {
  1461  					return err
  1462  				}
  1463  			} else {
  1464  				if errors.Is(err, strconv.ErrRange) {
  1465  					logutil.Errorf("parse field[%v] err:%v", field.Val, err)
  1466  					return moerr.NewInternalError(param.Ctx, "the input value '%v' is not uint16 type for column %d", field.Val, colIdx)
  1467  				}
  1468  				f, err := strconv.ParseFloat(field.Val, 64)
  1469  				if err != nil || f < 0 || f > math.MaxUint16 {
  1470  					logutil.Errorf("parse field[%v] err:%v", field.Val, err)
  1471  					return moerr.NewInternalError(param.Ctx, "the input value '%v' is not uint16 type for column %d", field.Val, colIdx)
  1472  				}
  1473  				if err := vector.SetFixedAt(vec, rowIdx, uint16(f)); err != nil {
  1474  					return err
  1475  				}
  1476  			}
  1477  		case types.T_uint32:
  1478  			d, err := strconv.ParseUint(field.Val, 10, 32)
  1479  			if err == nil {
  1480  				if err := vector.SetFixedAt(vec, rowIdx, uint32(d)); err != nil {
  1481  					return err
  1482  				}
  1483  			} else {
  1484  				if errors.Is(err, strconv.ErrRange) {
  1485  					logutil.Errorf("parse field[%v] err:%v", field.Val, err)
  1486  					return moerr.NewInternalError(param.Ctx, "the input value '%v' is not uint32 type for column %d", field.Val, colIdx)
  1487  				}
  1488  				f, err := strconv.ParseFloat(field.Val, 64)
  1489  				if err != nil || f < 0 || f > math.MaxUint32 {
  1490  					logutil.Errorf("parse field[%v] err:%v", field.Val, err)
  1491  					return moerr.NewInternalError(param.Ctx, "the input value '%v' is not uint32 type for column %d", field.Val, colIdx)
  1492  				}
  1493  				if err := vector.SetFixedAt(vec, rowIdx, uint32(f)); err != nil {
  1494  					return err
  1495  				}
  1496  			}
  1497  		case types.T_uint64:
  1498  			d, err := strconv.ParseUint(field.Val, 10, 64)
  1499  			if err == nil {
  1500  				if err := vector.SetFixedAt(vec, rowIdx, d); err != nil {
  1501  					return err
  1502  				}
  1503  			} else {
  1504  				if errors.Is(err, strconv.ErrRange) {
  1505  					logutil.Errorf("parse field[%v] err:%v", field.Val, err)
  1506  					return moerr.NewInternalError(param.Ctx, "the input value '%v' is not uint64 type for column %d", field.Val, colIdx)
  1507  				}
  1508  				f, err := strconv.ParseFloat(field.Val, 64)
  1509  				if err != nil || f < 0 || f > math.MaxUint64 {
  1510  					logutil.Errorf("parse field[%v] err:%v", field.Val, err)
  1511  					return moerr.NewInternalError(param.Ctx, "the input value '%v' is not uint64 type for column %d", field.Val, colIdx)
  1512  				}
  1513  				if err := vector.SetFixedAt(vec, rowIdx, uint64(f)); err != nil {
  1514  					return err
  1515  				}
  1516  			}
  1517  		case types.T_float32:
  1518  			// origin float32 data type
  1519  			if vec.GetType().Scale < 0 || vec.GetType().Width == 0 {
  1520  				d, err := strconv.ParseFloat(field.Val, 32)
  1521  				if err != nil {
  1522  					logutil.Errorf("parse field[%v] err:%v", field.Val, err)
  1523  					return moerr.NewInternalError(param.Ctx, "the input value '%v' is not float32 type for column %d", field.Val, colIdx)
  1524  				}
  1525  				if err := vector.SetFixedAt(vec, rowIdx, float32(d)); err != nil {
  1526  					return err
  1527  				}
  1528  			} else {
  1529  				d, err := types.ParseDecimal128(field.Val, vec.GetType().Width, vec.GetType().Scale)
  1530  				if err != nil {
  1531  					logutil.Errorf("parse field[%v] err:%v", field.Val, err)
  1532  					return moerr.NewInternalError(param.Ctx, "the input value '%v' is not float32 type for column %d", field.Val, colIdx)
  1533  				}
  1534  				if err := vector.SetFixedAt(vec, rowIdx, float32(types.Decimal128ToFloat64(d, vec.GetType().Scale))); err != nil {
  1535  					return err
  1536  				}
  1537  			}
  1538  		case types.T_float64:
  1539  			// origin float64 data type
  1540  			if vec.GetType().Scale < 0 || vec.GetType().Width == 0 {
  1541  				d, err := strconv.ParseFloat(field.Val, 64)
  1542  				if err != nil {
  1543  					logutil.Errorf("parse field[%v] err:%v", field.Val, err)
  1544  					return moerr.NewInternalError(param.Ctx, "the input value '%v' is not float64 type for column %d", field.Val, colIdx)
  1545  				}
  1546  				if err := vector.SetFixedAt(vec, rowIdx, d); err != nil {
  1547  					return err
  1548  				}
  1549  			} else {
  1550  				d, err := types.ParseDecimal128(field.Val, vec.GetType().Width, vec.GetType().Scale)
  1551  				if err != nil {
  1552  					logutil.Errorf("parse field[%v] err:%v", field.Val, err)
  1553  					return moerr.NewInternalError(param.Ctx, "the input value '%v' is not float64 type for column %d", field.Val, colIdx)
  1554  				}
  1555  				if err := vector.SetFixedAt(vec, rowIdx, types.Decimal128ToFloat64(d, vec.GetType().Scale)); err != nil {
  1556  					return err
  1557  				}
  1558  			}
  1559  		case types.T_char, types.T_varchar, types.T_binary, types.T_varbinary, types.T_blob, types.T_text:
  1560  			// XXX Memory accounting?
  1561  			buf.WriteString(field.Val)
  1562  			bs := buf.Bytes()
  1563  			err := vector.SetBytesAt(vec, rowIdx, bs, mp)
  1564  			if err != nil {
  1565  				return err
  1566  			}
  1567  			buf.Reset()
  1568  		case types.T_array_float32:
  1569  			arrBytes, err := types.StringToArrayToBytes[float32](field.Val)
  1570  			if err != nil {
  1571  				return err
  1572  			}
  1573  			err = vector.SetBytesAt(vec, rowIdx, arrBytes, mp)
  1574  			if err != nil {
  1575  				return err
  1576  			}
  1577  			buf.Reset()
  1578  		case types.T_array_float64:
  1579  			arrBytes, err := types.StringToArrayToBytes[float64](field.Val)
  1580  			if err != nil {
  1581  				return err
  1582  			}
  1583  			err = vector.SetBytesAt(vec, rowIdx, arrBytes, mp)
  1584  			if err != nil {
  1585  				return err
  1586  			}
  1587  			buf.Reset()
  1588  		case types.T_json:
  1589  			var jsonBytes []byte
  1590  			if param.Extern.Format != tree.CSV {
  1591  				jsonBytes = []byte(field.Val)
  1592  			} else {
  1593  				field.Val = fmt.Sprintf("%v", strings.Trim(field.Val, "\""))
  1594  				byteJson, err := types.ParseStringToByteJson(field.Val)
  1595  				if err != nil {
  1596  					logutil.Errorf("parse field[%v] err:%v", field.Val, err)
  1597  					return moerr.NewInternalError(param.Ctx, "the input value '%v' is not json type for column %d", field.Val, colIdx)
  1598  				}
  1599  				jsonBytes, err = types.EncodeJson(byteJson)
  1600  				if err != nil {
  1601  					logutil.Errorf("encode json[%v] err:%v", field.Val, err)
  1602  					return moerr.NewInternalError(param.Ctx, "the input value '%v' is not json type for column %d", field.Val, colIdx)
  1603  				}
  1604  			}
  1605  
  1606  			err := vector.SetBytesAt(vec, rowIdx, jsonBytes, mp)
  1607  			if err != nil {
  1608  				return err
  1609  			}
  1610  		case types.T_date:
  1611  			d, err := types.ParseDateCast(field.Val)
  1612  			if err != nil {
  1613  				logutil.Errorf("parse field[%v] err:%v", field.Val, err)
  1614  				return moerr.NewInternalError(param.Ctx, "the input value '%v' is not Date type for column %d", field.Val, colIdx)
  1615  			}
  1616  			if err := vector.SetFixedAt(vec, rowIdx, d); err != nil {
  1617  				return err
  1618  			}
  1619  		case types.T_time:
  1620  			d, err := types.ParseTime(field.Val, vec.GetType().Scale)
  1621  			if err != nil {
  1622  				logutil.Errorf("parse field[%v] err:%v", field.Val, err)
  1623  				return moerr.NewInternalError(param.Ctx, "the input value '%v' is not Time type for column %d", field.Val, colIdx)
  1624  			}
  1625  			if err := vector.SetFixedAt(vec, rowIdx, d); err != nil {
  1626  				return err
  1627  			}
  1628  		case types.T_datetime:
  1629  			d, err := types.ParseDatetime(field.Val, vec.GetType().Scale)
  1630  			if err != nil {
  1631  				logutil.Errorf("parse field[%v] err:%v", field.Val, err)
  1632  				return moerr.NewInternalError(param.Ctx, "the input value '%v' is not Datetime type for column %d", field.Val, colIdx)
  1633  			}
  1634  			if err := vector.SetFixedAt(vec, rowIdx, d); err != nil {
  1635  				return err
  1636  			}
  1637  		case types.T_enum:
  1638  			d, err := strconv.ParseUint(field.Val, 10, 16)
  1639  			if err == nil {
  1640  				if err := vector.SetFixedAt(vec, rowIdx, types.Enum(d)); err != nil {
  1641  					return err
  1642  				}
  1643  			} else if errors.Is(err, strconv.ErrSyntax) {
  1644  				v, err := types.ParseEnum(param.Cols[colIdx].Typ.Enumvalues, field.Val)
  1645  				if err != nil {
  1646  					logutil.Errorf("parse field[%v] err:%v", field.Val, err)
  1647  					return err
  1648  				}
  1649  				if err := vector.SetFixedAt(vec, rowIdx, types.Enum(v)); err != nil {
  1650  					return err
  1651  				}
  1652  			} else {
  1653  				if errors.Is(err, strconv.ErrRange) {
  1654  					logutil.Errorf("parse field[%v] err:%v", field.Val, err)
  1655  					return moerr.NewInternalError(param.Ctx, "the input value '%v' is not uint16 type for column %d", field.Val, colIdx)
  1656  				}
  1657  				f, err := strconv.ParseFloat(field.Val, 64)
  1658  				if err != nil || f < 0 || f > math.MaxUint16 {
  1659  					logutil.Errorf("parse field[%v] err:%v", field.Val, err)
  1660  					return moerr.NewInternalError(param.Ctx, "the input value '%v' is not uint16 type for column %d", field.Val, colIdx)
  1661  				}
  1662  				if err := vector.SetFixedAt(vec, rowIdx, types.Enum(f)); err != nil {
  1663  					return err
  1664  				}
  1665  			}
  1666  		case types.T_decimal64:
  1667  			d, err := types.ParseDecimal64(field.Val, vec.GetType().Width, vec.GetType().Scale)
  1668  			if err != nil {
  1669  				// we tolerate loss of digits.
  1670  				if !moerr.IsMoErrCode(err, moerr.ErrDataTruncated) {
  1671  					logutil.Errorf("parse field[%v] err:%v", field.Val, err)
  1672  					return moerr.NewInternalError(param.Ctx, "the input value '%v' is invalid Decimal64 type for column %d", field.Val, colIdx)
  1673  				}
  1674  			}
  1675  			if err := vector.SetFixedAt(vec, rowIdx, d); err != nil {
  1676  				return err
  1677  			}
  1678  		case types.T_decimal128:
  1679  			d, err := types.ParseDecimal128(field.Val, vec.GetType().Width, vec.GetType().Scale)
  1680  			if err != nil {
  1681  				// we tolerate loss of digits.
  1682  				if !moerr.IsMoErrCode(err, moerr.ErrDataTruncated) {
  1683  					logutil.Errorf("parse field[%v] err:%v", field.Val, err)
  1684  					return moerr.NewInternalError(param.Ctx, "the input value '%v' is invalid Decimal128 type for column %d", field.Val, colIdx)
  1685  				}
  1686  			}
  1687  			if err := vector.SetFixedAt(vec, rowIdx, d); err != nil {
  1688  				return err
  1689  			}
  1690  		case types.T_timestamp:
  1691  			t := time.Local
  1692  			d, err := types.ParseTimestamp(t, field.Val, vec.GetType().Scale)
  1693  			if err != nil {
  1694  				logutil.Errorf("parse field[%v] err:%v", field.Val, err)
  1695  				return moerr.NewInternalError(param.Ctx, "the input value '%v' is not Timestamp type for column %d", field.Val, colIdx)
  1696  			}
  1697  			if err := vector.SetFixedAt(vec, rowIdx, d); err != nil {
  1698  				return err
  1699  			}
  1700  		case types.T_uuid:
  1701  			d, err := types.ParseUuid(field.Val)
  1702  			if err != nil {
  1703  				logutil.Errorf("parse field[%v] err:%v", field.Val, err)
  1704  				return moerr.NewInternalError(param.Ctx, "the input value '%v' is not uuid type for column %d", field.Val, colIdx)
  1705  			}
  1706  			if err := vector.SetFixedAt(vec, rowIdx, d); err != nil {
  1707  				return err
  1708  			}
  1709  		default:
  1710  			return moerr.NewInternalError(param.Ctx, "the value type %d is not support now", param.Cols[rowIdx].Typ.Id)
  1711  		}
  1712  	}
  1713  	return nil
  1714  }
  1715  
  1716  // Read reads len count records from r.
  1717  // Each record is a slice of fields.
  1718  // A successful call returns err == nil, not err == io.EOF. Because ReadAll is
  1719  // defined to read until EOF, it does not treat an end of file as an error to be
  1720  // reported.
  1721  func readCountStringLimitSize(r *csvparser.CSVParser, ctx context.Context, size uint64, records [][]csvparser.Field) (int, bool, error) {
  1722  	var curBatchSize uint64 = 0
  1723  	for i := 0; i < OneBatchMaxRow; i++ {
  1724  		select {
  1725  		case <-ctx.Done():
  1726  			return i, true, nil
  1727  		default:
  1728  		}
  1729  		record, err := r.Read()
  1730  		if err != nil {
  1731  			if err == io.EOF {
  1732  				return i, true, nil
  1733  			}
  1734  			return i, true, err
  1735  		}
  1736  		records[i] = record
  1737  		for j := 0; j < len(record); j++ {
  1738  			curBatchSize += uint64(len(record[j].Val))
  1739  		}
  1740  		if curBatchSize >= size {
  1741  			return i + 1, false, nil
  1742  		}
  1743  	}
  1744  	return OneBatchMaxRow, false, nil
  1745  }
  1746  
  1747  func loadFormatIsValid(param *tree.ExternParam) bool {
  1748  	switch param.Format {
  1749  	case tree.JSONLINE, tree.CSV, tree.PARQUET:
  1750  		return true
  1751  	}
  1752  	return false
  1753  }