github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/colexec/external/external_test.go (about)

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package external
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"encoding/json"
    21  	"fmt"
    22  	"os"
    23  	"path/filepath"
    24  	"testing"
    25  
    26  	"github.com/matrixorigin/matrixone/pkg/catalog"
    27  	"github.com/matrixorigin/matrixone/pkg/container/types"
    28  	"github.com/matrixorigin/matrixone/pkg/pb/pipeline"
    29  	"github.com/matrixorigin/matrixone/pkg/pb/plan"
    30  	"github.com/matrixorigin/matrixone/pkg/sql/parsers/tree"
    31  	plan2 "github.com/matrixorigin/matrixone/pkg/sql/plan"
    32  	"github.com/matrixorigin/matrixone/pkg/sql/plan/function"
    33  	"github.com/matrixorigin/matrixone/pkg/sql/util/csvparser"
    34  	"github.com/matrixorigin/matrixone/pkg/testutil"
    35  	"github.com/matrixorigin/matrixone/pkg/vm"
    36  	"github.com/matrixorigin/matrixone/pkg/vm/process"
    37  	"github.com/smartystreets/goconvey/convey"
    38  	"github.com/stretchr/testify/assert"
    39  	"github.com/stretchr/testify/require"
    40  )
    41  
    42  const (
    43  	Rows = 10 // default rows
    44  )
    45  
    46  // add unit tests for cases
    47  type externalTestCase struct {
    48  	arg      *Argument
    49  	types    []types.Type
    50  	proc     *process.Process
    51  	cancel   context.CancelFunc
    52  	format   string
    53  	jsondata string
    54  }
    55  
    56  var (
    57  	cases         []externalTestCase
    58  	defaultOption = []string{"filepath", "abc", "format", "jsonline", "jsondata", "array"}
    59  )
    60  
    61  func newTestCase(format, jsondata string) externalTestCase {
    62  	proc := testutil.NewProcess()
    63  	proc.FileService = testutil.NewFS()
    64  	ctx, cancel := context.WithCancel(context.Background())
    65  	return externalTestCase{
    66  		proc:  proc,
    67  		types: []types.Type{types.T_int8.ToType()},
    68  		arg: &Argument{
    69  			Es: &ExternalParam{
    70  				ExParamConst: ExParamConst{
    71  					Ctx: ctx,
    72  				},
    73  				ExParam: ExParam{
    74  					Fileparam: &ExFileparam{},
    75  					Filter:    &FilterParam{},
    76  				},
    77  			},
    78  			OperatorBase: vm.OperatorBase{
    79  				OperatorInfo: vm.OperatorInfo{
    80  					Idx:     1,
    81  					IsFirst: false,
    82  					IsLast:  false,
    83  				},
    84  			},
    85  		},
    86  		cancel:   cancel,
    87  		format:   format,
    88  		jsondata: jsondata,
    89  	}
    90  }
    91  
    92  func init() {
    93  	cases = []externalTestCase{
    94  		newTestCase(tree.CSV, ""),
    95  		newTestCase(tree.JSONLINE, tree.OBJECT),
    96  		newTestCase(tree.JSONLINE, tree.ARRAY),
    97  	}
    98  }
    99  
   100  func Test_String(t *testing.T) {
   101  	buf := new(bytes.Buffer)
   102  	cases[0].arg.String(buf)
   103  }
   104  
   105  func Test_Prepare(t *testing.T) {
   106  	convey.Convey("external Prepare", t, func() {
   107  		for _, tcs := range cases {
   108  			param := tcs.arg.Es
   109  			extern := &tree.ExternParam{
   110  				ExParamConst: tree.ExParamConst{
   111  					Filepath: "",
   112  					Tail: &tree.TailParameter{
   113  						IgnoredLines: 0,
   114  					},
   115  					Format: tcs.format,
   116  					Option: defaultOption,
   117  				},
   118  				ExParam: tree.ExParam{
   119  					FileService: tcs.proc.FileService,
   120  					JsonData:    tcs.jsondata,
   121  					Ctx:         context.Background(),
   122  				},
   123  			}
   124  			json_byte, err := json.Marshal(extern)
   125  			if err != nil {
   126  				panic(err)
   127  			}
   128  			param.CreateSql = string(json_byte)
   129  			tcs.arg.Es.Extern = extern
   130  			err = tcs.arg.Prepare(tcs.proc)
   131  			convey.So(err, convey.ShouldBeNil)
   132  			convey.So(param.FileList, convey.ShouldBeNil)
   133  			convey.So(param.Fileparam.FileCnt, convey.ShouldEqual, 0)
   134  
   135  			extern.Format = tcs.format
   136  			json_byte, err = json.Marshal(extern)
   137  			convey.So(err, convey.ShouldBeNil)
   138  			param.CreateSql = string(json_byte)
   139  			err = tcs.arg.Prepare(tcs.proc)
   140  			convey.So(err, convey.ShouldBeNil)
   141  
   142  			if tcs.format == tree.JSONLINE {
   143  				extern = &tree.ExternParam{
   144  					ExParamConst: tree.ExParamConst{
   145  						Filepath: "",
   146  						Tail: &tree.TailParameter{
   147  							IgnoredLines: 0,
   148  						},
   149  						Format: tcs.format,
   150  						Option: defaultOption,
   151  					},
   152  				}
   153  				extern.JsonData = tcs.jsondata
   154  				json_byte, err = json.Marshal(extern)
   155  				convey.So(err, convey.ShouldBeNil)
   156  				param.CreateSql = string(json_byte)
   157  				err = tcs.arg.Prepare(tcs.proc)
   158  				convey.So(err, convey.ShouldBeNil)
   159  				convey.So(param.FileList, convey.ShouldResemble, []string(nil))
   160  				convey.So(param.Fileparam.FileCnt, convey.ShouldEqual, 0)
   161  
   162  				extern.Option = []string{"filepath", "abc", "format", "jsonline", "jsondata", "array"}
   163  				json_byte, err = json.Marshal(extern)
   164  				convey.So(err, convey.ShouldBeNil)
   165  				param.CreateSql = string(json_byte)
   166  
   167  				err = tcs.arg.Prepare(tcs.proc)
   168  				convey.So(err, convey.ShouldBeNil)
   169  			}
   170  		}
   171  	})
   172  }
   173  
   174  func Test_Call(t *testing.T) {
   175  	convey.Convey("external Call", t, func() {
   176  		for _, tcs := range cases {
   177  			param := tcs.arg.Es
   178  			extern := &tree.ExternParam{
   179  				ExParamConst: tree.ExParamConst{
   180  					Filepath: "",
   181  					Tail: &tree.TailParameter{
   182  						IgnoredLines: 0,
   183  					},
   184  					Format: tcs.format,
   185  				},
   186  				ExParam: tree.ExParam{
   187  					FileService: tcs.proc.FileService,
   188  					JsonData:    tcs.jsondata,
   189  					Ctx:         context.Background(),
   190  				},
   191  			}
   192  			param.Extern = extern
   193  			param.Fileparam.End = false
   194  			param.FileList = []string{"abc.txt"}
   195  			param.FileOffsetTotal = []*pipeline.FileOffset{
   196  				{
   197  					Offset: []int64{0, -1},
   198  				},
   199  			}
   200  			param.FileSize = []int64{1}
   201  			end, err := tcs.arg.Call(tcs.proc)
   202  			convey.So(err, convey.ShouldNotBeNil)
   203  			convey.So(end.Status == vm.ExecStop, convey.ShouldBeFalse)
   204  
   205  			param.Fileparam.End = false
   206  			end, err = tcs.arg.Call(tcs.proc)
   207  			convey.So(err, convey.ShouldBeNil)
   208  			convey.So(end.Status == vm.ExecStop, convey.ShouldBeTrue)
   209  
   210  			param.Fileparam.End = true
   211  			end, err = tcs.arg.Call(tcs.proc)
   212  			convey.So(err, convey.ShouldBeNil)
   213  			convey.So(end.Status == vm.ExecStop, convey.ShouldBeTrue)
   214  		}
   215  	})
   216  }
   217  
   218  func Test_getCompressType(t *testing.T) {
   219  	convey.Convey("getCompressType succ", t, func() {
   220  		param := &tree.ExternParam{
   221  			ExParamConst: tree.ExParamConst{
   222  				CompressType: tree.GZIP,
   223  			},
   224  			ExParam: tree.ExParam{
   225  				Ctx: context.Background(),
   226  			},
   227  		}
   228  		compress := GetCompressType(param, param.Filepath)
   229  		convey.So(compress, convey.ShouldEqual, param.CompressType)
   230  
   231  		param.CompressType = tree.AUTO
   232  		param.Filepath = "a.gz"
   233  		compress = GetCompressType(param, param.Filepath)
   234  		convey.So(compress, convey.ShouldEqual, tree.GZIP)
   235  
   236  		param.Filepath = "a.bz2"
   237  		compress = GetCompressType(param, param.Filepath)
   238  		convey.So(compress, convey.ShouldEqual, tree.BZIP2)
   239  
   240  		param.Filepath = "a.lz4"
   241  		compress = GetCompressType(param, param.Filepath)
   242  		convey.So(compress, convey.ShouldEqual, tree.LZ4)
   243  
   244  		param.Filepath = "a.csv"
   245  		compress = GetCompressType(param, param.Filepath)
   246  		convey.So(compress, convey.ShouldEqual, tree.NOCOMPRESS)
   247  
   248  		param.Filepath = "a"
   249  		compress = GetCompressType(param, param.Filepath)
   250  		convey.So(compress, convey.ShouldEqual, tree.NOCOMPRESS)
   251  	})
   252  }
   253  
   254  func Test_getUnCompressReader(t *testing.T) {
   255  	convey.Convey("getUnCompressReader succ", t, func() {
   256  		param := &tree.ExternParam{
   257  			ExParamConst: tree.ExParamConst{
   258  				CompressType: tree.NOCOMPRESS,
   259  			},
   260  			ExParam: tree.ExParam{
   261  				Ctx: context.Background(),
   262  			},
   263  		}
   264  		read, err := getUnCompressReader(param, param.Filepath, nil)
   265  		convey.So(read, convey.ShouldBeNil)
   266  		convey.So(err, convey.ShouldBeNil)
   267  
   268  		param.CompressType = tree.BZIP2
   269  		read, err = getUnCompressReader(param, param.Filepath, &os.File{})
   270  		convey.So(read, convey.ShouldNotBeNil)
   271  		convey.So(err, convey.ShouldBeNil)
   272  
   273  		param.CompressType = tree.FLATE
   274  		read, err = getUnCompressReader(param, param.Filepath, &os.File{})
   275  		convey.So(read, convey.ShouldNotBeNil)
   276  		convey.So(err, convey.ShouldBeNil)
   277  
   278  		param.CompressType = tree.LZ4
   279  		read, err = getUnCompressReader(param, param.Filepath, &os.File{})
   280  		convey.So(read, convey.ShouldNotBeNil)
   281  		convey.So(err, convey.ShouldBeNil)
   282  
   283  		param.CompressType = tree.LZW
   284  		read, err = getUnCompressReader(param, param.Filepath, &os.File{})
   285  		convey.So(read, convey.ShouldBeNil)
   286  		convey.So(err, convey.ShouldNotBeNil)
   287  
   288  		param.CompressType = "abc"
   289  		read, err = getUnCompressReader(param, param.Filepath, &os.File{})
   290  		convey.So(read, convey.ShouldBeNil)
   291  		convey.So(err, convey.ShouldNotBeNil)
   292  	})
   293  }
   294  
   295  func Test_makeBatch(t *testing.T) {
   296  	convey.Convey("makeBatch succ", t, func() {
   297  		col := &plan.ColDef{
   298  			Typ: plan.Type{
   299  				Id: int32(types.T_bool),
   300  			},
   301  		}
   302  		param := &ExternalParam{
   303  			ExParamConst: ExParamConst{
   304  				Cols:  []*plan.ColDef{col},
   305  				Attrs: []string{"a"},
   306  			},
   307  		}
   308  		plh := &ParseLineHandler{
   309  			batchSize: 1,
   310  		}
   311  		_, err := makeBatch(param, plh.batchSize, testutil.NewProc())
   312  		convey.So(err, convey.ShouldBeNil)
   313  	})
   314  }
   315  
   316  func Test_getBatchData(t *testing.T) {
   317  	convey.Convey("getBatchData succ", t, func() {
   318  		line := []string{"1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "2020-09-07",
   319  			"2020-09-07 00:00:00", "16", "17", "2020-09-07 00:00:00"}
   320  		attrs := []string{"col1", "col2", "col3", "col4", "col5", "col6", "col7", "col8", "col9", "col10",
   321  			"col11", "col12", "col13", "col14", "col15", "col16", "col17", "col18"}
   322  
   323  		buf := bytes.Buffer{}
   324  		buf.WriteString("{")
   325  		for idx, attr := range attrs {
   326  			buf.WriteString(fmt.Sprintf(`"%s":"%s"`, attr, line[idx]))
   327  			if idx != len(attrs)-1 {
   328  				buf.WriteString(",")
   329  			}
   330  		}
   331  		buf.WriteString("}")
   332  		jsonline_object := []string{buf.String()}
   333  		buf.Reset()
   334  
   335  		for idx, attr := range attrs {
   336  			if idx == 0 {
   337  				buf.WriteString(fmt.Sprintf(`"%s":"%s"`, line[idx], line[idx]))
   338  			} else {
   339  				buf.WriteString(fmt.Sprintf(`"%s":"%s"`, attr, line[idx]))
   340  			}
   341  			if idx != len(attrs)-1 {
   342  				buf.WriteString(",")
   343  			}
   344  		}
   345  		jsonline_object_key_not_match := []string{"{" + buf.String() + "}"}
   346  		buf.Reset()
   347  
   348  		buf.WriteString("[")
   349  		for idx := range line {
   350  			buf.WriteString(fmt.Sprintf(`"%s"`, line[idx]))
   351  			if idx != len(attrs)-1 {
   352  				buf.WriteString(",")
   353  			}
   354  		}
   355  		buf.WriteString("]")
   356  		jsonline_array := []string{buf.String()}
   357  		buf.Reset()
   358  
   359  		buf.WriteString("{")
   360  		for i := 0; i < len(line)-1; i++ {
   361  			buf.WriteString(fmt.Sprintf(`"%s":"%s",`, attrs[i], line[i]))
   362  			if i != len(line)-2 {
   363  				buf.WriteString(",")
   364  			}
   365  		}
   366  		buf.WriteString("}")
   367  		jsonline_object_less := []string{buf.String()}
   368  		buf.Reset()
   369  
   370  		buf.WriteString("[")
   371  		for i := 0; i < len(line)-1; i++ {
   372  			buf.WriteString(fmt.Sprintf(`"%s"`, line[i]))
   373  			if i != len(line)-2 {
   374  				buf.WriteString(",")
   375  			}
   376  		}
   377  		buf.WriteString("]")
   378  		jsonline_array_less := []string{buf.String()}
   379  
   380  		buildFields := func(ls []string) []csvparser.Field {
   381  			ret := make([]csvparser.Field, 0, len(ls))
   382  			for _, s := range ls {
   383  				ret = append(ret, csvparser.Field{Val: s})
   384  			}
   385  			return ret
   386  		}
   387  
   388  		cols := []*plan.ColDef{
   389  			{
   390  				Typ: plan.Type{
   391  					Id: int32(types.T_bool),
   392  				},
   393  			},
   394  			{
   395  				Typ: plan.Type{
   396  					Id: int32(types.T_int8),
   397  				},
   398  			},
   399  			{
   400  				Typ: plan.Type{
   401  					Id: int32(types.T_int16),
   402  				},
   403  			},
   404  			{
   405  				Typ: plan.Type{
   406  					Id: int32(types.T_int32),
   407  				},
   408  			},
   409  			{
   410  				Typ: plan.Type{
   411  					Id: int32(types.T_int64),
   412  				},
   413  			},
   414  			{
   415  				Typ: plan.Type{
   416  					Id: int32(types.T_uint8),
   417  				},
   418  			},
   419  			{
   420  				Typ: plan.Type{
   421  					Id: int32(types.T_uint16),
   422  				},
   423  			},
   424  			{
   425  				Typ: plan.Type{
   426  					Id: int32(types.T_uint32),
   427  				},
   428  			},
   429  			{
   430  				Typ: plan.Type{
   431  					Id: int32(types.T_uint64),
   432  				},
   433  			},
   434  			{
   435  				Typ: plan.Type{
   436  					Id:    int32(types.T_float32),
   437  					Scale: -1,
   438  				},
   439  			},
   440  			{
   441  				Typ: plan.Type{
   442  					Id:    int32(types.T_float64),
   443  					Scale: -1,
   444  				},
   445  			},
   446  			{
   447  				Typ: plan.Type{
   448  					Id: int32(types.T_varchar),
   449  				},
   450  			},
   451  			{
   452  				Typ: plan.Type{
   453  					Id: int32(types.T_json),
   454  				},
   455  			},
   456  			{
   457  				Typ: plan.Type{
   458  					Id: int32(types.T_date),
   459  				},
   460  			},
   461  			{
   462  				Typ: plan.Type{
   463  					Id: int32(types.T_datetime),
   464  				},
   465  			},
   466  			{
   467  				Typ: plan.Type{
   468  					Id:    int32(types.T_decimal64),
   469  					Width: 15,
   470  					Scale: 0,
   471  				},
   472  			},
   473  			{
   474  				Typ: plan.Type{
   475  					Id:    int32(types.T_decimal128),
   476  					Width: 17,
   477  					Scale: 0,
   478  				},
   479  			},
   480  			{
   481  				Typ: plan.Type{
   482  					Id: int32(types.T_timestamp),
   483  				},
   484  			},
   485  		}
   486  		param := &ExternalParam{
   487  			ExParamConst: ExParamConst{
   488  				Attrs: attrs,
   489  				Cols:  cols,
   490  				Extern: &tree.ExternParam{
   491  					ExParamConst: tree.ExParamConst{
   492  						Tail: &tree.TailParameter{
   493  							Fields: &tree.Fields{},
   494  						},
   495  						Format: tree.CSV,
   496  					},
   497  					ExParam: tree.ExParam{
   498  						Ctx: context.Background(),
   499  					},
   500  				},
   501  			},
   502  		}
   503  		param.Name2ColIndex = make(map[string]int32)
   504  		for i := 0; i < len(attrs); i++ {
   505  			param.Name2ColIndex[attrs[i]] = int32(i)
   506  		}
   507  		plh := &ParseLineHandler{
   508  			batchSize:      1,
   509  			moCsvLineArray: [][]csvparser.Field{buildFields(line)},
   510  		}
   511  
   512  		proc := testutil.NewProc()
   513  		_, err := getBatchData(param, plh, proc)
   514  		convey.So(err, convey.ShouldBeNil)
   515  
   516  		plh.moCsvLineArray = [][]csvparser.Field{buildFields(line[:1])}
   517  		_, err = getBatchData(param, plh, proc)
   518  		convey.So(err, convey.ShouldNotBeNil)
   519  
   520  		fields := make([]csvparser.Field, len(attrs))
   521  		for i := range attrs {
   522  			fields[i].IsNull = true
   523  		}
   524  		plh.moCsvLineArray = [][]csvparser.Field{fields}
   525  		_, err = getBatchData(param, plh, proc)
   526  		convey.So(err, convey.ShouldBeNil)
   527  
   528  		line = []string{"0", "1.0", "2.0", "3.0", "4.0", "5.0", "6.0", "7.0", "8.0", "9.0", "10.0", "11.0", "13", "2020-09-07",
   529  			"2020-09-07 00:00:00", "16", "17", "2020-09-07 00:00:00"}
   530  		plh.moCsvLineArray = [][]csvparser.Field{buildFields(line)}
   531  		_, err = getBatchData(param, plh, proc)
   532  		convey.So(err, convey.ShouldBeNil)
   533  
   534  		line = []string{"truefalse", "128", "32768", "2147483648", "9223372036854775808", "256", "65536", "4294967296", "18446744073709551616",
   535  			"float32", "float64", "", "13", "date", "datetime", "decimal64", "decimal128", "timestamp"}
   536  		for i := 0; i < len(attrs); i++ {
   537  			tmp := attrs[i:]
   538  			param.Attrs = tmp
   539  			param.Cols = cols[i:]
   540  			plh.moCsvLineArray = [][]csvparser.Field{buildFields(line)}
   541  			_, err = getBatchData(param, plh, proc)
   542  			convey.So(err, convey.ShouldNotBeNil)
   543  		}
   544  
   545  		param.Extern.Tail.Fields.EnclosedBy = &tree.EnclosedBy{Value: 't'}
   546  		_, err = getBatchData(param, plh, proc)
   547  		convey.So(err, convey.ShouldNotBeNil)
   548  
   549  		line[1] = "128.9"
   550  		line[2] = "32768.9"
   551  		line[3] = "2147483648.9"
   552  		line[4] = "a.9"
   553  		line[5] = "256.9"
   554  		line[6] = "65536.9"
   555  		line[7] = "4294967296.9"
   556  		line[8] = "a.9"
   557  		for i := 1; i <= 8; i++ {
   558  			tmp := attrs[i:]
   559  			param.Attrs = tmp
   560  			param.Cols = cols[i:]
   561  			plh.moCsvLineArray = [][]csvparser.Field{buildFields(line)}
   562  			_, err = getBatchData(param, plh, proc)
   563  			convey.So(err, convey.ShouldNotBeNil)
   564  		}
   565  
   566  		//test jsonline
   567  		param.Extern.Format = tree.JSONLINE
   568  		param.Extern.JsonData = tree.OBJECT
   569  		param.Attrs = attrs
   570  		param.Cols = cols
   571  		plh.moCsvLineArray = [][]csvparser.Field{buildFields(jsonline_object)}
   572  		_, err = getBatchData(param, plh, proc)
   573  		convey.So(err, convey.ShouldBeNil)
   574  		plh.moCsvLineArray = [][]csvparser.Field{buildFields(jsonline_object_less)}
   575  		_, err = getBatchData(param, plh, proc)
   576  		convey.So(err, convey.ShouldNotBeNil)
   577  		plh.moCsvLineArray = [][]csvparser.Field{buildFields(jsonline_object_key_not_match)}
   578  		_, err = getBatchData(param, plh, proc)
   579  		convey.So(err, convey.ShouldNotBeNil)
   580  
   581  		param.Extern.Format = tree.CSV
   582  		_, err = getBatchData(param, plh, proc)
   583  		convey.So(err, convey.ShouldNotBeNil)
   584  
   585  		param.Extern.Format = tree.JSONLINE
   586  		param.Extern.JsonData = tree.ARRAY
   587  		param.prevStr = ""
   588  		plh.moCsvLineArray = [][]csvparser.Field{buildFields(jsonline_array)}
   589  		_, err = getBatchData(param, plh, proc)
   590  		convey.So(err, convey.ShouldBeNil)
   591  		prevStr, str := jsonline_array[0][:len(jsonline_array[0])-2], jsonline_array[0][len(jsonline_array[0])-2:]
   592  		plh.moCsvLineArray = [][]csvparser.Field{{{Val: prevStr}}}
   593  		_, err = getBatchData(param, plh, proc)
   594  		convey.So(err, convey.ShouldBeNil)
   595  		convey.So(param.prevStr, convey.ShouldEqual, prevStr)
   596  
   597  		plh.moCsvLineArray = [][]csvparser.Field{{{Val: str}}}
   598  		_, err = getBatchData(param, plh, proc)
   599  		convey.So(err, convey.ShouldBeNil)
   600  
   601  		param.Extern.JsonData = "test"
   602  		_, err = getBatchData(param, plh, proc)
   603  		convey.So(err, convey.ShouldNotBeNil)
   604  
   605  		plh.moCsvLineArray = [][]csvparser.Field{buildFields(jsonline_array_less)}
   606  		_, err = getBatchData(param, plh, proc)
   607  		convey.So(err, convey.ShouldNotBeNil)
   608  
   609  		jsonline_array_less[0] = jsonline_object_less[0][1:]
   610  		plh.moCsvLineArray = [][]csvparser.Field{buildFields(jsonline_array_less)}
   611  		_, err = getBatchData(param, plh, proc)
   612  		convey.So(err, convey.ShouldNotBeNil)
   613  		jsonline_array = append(jsonline_array, jsonline_array_less...)
   614  		plh.moCsvLineArray = [][]csvparser.Field{buildFields(jsonline_array)}
   615  		_, err = getBatchData(param, plh, proc)
   616  		convey.So(err, convey.ShouldNotBeNil)
   617  	})
   618  }
   619  
   620  func TestReadDirSymlink(t *testing.T) {
   621  	root := t.TempDir()
   622  	ctx := context.Background()
   623  
   624  	// create a/b/c
   625  	err := os.MkdirAll(filepath.Join(root, "a", "b", "c"), 0755)
   626  	assert.Nil(t, err)
   627  
   628  	// write a/b/c/foo
   629  	err = os.WriteFile(filepath.Join(root, "a", "b", "c", "foo"), []byte("abc"), 0644)
   630  	assert.Nil(t, err)
   631  
   632  	// symlink a/b/d to a/b/c
   633  	err = os.Symlink(
   634  		filepath.Join(root, "a", "b", "c"),
   635  		filepath.Join(root, "a", "b", "d"),
   636  	)
   637  	assert.Nil(t, err)
   638  
   639  	// read a/b/d/foo
   640  	fooPathInB := filepath.Join(root, "a", "b", "d", "foo")
   641  	files, _, err := plan2.ReadDir(&tree.ExternParam{
   642  		ExParamConst: tree.ExParamConst{
   643  			Filepath: fooPathInB,
   644  		},
   645  		ExParam: tree.ExParam{
   646  			Ctx: ctx,
   647  		},
   648  	})
   649  	assert.Nil(t, err)
   650  	assert.Equal(t, 1, len(files))
   651  	assert.Equal(t, fooPathInB, files[0])
   652  
   653  	path1 := root + "/a//b/./../b/c/foo"
   654  	files1, _, err := plan2.ReadDir(&tree.ExternParam{
   655  		ExParamConst: tree.ExParamConst{
   656  			Filepath: path1,
   657  		},
   658  		ExParam: tree.ExParam{
   659  			Ctx: ctx,
   660  		},
   661  	})
   662  	assert.Nil(t, err)
   663  	pathWant1 := root + "/a/b/c/foo"
   664  	assert.Equal(t, 1, len(files1))
   665  	assert.Equal(t, pathWant1, files1[0])
   666  }
   667  
   668  func Test_fliterByAccountAndFilename(t *testing.T) {
   669  	type args struct {
   670  		node     *plan.Node
   671  		proc     *process.Process
   672  		fileList []string
   673  		fileSize []int64
   674  	}
   675  
   676  	files := []struct {
   677  		date types.Date
   678  		path string
   679  		size int64
   680  	}{
   681  		{738551, "etl:/sys/logs/2023/02/01/filepath", 1},
   682  		{738552, "etl:/sys/logs/2023/02/02/filepath", 2},
   683  		{738553, "etl:/sys/logs/2023/02/03/filepath", 3},
   684  		{738554, "etl:/sys/logs/2023/02/04/filepath", 4},
   685  		{738555, "etl:/sys/logs/2023/02/05/filepath", 5},
   686  		{738556, "etl:/sys/logs/2023/02/06/filepath", 6},
   687  	}
   688  
   689  	toPathArr := func(files []struct {
   690  		date types.Date
   691  		path string
   692  		size int64
   693  	}) []string {
   694  		fileList := make([]string, len(files))
   695  		for idx, f := range files {
   696  			fileList[idx] = f.path
   697  		}
   698  		return fileList
   699  	}
   700  	toSizeArr := func(files []struct {
   701  		date types.Date
   702  		path string
   703  		size int64
   704  	}) []int64 {
   705  		fileSize := make([]int64, len(files))
   706  		for idx, f := range files {
   707  			fileSize[idx] = f.size
   708  		}
   709  		return fileSize
   710  	}
   711  
   712  	fileList := toPathArr(files)
   713  	fileSize := toSizeArr(files)
   714  
   715  	e, err := function.GetFunctionByName(context.Background(), "=", []types.Type{types.T_date.ToType(), types.T_date.ToType()})
   716  	if err != nil {
   717  		panic(err)
   718  	}
   719  	equalDate2DateFid := e.GetEncodedOverloadID()
   720  
   721  	e, err = function.GetFunctionByName(context.Background(), "<", []types.Type{types.T_date.ToType(), types.T_date.ToType()})
   722  	if err != nil {
   723  		panic(err)
   724  	}
   725  	lessDate2DateFid := e.GetEncodedOverloadID()
   726  
   727  	e, err = function.GetFunctionByName(context.Background(), "mo_log_date", []types.Type{types.T_varchar.ToType()})
   728  	if err != nil {
   729  		panic(err)
   730  	}
   731  	mologdateFid := e.GetEncodedOverloadID()
   732  	tableName := "dummy_table"
   733  
   734  	mologdateConst := func(idx int) *plan.Expr {
   735  		return &plan.Expr{
   736  			Typ: plan.Type{
   737  				Id: int32(types.T_date),
   738  			},
   739  			Expr: &plan.Expr_Lit{
   740  				Lit: &plan.Literal{
   741  					Isnull: false,
   742  					Value: &plan.Literal_Dateval{
   743  						Dateval: int32(files[idx].date),
   744  					},
   745  				},
   746  			},
   747  		}
   748  	}
   749  	mologdateFunc := func() *plan.Expr {
   750  		return &plan.Expr{
   751  			Typ: plan.Type{
   752  				Id: int32(types.T_date),
   753  			},
   754  			Expr: &plan.Expr_F{
   755  				F: &plan.Function{
   756  					Func: &plan.ObjectRef{Obj: mologdateFid, ObjName: "mo_log_date"},
   757  					Args: []*plan.Expr{
   758  						{
   759  							Typ: plan.Type{
   760  								Id: int32(types.T_varchar),
   761  							},
   762  							Expr: &plan.Expr_Col{
   763  								Col: &plan.ColRef{
   764  									RelPos: 0,
   765  									ColPos: 0,
   766  									Name:   tableName + "." + catalog.ExternalFilePath,
   767  								},
   768  							},
   769  						},
   770  					},
   771  				},
   772  			},
   773  		}
   774  	}
   775  
   776  	nodeWithFunction := func(expr *plan.Expr_F) *plan.Node {
   777  		return &plan.Node{
   778  			NodeType: plan.Node_EXTERNAL_SCAN,
   779  			Stats:    &plan.Stats{},
   780  			TableDef: &plan.TableDef{
   781  				TableType: "func_table",
   782  				TblFunc: &plan.TableFunction{
   783  					Name: tableName,
   784  				},
   785  				Cols: []*plan.ColDef{
   786  					{
   787  						Name: catalog.ExternalFilePath,
   788  						Typ: plan.Type{
   789  							Id:    int32(types.T_varchar),
   790  							Width: types.MaxVarcharLen,
   791  							Table: tableName,
   792  						},
   793  					},
   794  				},
   795  			},
   796  			FilterList: []*plan.Expr{
   797  				{
   798  					Typ: plan.Type{
   799  						Id: int32(types.T_bool),
   800  					},
   801  					Expr: expr,
   802  				},
   803  			},
   804  		}
   805  	}
   806  
   807  	tests := []struct {
   808  		name  string
   809  		args  args
   810  		want  []string
   811  		want1 []int64
   812  	}{
   813  		{
   814  			name: "mo_log_date_20230205",
   815  			args: args{
   816  				node: nodeWithFunction(&plan.Expr_F{
   817  					F: &plan.Function{
   818  						Func: &plan.ObjectRef{Obj: equalDate2DateFid, ObjName: "="},
   819  						Args: []*plan.Expr{
   820  							mologdateConst(5),
   821  							mologdateFunc(),
   822  						},
   823  					},
   824  				}),
   825  				proc:     testutil.NewProc(),
   826  				fileList: fileList,
   827  				fileSize: fileSize,
   828  			},
   829  			want:  []string{files[5].path},
   830  			want1: []int64{files[5].size},
   831  		},
   832  		{
   833  			name: "mo_log_date_gt_20230202",
   834  			args: args{
   835  				node: nodeWithFunction(&plan.Expr_F{
   836  					F: &plan.Function{
   837  						Func: &plan.ObjectRef{Obj: lessDate2DateFid, ObjName: "<"},
   838  						Args: []*plan.Expr{
   839  							mologdateConst(2),
   840  							mologdateFunc(),
   841  						},
   842  					},
   843  				}),
   844  				proc:     testutil.NewProc(),
   845  				fileList: fileList,
   846  				fileSize: fileSize,
   847  			},
   848  			want:  toPathArr(files[3:]),
   849  			want1: toSizeArr(files[3:]),
   850  		},
   851  		{
   852  			name: "mo_log_date_lt_20230202",
   853  			args: args{
   854  				node: nodeWithFunction(&plan.Expr_F{
   855  					F: &plan.Function{
   856  						Func: &plan.ObjectRef{Obj: lessDate2DateFid, ObjName: "<"},
   857  						Args: []*plan.Expr{
   858  							mologdateFunc(),
   859  							mologdateConst(2),
   860  						},
   861  					},
   862  				}),
   863  				proc:     testutil.NewProc(),
   864  				fileList: fileList,
   865  				fileSize: fileSize,
   866  			},
   867  			want:  toPathArr(files[:2]),
   868  			want1: toSizeArr(files[:2]),
   869  		},
   870  	}
   871  	for _, tt := range tests {
   872  		t.Run(tt.name, func(t *testing.T) {
   873  			got, got1, err := filterByAccountAndFilename(context.TODO(), tt.args.node, tt.args.proc, tt.args.fileList, tt.args.fileSize)
   874  			require.Nil(t, err)
   875  			require.Equal(t, tt.want, got)
   876  			require.Equal(t, tt.want1, got1)
   877  		})
   878  	}
   879  }