github.com/matrixorigin/matrixone@v0.7.0/pkg/sql/colexec/external/external_test.go (about)

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package external
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"encoding/json"
    21  	"fmt"
    22  	"os"
    23  	"path/filepath"
    24  	"testing"
    25  
    26  	"github.com/matrixorigin/matrixone/pkg/catalog"
    27  	"github.com/matrixorigin/matrixone/pkg/container/types"
    28  	"github.com/matrixorigin/matrixone/pkg/pb/plan"
    29  	"github.com/matrixorigin/matrixone/pkg/sql/parsers/tree"
    30  	plan2 "github.com/matrixorigin/matrixone/pkg/sql/plan"
    31  	"github.com/matrixorigin/matrixone/pkg/sql/plan/function"
    32  	"github.com/matrixorigin/matrixone/pkg/testutil"
    33  	"github.com/matrixorigin/matrixone/pkg/vm/process"
    34  	"github.com/smartystreets/goconvey/convey"
    35  	"github.com/stretchr/testify/assert"
    36  	"github.com/stretchr/testify/require"
    37  )
    38  
const (
	// Rows is the default number of rows.
	Rows = 10 // default rows
)
    42  
// externalTestCase bundles the operator argument, the process, and the
// source-format settings that the tests in this file share.
type externalTestCase struct {
	arg      *Argument          // operator argument handed to String, Prepare and Call
	types    []types.Type       // column types filled in by newTestCase
	proc     *process.Process   // process handed to Prepare and Call
	cancel   context.CancelFunc // cancel func of the context stored in arg.Es
	format   string             // copied into tree.ExternParam Format by the tests
	jsondata string             // copied into tree.ExternParam JsonData by the tests
}
    52  
// Shared fixtures for the tests in this file:
//   - cases is populated by init and iterated by Test_Prepare and Test_Call.
//   - defaultOption is the flat key/value option list that Test_Prepare
//     assigns to tree.ExParamConst.Option.
var (
	cases         []externalTestCase
	defaultOption = []string{"filepath", "abc", "format", "jsonline", "jsondata", "array"}
)
    57  
    58  func newTestCase(all bool, format, jsondata string) externalTestCase {
    59  	proc := testutil.NewProcess()
    60  	proc.FileService = testutil.NewFS()
    61  	ctx, cancel := context.WithCancel(context.Background())
    62  	return externalTestCase{
    63  		proc: proc,
    64  		types: []types.Type{
    65  			{Oid: types.T_int8},
    66  		},
    67  		arg: &Argument{
    68  			Es: &ExternalParam{
    69  				ExParamConst: ExParamConst{
    70  					Ctx: ctx,
    71  				},
    72  				ExParam: ExParam{
    73  					Fileparam: &ExFileparam{},
    74  					Filter:    &FilterParam{},
    75  				},
    76  			},
    77  		},
    78  		cancel:   cancel,
    79  		format:   format,
    80  		jsondata: jsondata,
    81  	}
    82  }
    83  
    84  func init() {
    85  	cases = []externalTestCase{
    86  		newTestCase(true, tree.CSV, ""),
    87  		newTestCase(true, tree.JSONLINE, tree.OBJECT),
    88  		newTestCase(true, tree.JSONLINE, tree.ARRAY),
    89  	}
    90  }
    91  
    92  func Test_String(t *testing.T) {
    93  	buf := new(bytes.Buffer)
    94  	String(cases[0].arg, buf)
    95  }
    96  
    97  func Test_Prepare(t *testing.T) {
    98  	convey.Convey("external Prepare", t, func() {
    99  		for _, tcs := range cases {
   100  			param := tcs.arg.Es
   101  			extern := &tree.ExternParam{
   102  				ExParamConst: tree.ExParamConst{
   103  					Filepath: "",
   104  					Tail: &tree.TailParameter{
   105  						IgnoredLines: 0,
   106  					},
   107  					Format: tcs.format,
   108  					Option: defaultOption,
   109  				},
   110  				ExParam: tree.ExParam{
   111  					FileService: tcs.proc.FileService,
   112  					JsonData:    tcs.jsondata,
   113  					Ctx:         context.Background(),
   114  				},
   115  			}
   116  			json_byte, err := json.Marshal(extern)
   117  			if err != nil {
   118  				panic(err)
   119  			}
   120  			param.CreateSql = string(json_byte)
   121  			tcs.arg.Es.Extern = extern
   122  			err = Prepare(tcs.proc, tcs.arg)
   123  			convey.So(err, convey.ShouldBeNil)
   124  			convey.So(param.FileList, convey.ShouldBeNil)
   125  			convey.So(param.Fileparam.FileCnt, convey.ShouldEqual, 0)
   126  
   127  			extern.Format = "test"
   128  			json_byte, err = json.Marshal(extern)
   129  			convey.So(err, convey.ShouldBeNil)
   130  			param.CreateSql = string(json_byte)
   131  			err = Prepare(tcs.proc, tcs.arg)
   132  			convey.So(err, convey.ShouldBeNil)
   133  
   134  			if tcs.format == tree.JSONLINE {
   135  				extern = &tree.ExternParam{
   136  					ExParamConst: tree.ExParamConst{
   137  						Filepath: "",
   138  						Tail: &tree.TailParameter{
   139  							IgnoredLines: 0,
   140  						},
   141  						Format: tcs.format,
   142  						Option: defaultOption,
   143  					},
   144  				}
   145  				extern.JsonData = tcs.jsondata
   146  				json_byte, err = json.Marshal(extern)
   147  				convey.So(err, convey.ShouldBeNil)
   148  				param.CreateSql = string(json_byte)
   149  				err = Prepare(tcs.proc, tcs.arg)
   150  				convey.So(err, convey.ShouldBeNil)
   151  				convey.So(param.FileList, convey.ShouldResemble, []string(nil))
   152  				convey.So(param.Fileparam.FileCnt, convey.ShouldEqual, 0)
   153  
   154  				extern.Option = []string{"filepath", "abc", "format", "jsonline", "jsondata", "test"}
   155  				json_byte, err = json.Marshal(extern)
   156  				convey.So(err, convey.ShouldBeNil)
   157  				param.CreateSql = string(json_byte)
   158  
   159  				err = Prepare(tcs.proc, tcs.arg)
   160  				convey.So(err, convey.ShouldBeNil)
   161  			}
   162  		}
   163  	})
   164  }
   165  
   166  func Test_Call(t *testing.T) {
   167  	convey.Convey("external Call", t, func() {
   168  		for _, tcs := range cases {
   169  			param := tcs.arg.Es
   170  			extern := &tree.ExternParam{
   171  				ExParamConst: tree.ExParamConst{
   172  					Filepath: "",
   173  					Tail: &tree.TailParameter{
   174  						IgnoredLines: 0,
   175  					},
   176  					Format: tcs.format,
   177  				},
   178  				ExParam: tree.ExParam{
   179  					FileService: tcs.proc.FileService,
   180  					JsonData:    tcs.jsondata,
   181  					Ctx:         context.Background(),
   182  				},
   183  			}
   184  			param.Extern = extern
   185  			param.Fileparam.End = false
   186  			param.FileList = []string{"abc.txt"}
   187  			param.FileOffset = [][2]int{{0, -1}}
   188  			param.FileSize = []int64{1}
   189  			end, err := Call(1, tcs.proc, tcs.arg, false, false)
   190  			convey.So(err, convey.ShouldNotBeNil)
   191  			convey.So(end, convey.ShouldBeFalse)
   192  
   193  			param.Fileparam.End = false
   194  			end, err = Call(1, tcs.proc, tcs.arg, false, false)
   195  			convey.So(err, convey.ShouldBeNil)
   196  			convey.So(end, convey.ShouldBeTrue)
   197  
   198  			param.Fileparam.End = true
   199  			end, err = Call(1, tcs.proc, tcs.arg, false, false)
   200  			convey.So(err, convey.ShouldBeNil)
   201  			convey.So(end, convey.ShouldBeTrue)
   202  		}
   203  	})
   204  }
   205  
   206  func Test_getCompressType(t *testing.T) {
   207  	convey.Convey("getCompressType succ", t, func() {
   208  		param := &tree.ExternParam{
   209  			ExParamConst: tree.ExParamConst{
   210  				CompressType: tree.GZIP,
   211  			},
   212  			ExParam: tree.ExParam{
   213  				Ctx: context.Background(),
   214  			},
   215  		}
   216  		compress := getCompressType(param, param.Filepath)
   217  		convey.So(compress, convey.ShouldEqual, param.CompressType)
   218  
   219  		param.CompressType = tree.AUTO
   220  		param.Filepath = "a.gz"
   221  		compress = getCompressType(param, param.Filepath)
   222  		convey.So(compress, convey.ShouldEqual, tree.GZIP)
   223  
   224  		param.Filepath = "a.bz2"
   225  		compress = getCompressType(param, param.Filepath)
   226  		convey.So(compress, convey.ShouldEqual, tree.BZIP2)
   227  
   228  		param.Filepath = "a.lz4"
   229  		compress = getCompressType(param, param.Filepath)
   230  		convey.So(compress, convey.ShouldEqual, tree.LZ4)
   231  
   232  		param.Filepath = "a.csv"
   233  		compress = getCompressType(param, param.Filepath)
   234  		convey.So(compress, convey.ShouldEqual, tree.NOCOMPRESS)
   235  
   236  		param.Filepath = "a"
   237  		compress = getCompressType(param, param.Filepath)
   238  		convey.So(compress, convey.ShouldEqual, tree.NOCOMPRESS)
   239  	})
   240  }
   241  
   242  func Test_getUnCompressReader(t *testing.T) {
   243  	convey.Convey("getUnCompressReader succ", t, func() {
   244  		param := &tree.ExternParam{
   245  			ExParamConst: tree.ExParamConst{
   246  				CompressType: tree.NOCOMPRESS,
   247  			},
   248  			ExParam: tree.ExParam{
   249  				Ctx: context.Background(),
   250  			},
   251  		}
   252  		read, err := getUnCompressReader(param, param.Filepath, nil)
   253  		convey.So(read, convey.ShouldBeNil)
   254  		convey.So(err, convey.ShouldBeNil)
   255  
   256  		param.CompressType = tree.BZIP2
   257  		read, err = getUnCompressReader(param, param.Filepath, &os.File{})
   258  		convey.So(read, convey.ShouldNotBeNil)
   259  		convey.So(err, convey.ShouldBeNil)
   260  
   261  		param.CompressType = tree.FLATE
   262  		read, err = getUnCompressReader(param, param.Filepath, &os.File{})
   263  		convey.So(read, convey.ShouldNotBeNil)
   264  		convey.So(err, convey.ShouldBeNil)
   265  
   266  		param.CompressType = tree.LZ4
   267  		read, err = getUnCompressReader(param, param.Filepath, &os.File{})
   268  		convey.So(read, convey.ShouldNotBeNil)
   269  		convey.So(err, convey.ShouldBeNil)
   270  
   271  		param.CompressType = tree.LZW
   272  		read, err = getUnCompressReader(param, param.Filepath, &os.File{})
   273  		convey.So(read, convey.ShouldBeNil)
   274  		convey.So(err, convey.ShouldNotBeNil)
   275  
   276  		param.CompressType = "abc"
   277  		read, err = getUnCompressReader(param, param.Filepath, &os.File{})
   278  		convey.So(read, convey.ShouldBeNil)
   279  		convey.So(err, convey.ShouldNotBeNil)
   280  	})
   281  }
   282  
   283  func Test_makeBatch(t *testing.T) {
   284  	convey.Convey("makeBatch succ", t, func() {
   285  		col := &plan.ColDef{
   286  			Typ: &plan.Type{
   287  				Id: int32(types.T_bool),
   288  			},
   289  		}
   290  		param := &ExternalParam{
   291  			ExParamConst: ExParamConst{
   292  				Cols:  []*plan.ColDef{col},
   293  				Attrs: []string{"a"},
   294  			},
   295  		}
   296  		plh := &ParseLineHandler{
   297  			batchSize: 1,
   298  		}
   299  		_ = makeBatch(param, plh.batchSize, testutil.TestUtilMp)
   300  	})
   301  }
   302  
// Test_GetBatchData feeds GetBatchData one parsed row at a time, first as CSV
// fields and then as JSON-line text in object and array layout, over an
// 18-column schema that covers the bool, integer, float, varchar, json, date,
// datetime, decimal and timestamp types.
//
// The steps are order-dependent: line is mutated in place, param.Attrs and
// param.Cols keep the value assigned by the last loop iteration, and
// param.prevStr is only reset explicitly before the array-layout section.
func Test_GetBatchData(t *testing.T) {
	convey.Convey("GetBatchData succ", t, func() {
		// One value per column, in schema order.
		line := []string{"1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "2020-09-07",
			"2020-09-07 00:00:00", "16", "17", "2020-09-07 00:00:00"}
		atrrs := []string{"col1", "col2", "col3", "col4", "col5", "col6", "col7", "col8", "col9", "col10",
			"col11", "col12", "col13", "col14", "col15", "col16", "col17", "col18"}

		// jsonline_object: {"col1":"1",...,"col18":"..."} with every attribute present.
		buf := bytes.NewBuffer(nil)
		buf.WriteString("{")
		for idx, attr := range atrrs {
			buf.WriteString(fmt.Sprintf("\"%s\":\"%s\"", attr, line[idx]))
			if idx != len(atrrs)-1 {
				buf.WriteString(",")
			}
		}
		buf.WriteString("}")
		jsonline_object := []string{buf.String()}

		// jsonline_object_key_not_match: same object, but the first key is the
		// value "1" instead of the attribute name "col1".
		buf.Reset()
		for idx, attr := range atrrs {
			if idx == 0 {
				buf.WriteString(fmt.Sprintf("\"%s\":\"%s\"", line[idx], line[idx]))
			} else {
				buf.WriteString(fmt.Sprintf("\"%s\":\"%s\"", attr, line[idx]))
			}
			if idx != len(atrrs)-1 {
				buf.WriteString(",")
			}
		}
		jsonline_object_key_not_match := []string{"{" + buf.String() + "}"}

		// jsonline_array: ["1","2",...] with all 18 values.
		buf.Reset()
		buf.WriteString("[")
		for idx := range line {
			buf.WriteString(fmt.Sprintf("\"%s\"", line[idx]))
			if idx != len(atrrs)-1 {
				buf.WriteString(",")
			}
		}
		buf.WriteString("]")
		jsonline_array := []string{buf.String()}

		// jsonline_object_less: only the first 17 key/value pairs.
		// NOTE(review): the format string already ends in "," and the loop adds
		// another separator, so the text contains ",," between pairs and a ","
		// before "}". It is therefore not well-formed JSON, and the error
		// asserted for it below may come from decoding rather than from the
		// missing column — confirm which is intended.
		buf.Reset()
		buf.WriteString("{")
		for i := 0; i < len(line)-1; i++ {
			buf.WriteString(fmt.Sprintf("\"%s\":\"%s\",", atrrs[i], line[i]))
			if i != len(line)-2 {
				buf.WriteString(",")
			}
		}
		buf.WriteString("}")
		jsonline_object_less := []string{buf.String()}

		// jsonline_array_less: array with only the first 17 values.
		buf.Reset()
		buf.WriteString("[")
		for i := 0; i < len(line)-1; i++ {
			buf.WriteString(fmt.Sprintf("\"%s\"", line[i]))
			if i != len(line)-2 {
				buf.WriteString(",")
			}
		}
		buf.WriteString("]")
		jsonline_array_less := []string{buf.String()}

		// Column definitions, index-aligned with atrrs and line.
		cols := []*plan.ColDef{
			{
				Typ: &plan.Type{
					Id: int32(types.T_bool),
				},
			},
			{
				Typ: &plan.Type{
					Id: int32(types.T_int8),
				},
			},
			{
				Typ: &plan.Type{
					Id: int32(types.T_int16),
				},
			},
			{
				Typ: &plan.Type{
					Id: int32(types.T_int32),
				},
			},
			{
				Typ: &plan.Type{
					Id: int32(types.T_int64),
				},
			},
			{
				Typ: &plan.Type{
					Id: int32(types.T_uint8),
				},
			},
			{
				Typ: &plan.Type{
					Id: int32(types.T_uint16),
				},
			},
			{
				Typ: &plan.Type{
					Id: int32(types.T_uint32),
				},
			},
			{
				Typ: &plan.Type{
					Id: int32(types.T_uint64),
				},
			},
			{
				Typ: &plan.Type{
					Id:        int32(types.T_float32),
					Precision: -1,
				},
			},
			{
				Typ: &plan.Type{
					Id:        int32(types.T_float64),
					Precision: -1,
				},
			},
			{
				Typ: &plan.Type{
					Id: int32(types.T_varchar),
				},
			},
			{
				Typ: &plan.Type{
					Id: int32(types.T_json),
				},
			},
			{
				Typ: &plan.Type{
					Id: int32(types.T_date),
				},
			},
			{
				Typ: &plan.Type{
					Id: int32(types.T_datetime),
				},
			},
			{
				Typ: &plan.Type{
					Id:    int32(types.T_decimal64),
					Width: 15,
					Scale: 0,
				},
			},
			{
				Typ: &plan.Type{
					Id:    int32(types.T_decimal128),
					Width: 17,
					Scale: 0,
				},
			},
			{
				Typ: &plan.Type{
					Id: int32(types.T_timestamp),
				},
			},
		}
		param := &ExternalParam{
			ExParamConst: ExParamConst{
				Attrs: atrrs,
				Cols:  cols,
				Extern: &tree.ExternParam{
					ExParamConst: tree.ExParamConst{
						Tail: &tree.TailParameter{
							Fields: &tree.Fields{},
						},
						Format: tree.CSV,
					},
					ExParam: tree.ExParam{
						Ctx: context.Background(),
					},
				},
			},
		}
		// Map every attribute name to its column position.
		param.Name2ColIndex = make(map[string]int32)
		for i := 0; i < len(atrrs); i++ {
			param.Name2ColIndex[atrrs[i]] = int32(i)
		}
		plh := &ParseLineHandler{
			batchSize:        1,
			simdCsvLineArray: [][]string{line},
		}

		// CSV: a complete, well-typed row is expected to succeed.
		proc := testutil.NewProc()
		_, err := GetBatchData(param, plh, proc)
		convey.So(err, convey.ShouldBeNil)

		// CSV: a row with a single field for 18 columns is expected to fail.
		plh.simdCsvLineArray = [][]string{line[:1]}
		_, err = GetBatchData(param, plh, proc)
		convey.So(err, convey.ShouldNotBeNil)

		// CSV: every field set to the literal \N is expected to succeed
		// (presumably treated as NULL — confirm against GetBatchData).
		for i := 0; i < len(atrrs); i++ {
			line[i] = "\\N"
		}
		plh.simdCsvLineArray = [][]string{line}
		_, err = GetBatchData(param, plh, proc)
		convey.So(err, convey.ShouldBeNil)

		// CSV: "N.0" text in the integer columns is expected to succeed.
		line = []string{"0", "1.0", "2.0", "3.0", "4.0", "5.0", "6.0", "7.0", "8.0", "9.0", "10.0", "11.0", "13", "2020-09-07",
			"2020-09-07 00:00:00", "16", "17", "2020-09-07 00:00:00"}
		plh.simdCsvLineArray = [][]string{line}
		_, err = GetBatchData(param, plh, proc)
		convey.So(err, convey.ShouldBeNil)

		// CSV: out-of-range or non-parsable text for each column type; every
		// iteration is expected to fail.
		// NOTE(review): Attrs and Cols are shortened to start at column i, but
		// the row is always the full 18-field line, so field 0 ("truefalse")
		// stays paired with the first remaining column. If the intent was to
		// hit column i with its own bad value, the row would need to be
		// line[i:] — confirm.
		line = []string{"truefalse", "128", "32768", "2147483648", "9223372036854775808", "256", "65536", "4294967296", "18446744073709551616",
			"float32", "float64", "", "13", "date", "datetime", "decimal64", "decimal128", "timestamp"}
		for i := 0; i < len(atrrs); i++ {
			tmp := atrrs[i:]
			param.Attrs = tmp
			param.Cols = cols[i:]
			plh.simdCsvLineArray = [][]string{line}
			_, err = GetBatchData(param, plh, proc)
			convey.So(err, convey.ShouldNotBeNil)
		}

		// Same row with EnclosedBy set to 't'. Attrs and Cols still hold the
		// last loop value (only the final column); an error is expected.
		param.Extern.Tail.Fields.EnclosedBy = 't'
		_, err = GetBatchData(param, plh, proc)
		convey.So(err, convey.ShouldNotBeNil)

		// CSV: fractional or non-numeric text in the eight integer columns;
		// every iteration is expected to fail. The NOTE(review) above about
		// the full-length row applies here as well.
		line[1] = "128.9"
		line[2] = "32768.9"
		line[3] = "2147483648.9"
		line[4] = "a.9"
		line[5] = "256.9"
		line[6] = "65536.9"
		line[7] = "4294967296.9"
		line[8] = "a.9"
		for i := 1; i <= 8; i++ {
			tmp := atrrs[i:]
			param.Attrs = tmp
			param.Cols = cols[i:]
			plh.simdCsvLineArray = [][]string{line}
			_, err = GetBatchData(param, plh, proc)
			convey.So(err, convey.ShouldNotBeNil)
		}

		// JSON-line, object layout, with the full schema restored: the
		// complete object is expected to succeed; the short object and the
		// object with a mismatched key are expected to fail.
		param.Extern.Format = tree.JSONLINE
		param.Extern.JsonData = tree.OBJECT
		param.Attrs = atrrs
		param.Cols = cols
		plh.simdCsvLineArray = [][]string{jsonline_object}
		_, err = GetBatchData(param, plh, proc)
		convey.So(err, convey.ShouldBeNil)
		plh.simdCsvLineArray = [][]string{jsonline_object_less}
		_, err = GetBatchData(param, plh, proc)
		convey.So(err, convey.ShouldNotBeNil)
		plh.simdCsvLineArray = [][]string{jsonline_object_key_not_match}
		_, err = GetBatchData(param, plh, proc)
		convey.So(err, convey.ShouldNotBeNil)

		// The same single-field JSON row read as CSV is expected to fail.
		param.Extern.Format = tree.CSV
		_, err = GetBatchData(param, plh, proc)
		convey.So(err, convey.ShouldNotBeNil)

		// JSON-line, array layout. prevStr is cleared first so that no text
		// left over from the object-layout calls is carried in.
		param.Extern.Format = tree.JSONLINE
		param.Extern.JsonData = tree.ARRAY
		param.prevStr = ""
		plh.simdCsvLineArray = [][]string{jsonline_array}
		_, err = GetBatchData(param, plh, proc)
		convey.So(err, convey.ShouldBeNil)

		// Split the array text into everything but its last two bytes and the
		// last two bytes. Feeding the first fragment is expected to return no
		// error and leave the fragment in param.prevStr; feeding the remainder
		// is then expected to succeed.
		prevStr, str := jsonline_array[0][:len(jsonline_array[0])-2], jsonline_array[0][len(jsonline_array[0])-2:]
		plh.simdCsvLineArray = [][]string{{prevStr}}
		_, err = GetBatchData(param, plh, proc)
		convey.So(err, convey.ShouldBeNil)
		convey.So(param.prevStr, convey.ShouldEqual, prevStr)

		plh.simdCsvLineArray = [][]string{{str}}
		_, err = GetBatchData(param, plh, proc)
		convey.So(err, convey.ShouldBeNil)

		// An unrecognized JsonData value is expected to fail. It stays set for
		// the remaining calls, all of which are also expected to fail.
		param.Extern.JsonData = "test"
		_, err = GetBatchData(param, plh, proc)
		convey.So(err, convey.ShouldNotBeNil)

		plh.simdCsvLineArray = [][]string{jsonline_array_less}
		_, err = GetBatchData(param, plh, proc)
		convey.So(err, convey.ShouldNotBeNil)

		// Object text with its leading "{" removed.
		jsonline_array_less[0] = jsonline_object_less[0][1:]
		plh.simdCsvLineArray = [][]string{jsonline_array_less}
		_, err = GetBatchData(param, plh, proc)
		convey.So(err, convey.ShouldNotBeNil)
		// A row that carries two JSON strings instead of one.
		jsonline_array = append(jsonline_array, jsonline_array_less...)
		plh.simdCsvLineArray = [][]string{jsonline_array}
		_, err = GetBatchData(param, plh, proc)
		convey.So(err, convey.ShouldNotBeNil)
	})
}
   592  
   593  func TestReadDirSymlink(t *testing.T) {
   594  	root := t.TempDir()
   595  	ctx := context.Background()
   596  
   597  	// create a/b/c
   598  	err := os.MkdirAll(filepath.Join(root, "a", "b", "c"), 0755)
   599  	assert.Nil(t, err)
   600  
   601  	// write a/b/c/foo
   602  	err = os.WriteFile(filepath.Join(root, "a", "b", "c", "foo"), []byte("abc"), 0644)
   603  	assert.Nil(t, err)
   604  
   605  	// symlink a/b/d to a/b/c
   606  	err = os.Symlink(
   607  		filepath.Join(root, "a", "b", "c"),
   608  		filepath.Join(root, "a", "b", "d"),
   609  	)
   610  	assert.Nil(t, err)
   611  
   612  	// read a/b/d/foo
   613  	fooPathInB := filepath.Join(root, "a", "b", "d", "foo")
   614  	files, _, err := plan2.ReadDir(&tree.ExternParam{
   615  		ExParamConst: tree.ExParamConst{
   616  			Filepath: fooPathInB,
   617  		},
   618  		ExParam: tree.ExParam{
   619  			Ctx: ctx,
   620  		},
   621  	})
   622  	assert.Nil(t, err)
   623  	assert.Equal(t, 1, len(files))
   624  	assert.Equal(t, fooPathInB, files[0])
   625  
   626  	path1 := root + "/a//b/./../b/c/foo"
   627  	files1, _, err := plan2.ReadDir(&tree.ExternParam{
   628  		ExParamConst: tree.ExParamConst{
   629  			Filepath: path1,
   630  		},
   631  		ExParam: tree.ExParam{
   632  			Ctx: ctx,
   633  		},
   634  	})
   635  	assert.Nil(t, err)
   636  	pathWant1 := root + "/a/b/c/foo"
   637  	assert.Equal(t, 1, len(files1))
   638  	assert.Equal(t, pathWant1, files1[0])
   639  }
   640  
   641  func Test_fliterByAccountAndFilename(t *testing.T) {
   642  	type args struct {
   643  		node     *plan.Node
   644  		proc     *process.Process
   645  		fileList []string
   646  		fileSize []int64
   647  	}
   648  
   649  	files := []struct {
   650  		date types.Date
   651  		path string
   652  		size int64
   653  	}{
   654  		{738551, "etl:/sys/logs/2023/02/01/filepath", 1},
   655  		{738552, "etl:/sys/logs/2023/02/02/filepath", 2},
   656  		{738553, "etl:/sys/logs/2023/02/03/filepath", 3},
   657  		{738554, "etl:/sys/logs/2023/02/04/filepath", 4},
   658  		{738555, "etl:/sys/logs/2023/02/05/filepath", 5},
   659  		{738556, "etl:/sys/logs/2023/02/06/filepath", 6},
   660  	}
   661  
   662  	toPathArr := func(files []struct {
   663  		date types.Date
   664  		path string
   665  		size int64
   666  	}) []string {
   667  		fileList := make([]string, len(files))
   668  		for idx, f := range files {
   669  			fileList[idx] = f.path
   670  		}
   671  		return fileList
   672  	}
   673  	toSizeArr := func(files []struct {
   674  		date types.Date
   675  		path string
   676  		size int64
   677  	}) []int64 {
   678  		fileSize := make([]int64, len(files))
   679  		for idx, f := range files {
   680  			fileSize[idx] = f.size
   681  		}
   682  		return fileSize
   683  	}
   684  
   685  	fileList := toPathArr(files)
   686  	fileSize := toSizeArr(files)
   687  
   688  	equalDate2DateFid := function.EncodeOverloadID(function.EQUAL, 14)
   689  	lessDate2DateFid := function.EncodeOverloadID(function.LESS_THAN, 14)
   690  	mologdateFid := function.EncodeOverloadID(function.MO_LOG_DATE, 0)
   691  	tableName := "dummy_table"
   692  
   693  	mologdateConst := func(idx int) *plan.Expr {
   694  		return &plan.Expr{
   695  			Typ: &plan.Type{
   696  				Size: 4,
   697  				Id:   int32(types.T_date),
   698  			},
   699  			Expr: &plan.Expr_C{
   700  				C: &plan.Const{
   701  					Isnull: false,
   702  					Value: &plan.Const_Dateval{
   703  						Dateval: int32(files[idx].date),
   704  					},
   705  				},
   706  			},
   707  		}
   708  	}
   709  	mologdateFunc := func() *plan.Expr {
   710  		return &plan.Expr{
   711  			Typ: &plan.Type{
   712  				Size: 1,
   713  				Id:   int32(types.T_bool),
   714  			},
   715  			Expr: &plan.Expr_F{
   716  				F: &plan.Function{
   717  					Func: &plan.ObjectRef{Obj: mologdateFid, ObjName: "mo_log_date"},
   718  					Args: []*plan.Expr{
   719  						{
   720  							Typ: nil,
   721  							Expr: &plan.Expr_Col{
   722  								Col: &plan.ColRef{
   723  									RelPos: 0,
   724  									ColPos: 0,
   725  									Name:   tableName + "." + catalog.ExternalFilePath,
   726  								},
   727  							},
   728  						},
   729  					},
   730  				},
   731  			},
   732  		}
   733  	}
   734  
   735  	nodeWithFunction := func(expr *plan.Expr_F) *plan.Node {
   736  		return &plan.Node{
   737  			NodeType: plan.Node_EXTERNAL_SCAN,
   738  			Stats:    &plan.Stats{},
   739  			TableDef: &plan.TableDef{
   740  				TableType: "func_table",
   741  				TblFunc: &plan.TableFunction{
   742  					Name: tableName,
   743  				},
   744  				Cols: []*plan.ColDef{
   745  					{
   746  						Name: catalog.ExternalFilePath,
   747  						Typ: &plan.Type{
   748  							Id:    int32(types.T_varchar),
   749  							Width: types.MaxVarcharLen,
   750  							Table: tableName,
   751  						},
   752  					},
   753  				},
   754  			},
   755  			FilterList: []*plan.Expr{
   756  				{
   757  					Typ: &plan.Type{
   758  						Size: 1,
   759  						Id:   int32(types.T_bool),
   760  					},
   761  					Expr: expr,
   762  				},
   763  			},
   764  		}
   765  	}
   766  
   767  	tests := []struct {
   768  		name  string
   769  		args  args
   770  		want  []string
   771  		want1 []int64
   772  	}{
   773  		{
   774  			name: "mo_log_date_20230205",
   775  			args: args{
   776  				node: nodeWithFunction(&plan.Expr_F{
   777  					F: &plan.Function{
   778  						Func: &plan.ObjectRef{Obj: equalDate2DateFid, ObjName: "="},
   779  						Args: []*plan.Expr{
   780  							mologdateConst(5),
   781  							mologdateFunc(),
   782  						},
   783  					},
   784  				}),
   785  				proc:     testutil.NewProc(),
   786  				fileList: fileList,
   787  				fileSize: fileSize,
   788  			},
   789  			want:  []string{files[5].path},
   790  			want1: []int64{files[5].size},
   791  		},
   792  		{
   793  			name: "mo_log_date_gt_20230202",
   794  			args: args{
   795  				node: nodeWithFunction(&plan.Expr_F{
   796  					F: &plan.Function{
   797  						Func: &plan.ObjectRef{Obj: lessDate2DateFid, ObjName: "<"},
   798  						Args: []*plan.Expr{
   799  							mologdateConst(2),
   800  							mologdateFunc(),
   801  						},
   802  					},
   803  				}),
   804  				proc:     testutil.NewProc(),
   805  				fileList: fileList,
   806  				fileSize: fileSize,
   807  			},
   808  			want:  toPathArr(files[3:]),
   809  			want1: toSizeArr(files[3:]),
   810  		},
   811  		{
   812  			name: "mo_log_date_lt_20230202",
   813  			args: args{
   814  				node: nodeWithFunction(&plan.Expr_F{
   815  					F: &plan.Function{
   816  						Func: &plan.ObjectRef{Obj: lessDate2DateFid, ObjName: "<"},
   817  						Args: []*plan.Expr{
   818  							mologdateFunc(),
   819  							mologdateConst(2),
   820  						},
   821  					},
   822  				}),
   823  				proc:     testutil.NewProc(),
   824  				fileList: fileList,
   825  				fileSize: fileSize,
   826  			},
   827  			want:  toPathArr(files[:2]),
   828  			want1: toSizeArr(files[:2]),
   829  		},
   830  	}
   831  	for _, tt := range tests {
   832  		t.Run(tt.name, func(t *testing.T) {
   833  			got, got1, err := filterByAccountAndFilename(tt.args.node, tt.args.proc, tt.args.fileList, tt.args.fileSize)
   834  			require.Nil(t, err)
   835  			require.Equal(t, tt.want, got)
   836  			require.Equal(t, tt.want1, got1)
   837  		})
   838  	}
   839  }