github.com/matrixorigin/matrixone@v1.2.0/pkg/util/export/merge_test.go

// Copyright 2022 Matrix Origin
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package export

import (
	"context"
	"errors"
	"fmt"
	"path"
	"reflect"
	"strings"
	"sync"
	"testing"
	"time"

	"github.com/matrixorigin/matrixone/pkg/common/moerr"
	"github.com/matrixorigin/matrixone/pkg/common/mpool"
	"github.com/matrixorigin/matrixone/pkg/common/runtime"
	"github.com/matrixorigin/matrixone/pkg/config"
	"github.com/matrixorigin/matrixone/pkg/fileservice"
	"github.com/matrixorigin/matrixone/pkg/logutil"
	"github.com/matrixorigin/matrixone/pkg/pb/metadata"
	"github.com/matrixorigin/matrixone/pkg/pb/task"
	"github.com/matrixorigin/matrixone/pkg/taskservice"
	"github.com/matrixorigin/matrixone/pkg/testutil"
	"github.com/matrixorigin/matrixone/pkg/util/export/etl"
	"github.com/matrixorigin/matrixone/pkg/util/export/table"
	"github.com/matrixorigin/matrixone/pkg/util/trace"

	"github.com/lni/goutils/leaktest"
	"github.com/matrixorigin/simdcsv"
	"github.com/robfig/cron/v3"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

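// init pins the process time zone, registers the dummy table schema, and
// installs a process-level runtime so the tests in this file behave
// deterministically regardless of the host environment.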
func init() {
	time.Local = time.FixedZone("CST", 0) // fix the local zone at offset +0000 (labeled "CST") for stable timestamps
	table.RegisterTableDefine(dummyTable)
	runtime.SetupProcessLevelRuntime(runtime.NewRuntime(metadata.ServiceType_CN, "test", logutil.GetGlobalLogger()))
}

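// mux serializes the fixture helpers below that write log files through a
// fileservice.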
var mux sync.Mutex

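// The dummy schema: a three-column external table whose first two columns
// form the primary key. It backs every merge fixture in this file.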
var dummyStrColumn = table.Column{Name: "str", ColType: table.TVarchar, Scale: 32, Default: "", Comment: "str column"}
var dummyInt64Column = table.Column{Name: "int64", ColType: table.TInt64, Default: "0", Comment: "int64 column"}
var dummyFloat64Column = table.Column{Name: "float64", ColType: table.TFloat64, Default: "0.0", Comment: "float64 column"}

var dummyTable = &table.Table{
	Account:          "test",
	Database:         "db_dummy",
	Table:            "tbl_dummy",
	Columns:          []table.Column{dummyStrColumn, dummyInt64Column, dummyFloat64Column},
	PrimaryKeyColumn: []table.Column{dummyStrColumn, dummyInt64Column},
	Engine:           table.ExternalTableEngine,
	Comment:          "dummy table",
	PathBuilder:      table.NewAccountDatePathBuilder(),
	TableOptions:     nil,
}

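// dummyFillTable builds a single dummyTable row with the given column values.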
func dummyFillTable(str string, i int64, f float64) *table.Row {
	row := dummyTable.GetRow(context.TODO())
	row.SetColumnVal(dummyStrColumn, table.StringField(str))
	row.SetColumnVal(dummyInt64Column, table.Int64Field(i))
	row.SetColumnVal(dummyFloat64Column, table.Float64Field(f))
	return row
}

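// TestInitCronExpr checks how InitCronExpr maps a merge interval onto a cron
// expression: whole-hour intervals get explicit hour lists (e.g. 3h yields
// "0 0 3,6,9,12,15,18,21 * * *"), sub-hour intervals get "@every" forms
// rounded up where needed (7m becomes "@every 10m"), and intervals such as
// 13h are rejected with ErrNotSupported.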
func TestInitCronExpr(t *testing.T) {
	type args struct {
		duration time.Duration
	}
	tests := []struct {
		name           string
		args           args
		wantErr        bool
		wantExpr       string
		expectDuration time.Duration
	}{
		{name: "1h", args: args{duration: 1 * time.Hour}, wantErr: false, wantExpr: MergeTaskCronExprEvery1Hour},
		{name: "2h", args: args{duration: 2 * time.Hour}, wantErr: false, wantExpr: MergeTaskCronExprEvery2Hour},
		{name: "4h", args: args{duration: 4 * time.Hour}, wantErr: false, wantExpr: MergeTaskCronExprEvery4Hour},
		{name: "3h", args: args{duration: 3 * time.Hour}, wantErr: false, wantExpr: "0 0 3,6,9,12,15,18,21 * * *"},
		{name: "5h", args: args{duration: 5 * time.Hour}, wantErr: false, wantExpr: "0 0 5,10,15,20 * * *"},
		{name: "5min", args: args{duration: 5 * time.Minute}, wantErr: false, wantExpr: MergeTaskCronExprEvery05Min},
		{name: "15min", args: args{duration: 15 * time.Minute}, wantErr: false, wantExpr: MergeTaskCronExprEvery15Min},
		{name: "7min", args: args{duration: 7 * time.Minute}, wantErr: false, wantExpr: "@every 10m", expectDuration: 10 * time.Minute},
		{name: "15s", args: args{duration: 15 * time.Second}, wantErr: false, wantExpr: "@every 15s", expectDuration: 15 * time.Second},
		{name: "2min", args: args{duration: 2 * time.Minute}, wantErr: false, wantExpr: "@every 120s", expectDuration: 2 * time.Minute},
		{name: "13h", args: args{duration: 13 * time.Hour}, wantErr: true, wantExpr: ""},
	}

	ctx := context.Background()
	parser := cron.NewParser(
		cron.Second |
			cron.Minute |
			cron.Hour |
			cron.Dom |
			cron.Month |
			cron.Dow |
			cron.Descriptor)
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			err := InitCronExpr(ctx, tt.args.duration)
			if tt.wantErr {
				var e *moerr.Error
				require.True(t, errors.As(err, &e))
				require.True(t, moerr.IsMoErrCode(e, moerr.ErrNotSupported))
			} else {
				require.Equal(t, tt.wantExpr, MergeTaskCronExpr)

				sched, err := parser.Parse(MergeTaskCronExpr)
				require.Nil(t, err)

				now := time.Unix(60, 0)
				next := sched.Next(now)
				t.Logf("duration: %v, expr: %s, next: %v", tt.args.duration, MergeTaskCronExpr, next)
				if tt.expectDuration > 0 {
					require.Equal(t, tt.expectDuration, next.Sub(now))
				} else {
					require.Equal(t, tt.args.duration-time.Minute, next.Sub(now))
				}
			}
		})
	}
}

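// newFilePath composes the logs-stage path for a new CSV log file of tbl at
// ts, using the table's own PathBuilder so fixtures land where the merge
// task expects to read them.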
var newFilePath = func(tbl *table.Table, ts time.Time) string {
	filename := tbl.PathBuilder.NewLogFilename(tbl.GetName(), "uuid", "node", ts, table.CsvExtension)
	p := tbl.PathBuilder.Build(tbl.Account, table.MergeLogTypeLogs, ts, tbl.Database, tbl.GetName())
	filepath := path.Join(p, filename)
	return filepath
}

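// initLogsFile below is kept disabled; it appears to have seeded four
// overlapping log files (two sharing primary keys across hours) for an
// earlier variant of the merge tests.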
//
//func initLogsFile(ctx context.Context, fs fileservice.FileService, tbl *table.Table, ts time.Time) error {
//	mux.Lock()
//	defer mux.Unlock()
//
//	buf := make([]byte, 0, 4096)
//
//	ts1 := ts
//	writer, _ := newETLWriter(ctx, fs, newFilePath(tbl, ts1), buf, nil, nil)
//	writer.WriteStrings(dummyFillTable("row1", 1, 1.0).ToStrings())
//	writer.WriteStrings(dummyFillTable("row2", 2, 2.0).ToStrings())
//	writer.FlushAndClose()
//
//	ts2 := ts.Add(time.Minute)
//	writer, _ = newETLWriter(ctx, fs, newFilePath(tbl, ts2), buf, nil, nil)
//	writer.WriteStrings(dummyFillTable("row3", 1, 1.0).ToStrings())
//	writer.WriteStrings(dummyFillTable("row4", 2, 2.0).ToStrings())
//	writer.FlushAndClose()
//
//	ts3 := ts.Add(time.Hour)
//	writer, _ = newETLWriter(ctx, fs, newFilePath(tbl, ts3), buf, nil, nil)
//	writer.WriteStrings(dummyFillTable("row5", 1, 1.0).ToStrings())
//	writer.WriteStrings(dummyFillTable("row6", 2, 2.0).ToStrings())
//	writer.FlushAndClose()
//
//	ts1New := ts.Add(time.Hour + time.Minute)
//	writer, _ = newETLWriter(ctx, fs, newFilePath(tbl, ts1New), buf, nil, nil)
//	writer.WriteStrings(dummyFillTable("row1", 1, 11.0).ToStrings())
//	writer.WriteStrings(dummyFillTable("row2", 2, 22.0).ToStrings())
//	writer.FlushAndClose()
//
//	return nil
//}

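// newETLWriter picks a writer implementation from the file extension: paths
// containing table.TaeExtension get a TAEWriter (which needs tbl and mp),
// everything else falls back to a CSV writer layered over an FS writer.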
func newETLWriter(ctx context.Context, fs fileservice.FileService, filePath string, buf []byte, tbl *table.Table, mp *mpool.MPool) (ETLWriter, error) {
	if strings.LastIndex(filePath, table.TaeExtension) > 0 {
		writer := etl.NewTAEWriter(ctx, tbl, mp, filePath, fs)
		return writer, nil
	} else {
		// CSV
		fsWriter := etl.NewFSWriter(ctx, fs, etl.WithFilePath(filePath))
		return etl.NewCSVWriter(ctx, fsWriter), nil
	}
}

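// initEmptyLogFile creates one zero-row log file for tbl at ts and returns
// its path. FlushAndClose on an empty writer may legitimately report
// ErrEmptyRange, which is tolerated here; any other error is surfaced.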
func initEmptyLogFile(ctx context.Context, fs fileservice.FileService, tbl *table.Table, ts time.Time) ([]string, error) {
	mux.Lock()
	defer mux.Unlock()

	files := []string{}
	buf := make([]byte, 0, 4096)

	ts1 := ts
	filePath := newFilePath(tbl, ts1)
	files = append(files, filePath)
	writer, err := newETLWriter(ctx, fs, filePath, buf, tbl, nil)
	if err != nil {
		return nil, err
	}
	_, err = writer.FlushAndClose()
	if err != nil {
		var e *moerr.Error
		if !errors.As(err, &e) || e.ErrorCode() != moerr.ErrEmptyRange {
			return nil, err
		}
	}

	return files, nil
}

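// getdummyMpool returns a fresh memory pool for the ETL writers and readers;
// it panics if pool creation fails.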
func getdummyMpool() *mpool.MPool {
	mp, err := mpool.NewMPool("testETL", 0, mpool.NoFixed)
	if err != nil {
		panic(err)
	}
	return mp
}

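// initSingleLogsFile writes one two-row log file for tbl at ts with the
// given extension (CSV or TAE) and returns the file's path.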
func initSingleLogsFile(ctx context.Context, fs fileservice.FileService, tbl *table.Table, ts time.Time, ext string) (string, error) {
	mux.Lock()
	defer mux.Unlock()

	var newFilePath = func(ts time.Time) string {
		filename := tbl.PathBuilder.NewLogFilename(tbl.GetName(), "uuid", "node", ts, ext)
		p := tbl.PathBuilder.Build(tbl.Account, table.MergeLogTypeLogs, ts, tbl.Database, tbl.GetName())
		filepath := path.Join(p, filename)
		return filepath
	}

	buf := make([]byte, 0, 4096)

	ts1 := ts
	filePath := newFilePath(ts1)
	writer, _ := newETLWriter(ctx, fs, filePath, buf, tbl, getdummyMpool())
	writer.WriteStrings(dummyFillTable("row1", 1, 1.0).ToStrings())
	writer.WriteStrings(dummyFillTable("row2", 2, 2.0).ToStrings())
	writer.FlushAndClose()

	return filePath, nil
}

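// mergeLock serializes the tests that drive the merge pipeline against
// shared package-level state.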
var mergeLock sync.Mutex

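// TestNewMergeNOFiles seeds an empty log file, runs doMergeFiles over it,
// and expects an error containing "is not found". The test is currently
// skipped because newSqlWriteLogic is hard-wired to true.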
func TestNewMergeNOFiles(t *testing.T) {
	const newSqlWriteLogic = true
	if simdcsv.SupportedCPU() || newSqlWriteLogic {
		t.Skip()
	}
	mergeLock.Lock()
	defer mergeLock.Unlock()
	fs := testutil.NewFS()
	ts, _ := time.Parse("2006-01-02 15:04:05", "2021-01-01 00:00:00")
	dummyFilePath := newFilePath(dummyTable, ts)

	ctx := trace.Generate(context.Background())
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	_, err := initEmptyLogFile(ctx, fs, dummyTable, ts)
	require.Nil(t, err)

	type args struct {
		ctx  context.Context
		opts []MergeOption
		// files
		files []*FileMeta
	}
	tests := []struct {
		name string
		args args
		// wantMsg
		wantMsg string
	}{
		{
			name: "normal",
			args: args{
				ctx: ctx,
				opts: []MergeOption{
					WithFileService(fs),
					WithTable(dummyTable),
					WithMaxFileSize(1),
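					// Note: this second WithMaxFileSize overrides the 1-byte limit above.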
					WithMaxFileSize(16 * mpool.MB),
					WithMaxMergeJobs(16),
				},
				files: []*FileMeta{{dummyFilePath, 0}},
			},
			wantMsg: "is not found",
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got, err := NewMerge(tt.args.ctx, tt.args.opts...)
			require.Nil(t, err)
			require.NotNil(t, got)

			err = got.doMergeFiles(ctx, tt.args.files)
			require.Contains(t, err.Error(), tt.wantMsg)
		})
	}
}

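// TestMergeTaskExecutorFactory seeds a single CSV log file, runs the merge
// task executor over it, then walks the shared fileservice breadth-first
// and expects exactly one file to remain after the merge.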
func TestMergeTaskExecutorFactory(t *testing.T) {
	defer leaktest.AfterTest(t)()
	t.Logf("tmpDir: %s/%s", t.TempDir(), t.Name())
	fs := testutil.NewSharedFS()
	targetDate := "2021-01-01"
	ts, err := time.Parse("2006-01-02 15:04:05", targetDate+" 00:00:00")
	require.Nil(t, err)

	ctx := trace.Generate(context.TODO())

	type args struct {
		ctx  context.Context
		opts []MergeOption
		task *task.AsyncTask
	}
	tests := []struct {
		name string
		args args
		want func(ctx context.Context, task task.Task) error
	}{
		{
			name: "normal",
			args: args{
				ctx:  ctx,
				opts: []MergeOption{WithFileService(fs)},
				task: &task.AsyncTask{
					Metadata: task.TaskMetadata{
						ID:                   "",
						Executor:             0,
						Context:              []byte(strings.Join([]string{dummyTable.GetIdentify(), targetDate}, ParamSeparator)),
						Options:              task.TaskOptions{},
						XXX_NoUnkeyedLiteral: struct{}{},
						XXX_unrecognized:     nil,
						XXX_sizecache:        0,
					},
				},
			},
			want: nil,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			_, err := initSingleLogsFile(tt.args.ctx, fs, dummyTable, ts, table.CsvExtension)
			require.Nil(t, err)

			got := MergeTaskExecutorFactory(tt.args.opts...)
			require.NotNil(t, got)

			err = got(tt.args.ctx, tt.args.task)
			require.Nilf(t, err, "err: %v", err)

			files := make([]string, 0, 1)
			dir := []string{"/"}
			for len(dir) > 0 {
				entries, _ := fs.List(tt.args.ctx, dir[0])
				for _, e := range entries {
					p := path.Join(dir[0], e.Name)
					if e.IsDir {
						dir = append(dir, p)
					} else {
						files = append(files, p)
					}
				}
				dir = dir[1:]
			}
			require.Equal(t, 1, len(files))
			t.Logf("%v", files)
		})
	}
}

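// TestCreateCronTask registers the merge cron task against an in-memory
// task storage and expects registration to succeed.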
func TestCreateCronTask(t *testing.T) {
	store := taskservice.NewMemTaskStorage()
	s := taskservice.NewTaskService(runtime.DefaultRuntime(), store)
	defer func() {
		assert.NoError(t, s.Close())
	}()
	ctx, cancel := context.WithTimeout(context.TODO(), time.Second*10)
	defer cancel()

	type args struct {
		ctx         context.Context
		executorID  task.TaskCode
		taskService taskservice.TaskService
	}
	tests := []struct {
		name    string
		args    args
		wantErr error
	}{
		{
			name: "name",
			args: args{
				ctx:         ctx,
				executorID:  1,
				taskService: s,
			},
			wantErr: nil,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got := CreateCronTask(tt.args.ctx, tt.args.executorID, tt.args.taskService)
			require.Nil(t, got)
		})
	}
}

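// TestNewMergeService constructs a merge service for the dummy table and
// checks that the returned service is non-nil and the accompanying boolean
// flag is false on first creation.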
func TestNewMergeService(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.TODO(), time.Minute*5)
	defer cancel()
	fs := testutil.NewFS()

	type args struct {
		ctx  context.Context
		opts []MergeOption
	}
	tests := []struct {
		name  string
		args  args
		want  *Merge
		want1 bool
	}{
		{
			name: "normal",
			args: args{
				ctx:  ctx,
				opts: []MergeOption{WithFileService(fs), WithTable(dummyTable)},
			},
			want:  nil,
			want1: false,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got, got1, err := NewMergeService(tt.args.ctx, tt.args.opts...)
			require.Nil(t, err)
			require.NotNil(t, got)
			require.Equal(t, tt.want1, got1)
		})
	}
}

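// Test_newETLReader verifies that newETLReader dispatches on file extension:
// a .csv log file yields a *ContentReader while a .tae file yields an
// *etl.TAEReader.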
func Test_newETLReader(t *testing.T) {
	ctx := trace.Generate(context.TODO())
	fs := testutil.NewETLFS()
	mp := getdummyMpool()
	require.NotNil(t, mp)

	type args struct {
		ctx  context.Context
		tbl  *table.Table
		fs   fileservice.FileService
		ext  string
		size int64
		mp   *mpool.MPool
	}
	tests := []struct {
		name string
		args args
		want ETLReader
	}{
		{
			name: "csv",
			args: args{
				ctx:  ctx,
				tbl:  dummyTable,
				fs:   fs,
				ext:  table.CsvExtension,
				size: 0,
				mp:   mp,
			},
			want: &ContentReader{},
		},
		{
			name: "tae",
			args: args{
				ctx:  ctx,
				tbl:  dummyTable,
				fs:   fs,
				ext:  table.TaeExtension,
				size: 0,
				mp:   mp,
			},
			want: &etl.TAEReader{},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			path, err := initSingleLogsFile(tt.args.ctx, tt.args.fs, tt.args.tbl, time.Now(), tt.args.ext)
			require.Nil(t, err)
			got, err := newETLReader(tt.args.ctx, tt.args.tbl, tt.args.fs, path, tt.args.size, tt.args.mp)
			require.Nil(t, err) // fail fast so a nil reader is never dereferenced below
			defer got.Close()
			assert.Equal(t, reflect.TypeOf(tt.want), reflect.TypeOf(got))
		})
	}
}

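// TestInitMerge exercises InitMerge with default observability parameters
// and expects no error.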
func TestInitMerge(t *testing.T) {
	type args struct {
		ctx context.Context
		SV  *config.ObservabilityParameters
	}
	tests := []struct {
		name    string
		args    args
		wantErr assert.ErrorAssertionFunc
	}{
		{
			name: "normal",
			args: args{
				ctx: context.TODO(),
				SV:  config.NewObservabilityParameters(),
			},
			wantErr: func(t assert.TestingT, err error, i ...interface{}) bool {
				if err != nil {
					t.Errorf("unexpected error: %v, msg: %v", err, i)
					return false
				}
				return true
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			tt.wantErr(t, InitMerge(tt.args.ctx, tt.args.SV), fmt.Sprintf("InitMerge(%v, %v)", tt.args.ctx, tt.args.SV))
		})
	}
}