github.com/matrixorigin/matrixone@v0.7.0/pkg/util/export/merge_test.go (about)

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package export
    16  
import (
	"context"
	"errors"
	"io"
	"path"
	"strings"
	"sync"
	"testing"
	"time"

	"github.com/lni/goutils/leaktest"
	"github.com/matrixorigin/matrixone/pkg/common/moerr"
	"github.com/matrixorigin/matrixone/pkg/common/mpool"
	"github.com/matrixorigin/matrixone/pkg/common/runtime"
	"github.com/matrixorigin/matrixone/pkg/defines"
	"github.com/matrixorigin/matrixone/pkg/fileservice"
	"github.com/matrixorigin/matrixone/pkg/logutil"
	"github.com/matrixorigin/matrixone/pkg/pb/metadata"
	"github.com/matrixorigin/matrixone/pkg/pb/task"
	"github.com/matrixorigin/matrixone/pkg/taskservice"
	"github.com/matrixorigin/matrixone/pkg/util/export/table"
	"github.com/matrixorigin/matrixone/pkg/util/trace"
	"github.com/matrixorigin/simdcsv"
	"github.com/robfig/cron/v3"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)
    44  
// init pins the process time-zone, registers the dummy test table, and
// installs a process-level runtime so the merge/task code under test can log.
func init() {
	// NOTE(review): the zone is *named* "CST" but carries a zero offset, i.e.
	// it behaves like UTC; this keeps file-path timestamps deterministic.
	time.Local = time.FixedZone("CST", 0) // set time-zone +0000
	table.RegisterTableDefine(dummyTable)
	runtime.SetupProcessLevelRuntime(runtime.NewRuntime(metadata.ServiceType_CN, "test", logutil.GetGlobalLogger()))
}
    50  
// mux serializes the init*LogsFile fixture writers, which share one file
// service across table-driven subtests.
var mux sync.Mutex

// Column definitions for the dummy external table used by every test below.
var dummyStrColumn = table.Column{Name: "str", ColType: table.TVarchar, Precision: 32, Default: "", Comment: "str column"}
var dummyInt64Column = table.Column{Name: "int64", ColType: table.TInt64, Default: "0", Comment: "int64 column"}
var dummyFloat64Column = table.Column{Name: "float64", ColType: table.TFloat64, Default: "0.0", Comment: "float64 column"}

// dummyTable is an external-engine table (str, int64, float64) with a
// composite primary key of (str, int64). Its AccountDatePathBuilder decides
// where the CSV fixture files land inside the file service.
var dummyTable = &table.Table{
	Account:          "test",
	Database:         "db_dummy",
	Table:            "tbl_dummy",
	Columns:          []table.Column{dummyStrColumn, dummyInt64Column, dummyFloat64Column},
	PrimaryKeyColumn: []table.Column{dummyStrColumn, dummyInt64Column},
	Engine:           table.ExternalTableEngine,
	Comment:          "dummy table",
	PathBuilder:      table.NewAccountDatePathBuilder(),
	TableOptions:     nil,
}
    68  
    69  func dummyFillTable(str string, i int64, f float64) *table.Row {
    70  	row := dummyTable.GetRow(context.TODO())
    71  	row.SetColumnVal(dummyStrColumn, str)
    72  	row.SetColumnVal(dummyInt64Column, i)
    73  	row.SetColumnVal(dummyFloat64Column, f)
    74  	return row
    75  }
    76  
// TestInitCronExpr drives InitCronExpr with a range of merge intervals and
// checks both the generated expression (stored in the package-level
// MergeTaskCronExpr) and, by parsing it back, the real delay until the next
// trigger.
func TestInitCronExpr(t *testing.T) {
	type args struct {
		duration time.Duration
	}
	tests := []struct {
		name           string
		args           args
		wantErr        bool
		wantExpr       string
		expectDuration time.Duration // when non-zero, the expected next-trigger delay; otherwise duration-1min is used
	}{
		{name: "1h", args: args{duration: 1 * time.Hour}, wantErr: false, wantExpr: MergeTaskCronExprEvery1Hour},
		{name: "2h", args: args{duration: 2 * time.Hour}, wantErr: false, wantExpr: MergeTaskCronExprEvery2Hour},
		{name: "4h", args: args{duration: 4 * time.Hour}, wantErr: false, wantExpr: MergeTaskCronExprEvery4Hour},
		{name: "3h", args: args{duration: 3 * time.Hour}, wantErr: false, wantExpr: "0 0 3,6,9,12,15,18,21 * * *"},
		{name: "5h", args: args{duration: 5 * time.Hour}, wantErr: false, wantExpr: "0 0 5,10,15,20 * * *"},
		{name: "5min", args: args{duration: 5 * time.Minute}, wantErr: false, wantExpr: MergeTaskCronExprEvery05Min},
		{name: "15min", args: args{duration: 15 * time.Minute}, wantErr: false, wantExpr: MergeTaskCronExprEvery15Min},
		{name: "7min", args: args{duration: 7 * time.Minute}, wantErr: false, wantExpr: "@every 10m", expectDuration: 10 * time.Minute},
		{name: "15s", args: args{duration: 15 * time.Second}, wantErr: false, wantExpr: "@every 15s", expectDuration: 15 * time.Second},
		{name: "2min", args: args{duration: 2 * time.Minute}, wantErr: false, wantExpr: "@every 120s", expectDuration: 2 * time.Minute},
		{name: "13h", args: args{duration: 13 * time.Hour}, wantErr: true, wantExpr: ""},
	}

	ctx := context.Background()
	// The parser mirrors the field layout of the generated expressions:
	// a seconds-resolution 6-field cron plus "@every ..." descriptors.
	parser := cron.NewParser(
		cron.Second |
			cron.Minute |
			cron.Hour |
			cron.Dom |
			cron.Month |
			cron.Dow |
			cron.Descriptor)
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			err := InitCronExpr(ctx, tt.args.duration)
			if tt.wantErr {
				// Unsupported durations (e.g. 13h) must produce ErrNotSupported.
				var e *moerr.Error
				require.True(t, errors.As(err, &e))
				require.True(t, moerr.IsMoErrCode(e, moerr.ErrNotSupported))
			} else {
				require.Equal(t, tt.wantExpr, MergeTaskCronExpr)

				sche, err := parser.Parse(MergeTaskCronExpr)
				require.Nil(t, err)

				// Start the clock at 00:01:00; the UnixMilli round-trip only
				// truncates sub-millisecond precision (none here).
				now := time.Unix(60, 0)
				next := sche.Next(time.UnixMilli(now.UnixMilli()))
				t.Logf("duration: %v, expr: %s, next: %v", tt.args.duration, MergeTaskCronExpr, next)
				if tt.expectDuration > 0 {
					require.Equal(t, tt.expectDuration, next.Sub(now))
				} else {
					// Fixed-schedule expressions fire on the hour boundary, one
					// minute less than the duration away from 00:01:00.
					require.Equal(t, tt.args.duration-time.Minute, next.Sub(now))
				}
			}
		})
	}
}
   135  
   136  var newFilePath = func(tbl *table.Table, ts time.Time) string {
   137  	filename := tbl.PathBuilder.NewLogFilename(tbl.GetName(), "uuid", "node", ts, table.CsvExtension)
   138  	p := tbl.PathBuilder.Build(tbl.Account, table.MergeLogTypeLogs, ts, tbl.Database, tbl.GetName())
   139  	filepath := path.Join(p, filename)
   140  	return filepath
   141  }
   142  
   143  func initLogsFile(ctx context.Context, fs fileservice.FileService, tbl *table.Table, ts time.Time) error {
   144  	mux.Lock()
   145  	defer mux.Unlock()
   146  
   147  	buf := make([]byte, 0, 4096)
   148  
   149  	ts1 := ts
   150  	writer, _ := newETLWriter(ctx, fs, newFilePath(tbl, ts1), buf, nil, nil)
   151  	writer.WriteStrings(dummyFillTable("row1", 1, 1.0).ToStrings())
   152  	writer.WriteStrings(dummyFillTable("row2", 2, 2.0).ToStrings())
   153  	writer.FlushAndClose()
   154  
   155  	ts2 := ts.Add(time.Minute)
   156  	writer, _ = newETLWriter(ctx, fs, newFilePath(tbl, ts2), buf, nil, nil)
   157  	writer.WriteStrings(dummyFillTable("row3", 1, 1.0).ToStrings())
   158  	writer.WriteStrings(dummyFillTable("row4", 2, 2.0).ToStrings())
   159  	writer.FlushAndClose()
   160  
   161  	ts3 := ts.Add(time.Hour)
   162  	writer, _ = newETLWriter(ctx, fs, newFilePath(tbl, ts3), buf, nil, nil)
   163  	writer.WriteStrings(dummyFillTable("row5", 1, 1.0).ToStrings())
   164  	writer.WriteStrings(dummyFillTable("row6", 2, 2.0).ToStrings())
   165  	writer.FlushAndClose()
   166  
   167  	ts1New := ts.Add(time.Hour + time.Minute)
   168  	writer, _ = newETLWriter(ctx, fs, newFilePath(tbl, ts1New), buf, nil, nil)
   169  	writer.WriteStrings(dummyFillTable("row1", 1, 11.0).ToStrings())
   170  	writer.WriteStrings(dummyFillTable("row2", 2, 22.0).ToStrings())
   171  	writer.FlushAndClose()
   172  
   173  	return nil
   174  }
   175  
   176  func initEmptyLogFile(ctx context.Context, fs fileservice.FileService, tbl *table.Table, ts time.Time) ([]string, error) {
   177  	mux.Lock()
   178  	defer mux.Unlock()
   179  
   180  	files := []string{}
   181  	buf := make([]byte, 0, 4096)
   182  
   183  	ts1 := ts
   184  	filePath := newFilePath(tbl, ts1)
   185  	files = append(files, filePath)
   186  	writer, err := newETLWriter(ctx, fs, filePath, buf, nil, nil)
   187  	if err != nil {
   188  		return nil, err
   189  	}
   190  	_, err = writer.FlushAndClose()
   191  	if err != nil {
   192  		return nil, err
   193  	}
   194  
   195  	return files, nil
   196  }
   197  
   198  func initSingleLogsFile(ctx context.Context, fs fileservice.FileService, tbl *table.Table, ts time.Time) error {
   199  	mux.Lock()
   200  	defer mux.Unlock()
   201  
   202  	var newFilePath = func(ts time.Time) string {
   203  		filename := tbl.PathBuilder.NewLogFilename(tbl.GetName(), "uuid", "node", ts, table.CsvExtension)
   204  		p := tbl.PathBuilder.Build(tbl.Account, table.MergeLogTypeLogs, ts, tbl.Database, tbl.GetName())
   205  		filepath := path.Join(p, filename)
   206  		return filepath
   207  	}
   208  
   209  	buf := make([]byte, 0, 4096)
   210  
   211  	ts1 := ts
   212  	writer, _ := newETLWriter(ctx, fs, newFilePath(ts1), buf, nil, nil)
   213  	writer.WriteStrings(dummyFillTable("row1", 1, 1.0).ToStrings())
   214  	writer.WriteStrings(dummyFillTable("row2", 2, 2.0).ToStrings())
   215  	writer.FlushAndClose()
   216  
   217  	return nil
   218  }
   219  
// mergeLock serializes the TestNewMerge* tests, which share the registered
// dummy table and other package-level state.
var mergeLock sync.Mutex
   221  
   222  func TestNewMerge(t *testing.T) {
   223  	mergeLock.Lock()
   224  	defer mergeLock.Unlock()
   225  	fs, err := fileservice.NewLocalETLFS(defines.ETLFileServiceName, t.TempDir())
   226  	require.Nil(t, err)
   227  	ts, _ := time.Parse("2006-01-02 15:04:05", "2021-01-01 00:00:00")
   228  
   229  	ctx := trace.Generate(context.Background())
   230  
   231  	defaultOpts := []MergeOption{WithFileServiceName(defines.ETLFileServiceName),
   232  		WithFileService(fs), WithTable(dummyTable),
   233  		WithMaxFileSize(1), WithMinFilesMerge(1), WithMaxFileSize(16 * mpool.MB), WithMaxMergeJobs(16)}
   234  
   235  	type args struct {
   236  		ctx  context.Context
   237  		opts []MergeOption
   238  		// extension
   239  		logsExt, mergedExt string
   240  	}
   241  	tests := []struct {
   242  		name string
   243  		args args
   244  		want *Merge
   245  	}{
   246  		{
   247  			name: "csv",
   248  			args: args{
   249  				ctx:       ctx,
   250  				opts:      defaultOpts,
   251  				logsExt:   table.CsvExtension,
   252  				mergedExt: table.CsvExtension,
   253  			},
   254  			want: nil,
   255  		},
   256  	}
   257  	for _, tt := range tests {
   258  		t.Run(tt.name, func(t *testing.T) {
   259  
   260  			err := initLogsFile(tt.args.ctx, fs, dummyTable, ts)
   261  			require.Nil(t, err)
   262  
   263  			got, err := NewMerge(tt.args.ctx, tt.args.opts...)
   264  			require.Nil(t, err)
   265  			require.NotNil(t, got)
   266  
   267  			err = got.Main(tt.args.ctx, ts)
   268  			require.Nilf(t, err, "err: %v", err)
   269  
   270  			files := make([]string, 0, 1)
   271  			dir := []string{"/"}
   272  			for len(dir) > 0 {
   273  				entrys, _ := fs.List(tt.args.ctx, dir[0])
   274  				for _, e := range entrys {
   275  					p := path.Join(dir[0], e.Name)
   276  					if e.IsDir {
   277  						dir = append(dir, p)
   278  					} else {
   279  						files = append(files, p)
   280  					}
   281  				}
   282  				dir = dir[1:]
   283  			}
   284  			require.Equal(t, 1, len(files))
   285  			t.Logf("%v", files)
   286  
   287  			//r, err = newETLReader(tt.args.ctx, m.Table, m.FS, path.FilePath, path.FileSize, m.mp)
   288  			r, err := NewCSVReader(tt.args.ctx, fs, files[0])
   289  			require.Nil(t, err)
   290  			lines := 0
   291  			for l, err := r.ReadLine(); l != nil && err == nil; l, err = r.ReadLine() {
   292  				lines++
   293  				t.Logf("line %d: %s", lines, l)
   294  			}
   295  			require.Nil(t, err)
   296  			require.Equal(t, 6, lines)
   297  
   298  		})
   299  	}
   300  }
   301  
// TestNewMergeWithContextDone cancels the context after opening an ETL reader
// over an empty log file, then asserts ReadLine surfaces the "context Done"
// error. It is skipped when simdcsv takes its SIMD path — NOTE(review): the
// skip guard suggests this exercises the non-SIMD reader's cancellation path;
// confirm against newETLReader.
func TestNewMergeWithContextDone(t *testing.T) {
	if simdcsv.SupportedCPU() {
		t.Skip()
	}
	mergeLock.Lock()
	defer mergeLock.Unlock()
	fs, err := fileservice.NewLocalETLFS(defines.ETLFileServiceName, t.TempDir())
	require.Nil(t, err)
	ts, _ := time.Parse("2006-01-02 15:04:05", "2021-01-01 00:00:00")

	ctx := trace.Generate(context.Background())

	type args struct {
		ctx  context.Context
		opts []MergeOption
	}
	tests := []struct {
		name string
		args args
		want *Merge
	}{
		{
			name: "normal",
			args: args{
				ctx: ctx,
				opts: []MergeOption{WithFileServiceName(defines.ETLFileServiceName),
					WithFileService(fs), WithTable(dummyTable),
					WithMaxFileSize(1), WithMinFilesMerge(1), WithMaxFileSize(16 * mpool.MB), WithMaxMergeJobs(16)},
			},
			want: nil,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			ctx, cancel := context.WithCancel(tt.args.ctx)

			files, err := initEmptyLogFile(ctx, fs, dummyTable, ts)
			require.Nil(t, err)

			got, err := NewMerge(ctx, tt.args.opts...)
			require.Nil(t, err)
			require.NotNil(t, got)

			// Open the reader while the context is still live ...
			reader, err := newETLReader(got.ctx, dummyTable, got.FS, files[0], 0, nil)
			require.Nil(t, err)

			// ... then trigger context.Done before the first read; the reader
			// must report the cancellation rather than a normal EOF.
			cancel()
			_, err = reader.ReadLine()
			t.Logf("doMergeFiles meet err: %s", err)
			require.Equal(t, err.Error(), "internal error: read files meet context Done")
		})
	}
}
   356  
   357  func TestNewMergeNOFiles(t *testing.T) {
   358  	if simdcsv.SupportedCPU() {
   359  		t.Skip()
   360  	}
   361  	mergeLock.Lock()
   362  	defer mergeLock.Unlock()
   363  	fs, err := fileservice.NewLocalETLFS(defines.ETLFileServiceName, t.TempDir())
   364  	require.Nil(t, err)
   365  	ts, _ := time.Parse("2006-01-02 15:04:05", "2021-01-01 00:00:00")
   366  
   367  	ctx := trace.Generate(context.Background())
   368  	ctx, cancel := context.WithCancel(ctx)
   369  	defer cancel()
   370  
   371  	type args struct {
   372  		ctx  context.Context
   373  		opts []MergeOption
   374  	}
   375  	tests := []struct {
   376  		name string
   377  		args args
   378  		want *Merge
   379  	}{
   380  		{
   381  			name: "normal",
   382  			args: args{
   383  				ctx: ctx,
   384  				opts: []MergeOption{WithFileServiceName(defines.ETLFileServiceName),
   385  					WithFileService(fs), WithTable(dummyTable),
   386  					WithMaxFileSize(1), WithMinFilesMerge(1), WithMaxFileSize(16 * mpool.MB), WithMaxMergeJobs(16)},
   387  			},
   388  			want: nil,
   389  		},
   390  	}
   391  	for _, tt := range tests {
   392  		t.Run(tt.name, func(t *testing.T) {
   393  			filePath := newFilePath(dummyTable, ts)
   394  			fm := &FileMeta{filePath, 0}
   395  			files := []*FileMeta{fm}
   396  
   397  			got, err := NewMerge(tt.args.ctx, tt.args.opts...)
   398  			require.Nil(t, err)
   399  			require.NotNil(t, got)
   400  
   401  			err = got.doMergeFiles(ctx, dummyTable.Table, files, 0)
   402  			require.Equal(t, true, strings.Contains(err.Error(), "is not found"))
   403  
   404  		})
   405  	}
   406  }
   407  
   408  func TestMergeTaskExecutorFactory(t *testing.T) {
   409  	defer leaktest.AfterTest(t)()
   410  	t.Logf("tmpDir: %s/%s", t.TempDir(), t.Name())
   411  	fs, err := fileservice.NewLocalETLFS(defines.ETLFileServiceName, path.Join(t.TempDir(), t.Name()))
   412  	require.Nil(t, err)
   413  	targetDate := "2021-01-01"
   414  	ts, err := time.Parse("2006-01-02 15:04:05", targetDate+" 00:00:00")
   415  	require.Nil(t, err)
   416  
   417  	type args struct {
   418  		ctx  context.Context
   419  		opts []MergeOption
   420  		task task.Task
   421  	}
   422  	tests := []struct {
   423  		name string
   424  		args args
   425  		want func(ctx context.Context, task task.Task) error
   426  	}{
   427  		{
   428  			name: "normal",
   429  			args: args{
   430  				ctx:  context.Background(),
   431  				opts: []MergeOption{WithFileService(fs), WithMinFilesMerge(1)},
   432  				task: task.Task{
   433  					Metadata: task.TaskMetadata{
   434  						ID:                   "",
   435  						Executor:             0,
   436  						Context:              []byte(strings.Join([]string{dummyTable.GetIdentify(), targetDate}, ParamSeparator)),
   437  						Options:              task.TaskOptions{},
   438  						XXX_NoUnkeyedLiteral: struct{}{},
   439  						XXX_unrecognized:     nil,
   440  						XXX_sizecache:        0,
   441  					},
   442  				},
   443  			},
   444  			want: nil,
   445  		},
   446  	}
   447  	for _, tt := range tests {
   448  		t.Run(tt.name, func(t *testing.T) {
   449  
   450  			err := initSingleLogsFile(tt.args.ctx, fs, dummyTable, ts)
   451  			require.Nil(t, err)
   452  
   453  			got := MergeTaskExecutorFactory(tt.args.opts...)
   454  			require.NotNil(t, got)
   455  
   456  			err = got(tt.args.ctx, tt.args.task)
   457  			require.Nilf(t, err, "err: %v", err)
   458  
   459  			files := make([]string, 0, 1)
   460  			dir := []string{"/"}
   461  			for len(dir) > 0 {
   462  				entrys, _ := fs.List(tt.args.ctx, dir[0])
   463  				for _, e := range entrys {
   464  					p := path.Join(dir[0], e.Name)
   465  					if e.IsDir {
   466  						dir = append(dir, p)
   467  					} else {
   468  						files = append(files, p)
   469  					}
   470  				}
   471  				dir = dir[1:]
   472  			}
   473  			require.Equal(t, 1, len(files))
   474  			t.Logf("%v", files)
   475  		})
   476  	}
   477  }
   478  
   479  func TestCreateCronTask(t *testing.T) {
   480  	store := taskservice.NewMemTaskStorage()
   481  	s := taskservice.NewTaskService(runtime.DefaultRuntime(), store)
   482  	defer func() {
   483  		assert.NoError(t, s.Close())
   484  	}()
   485  	ctx, cancel := context.WithTimeout(context.TODO(), time.Second*10)
   486  	defer cancel()
   487  
   488  	type args struct {
   489  		ctx         context.Context
   490  		executorID  task.TaskCode
   491  		taskService taskservice.TaskService
   492  	}
   493  	tests := []struct {
   494  		name    string
   495  		args    args
   496  		wantErr error
   497  	}{
   498  		{
   499  			name: "name",
   500  			args: args{
   501  				ctx:         ctx,
   502  				executorID:  1,
   503  				taskService: s,
   504  			},
   505  			wantErr: nil,
   506  		},
   507  	}
   508  	for _, tt := range tests {
   509  		t.Run(tt.name, func(t *testing.T) {
   510  			got := CreateCronTask(tt.args.ctx, tt.args.executorID, tt.args.taskService)
   511  			require.Nil(t, got)
   512  		})
   513  	}
   514  }
   515  
   516  func TestNewMergeService(t *testing.T) {
   517  	ctx, cancel := context.WithTimeout(context.TODO(), time.Minute*5)
   518  	defer cancel()
   519  	fs, err := fileservice.NewLocalETLFS(defines.ETLFileServiceName, path.Join(t.TempDir(), t.Name()))
   520  	require.Nil(t, err)
   521  
   522  	type args struct {
   523  		ctx  context.Context
   524  		opts []MergeOption
   525  	}
   526  	tests := []struct {
   527  		name  string
   528  		args  args
   529  		want  *Merge
   530  		want1 bool
   531  	}{
   532  		{
   533  			name: "normal",
   534  			args: args{
   535  				ctx:  ctx,
   536  				opts: []MergeOption{WithFileService(fs), WithMinFilesMerge(1), WithTable(dummyTable)},
   537  			},
   538  			want:  nil,
   539  			want1: false,
   540  		},
   541  	}
   542  	for _, tt := range tests {
   543  		t.Run(tt.name, func(t *testing.T) {
   544  			got, got1, err := NewMergeService(tt.args.ctx, tt.args.opts...)
   545  			require.Nil(t, err)
   546  			require.NotNil(t, got)
   547  			require.Equal(t, tt.want1, got1)
   548  		})
   549  	}
   550  }