github.com/matrixorigin/matrixone@v1.2.0/pkg/util/export/etl/tae_test.go (about)

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package etl
    16  
    17  import (
    18  	"context"
    19  	"errors"
    20  	"fmt"
    21  	"path"
    22  	"strings"
    23  	"testing"
    24  	"time"
    25  
    26  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    27  	"github.com/matrixorigin/matrixone/pkg/common/mpool"
    28  	"github.com/matrixorigin/matrixone/pkg/fileservice"
    29  	"github.com/matrixorigin/matrixone/pkg/testutil"
    30  	"github.com/matrixorigin/matrixone/pkg/util/export/table"
    31  	"github.com/matrixorigin/matrixone/pkg/util/trace"
    32  	"github.com/matrixorigin/matrixone/pkg/util/trace/impl/motrace"
    33  	"github.com/stretchr/testify/require"
    34  )
    35  
    36  var dummyStrColumn = table.Column{Name: "str", ColType: table.TVarchar, Scale: 32, Default: "", Comment: "str column"}
    37  var dummyInt64Column = table.Column{Name: "int64", ColType: table.TInt64, Default: "0", Comment: "int64 column"}
    38  var dummyFloat64Column = table.Column{Name: "float64", ColType: table.TFloat64, Default: "0.0", Comment: "float64 column"}
    39  var dummyUInt64Column = table.Column{Name: "uint64", ColType: table.TUint64, Default: "0", Comment: "uint64 column"}
    40  var dummyDatetimeColumn = table.Column{Name: "datetime_6", ColType: table.TDatetime, Default: "", Comment: "datetime.6 column"}
    41  var dummyJsonColumn = table.Column{Name: "json_col", ColType: table.TJson, Default: "{}", Comment: "json column"}
    42  
// dummyAllTypeTable is a test table definition that uses all dummy column
// fixtures above, with a composite (str, int64) primary key, the external
// table engine, and the account/date path builder.
var dummyAllTypeTable = &table.Table{
	Account:          "test",
	Database:         "db_dummy",
	Table:            "tbl_all_type_dummy",
	Columns:          []table.Column{dummyStrColumn, dummyInt64Column, dummyFloat64Column, dummyUInt64Column, dummyDatetimeColumn, dummyJsonColumn},
	PrimaryKeyColumn: []table.Column{dummyStrColumn, dummyInt64Column},
	Engine:           table.ExternalTableEngine,
	Comment:          "dummy table",
	PathBuilder:      table.NewAccountDatePathBuilder(),
	TableOptions:     nil,
}
    54  
    55  func TestTAEWriter_WriteElems(t *testing.T) {
    56  	t.Logf("local timezone: %v", time.Local.String())
    57  	mp, err := mpool.NewMPool("test", 0, mpool.NoFixed)
    58  	require.Nil(t, err)
    59  	ctx := context.TODO()
    60  	fs := testutil.NewSharedFS()
    61  	defer fs.Close()
    62  
    63  	filepath := path.Join(t.TempDir(), "file.tae")
    64  	writer := NewTAEWriter(ctx, dummyAllTypeTable, mp, filepath, fs)
    65  
    66  	cnt := 10240
    67  	lines := genLines(cnt)
    68  	for _, row := range lines {
    69  		err = writer.WriteRow(row)
    70  		require.Nil(t, err)
    71  	}
    72  	_, err = writer.FlushAndClose()
    73  	require.Nil(t, err)
    74  	for _, row := range lines {
    75  		row.Free()
    76  	}
    77  	// Done. write
    78  
    79  	folder := path.Dir(filepath)
    80  	files, err := fs.List(ctx, folder)
    81  	require.Nil(t, err)
    82  	require.Equal(t, 1, len(files))
    83  
    84  	file := files[0]
    85  	t.Logf("path: %s, size: %d", file.Name, file.Size)
    86  
    87  	// ----- reader ------
    88  
    89  	r, err := NewTaeReader(context.TODO(), dummyAllTypeTable, filepath, file.Size, fs, mp)
    90  	require.Nil(t, err)
    91  	defer r.Close()
    92  
    93  	// read data
    94  	batchs, err := r.ReadAll(ctx)
    95  	require.Nil(t, err)
    96  	require.Equal(t, (cnt+BatchSize)/BatchSize, len(batchs))
    97  
    98  	_, err = r.ReadLine()
    99  	require.Nil(t, err)
   100  
   101  	// read index
   102  	for _, bbs := range r.bs {
   103  		_, err = r.blockReader.LoadZoneMaps(context.Background(),
   104  			r.idxs, bbs.GetID(), mp)
   105  		require.Nil(t, err)
   106  	}
   107  
   108  	readCnt := 0
   109  	for batIDX, bat := range batchs {
   110  		for _, vec := range bat.Vecs {
   111  			rows, err := GetVectorArrayLen(context.TODO(), vec)
   112  			require.Nil(t, err)
   113  			t.Logf("calculate length: %d, vec.Length: %d, type: %s", rows, vec.Length(), vec.GetType().String())
   114  		}
   115  		rows := bat.Vecs[0].Length()
   116  		ctn := strings.Builder{}
   117  		for rowId := 0; rowId < rows; rowId++ {
   118  			for _, vec := range bat.Vecs {
   119  				val, err := ValToString(context.TODO(), vec, rowId)
   120  				require.Nil(t, err)
   121  				ctn.WriteString(val)
   122  				ctn.WriteString(",")
   123  			}
   124  			ctn.WriteRune('\n')
   125  		}
   126  		//t.Logf("batch %d: \n%s", batIDX, ctn.String())
   127  		t.Logf("read batch %d", batIDX)
   128  		readCnt += rows
   129  	}
   130  	require.Equal(t, cnt, readCnt)
   131  }
   132  
   133  func genLines(cnt int) (lines []*table.Row) {
   134  	lines = make([]*table.Row, 0, cnt)
   135  	r := dummyAllTypeTable.GetRow(context.TODO())
   136  	defer r.Free()
   137  	for i := 0; i < cnt; i++ {
   138  		row := r.Clone()
   139  		row.SetColumnVal(dummyStrColumn, table.StringField(fmt.Sprintf("str_val_%d", i)))
   140  		row.SetColumnVal(dummyInt64Column, table.Int64Field(int64(i)))
   141  		row.SetColumnVal(dummyFloat64Column, table.Float64Field(float64(i)))
   142  		row.SetColumnVal(dummyUInt64Column, table.Uint64Field(uint64(i)))
   143  		row.SetColumnVal(dummyDatetimeColumn, table.TimeField(time.Now()))
   144  		row.SetColumnVal(dummyJsonColumn, table.StringField(fmt.Sprintf(`{"cnt":"%d"}`, i)))
   145  		lines = append(lines, row)
   146  	}
   147  
   148  	return
   149  }
   150  
   151  func TestTAEWriter_WriteRow(t *testing.T) {
   152  	t.Logf("local timezone: %v", time.Local.String())
   153  	mp, err := mpool.NewMPool("test", 0, mpool.NoFixed)
   154  	require.Nil(t, err)
   155  	ctx := context.TODO()
   156  	fs := testutil.NewSharedFS()
   157  
   158  	type fields struct {
   159  		ctx context.Context
   160  		fs  fileservice.FileService
   161  	}
   162  	type args struct {
   163  		tbl   *table.Table
   164  		items func() []table.RowField
   165  	}
   166  
   167  	var genSpanData = func() []table.RowField {
   168  		arr := make([]table.RowField, 0, 128)
   169  		arr = append(arr, &motrace.MOSpan{
   170  			SpanConfig: trace.SpanConfig{SpanContext: trace.SpanContext{
   171  				TraceID: trace.NilTraceID,
   172  				SpanID:  trace.NilSpanID,
   173  				Kind:    trace.SpanKindInternal,
   174  			}},
   175  			Name:      "span1",
   176  			StartTime: time.Time{},
   177  			EndTime:   time.Time{},
   178  			Duration:  0,
   179  		})
   180  		arr = append(arr, &motrace.MOSpan{
   181  			SpanConfig: trace.SpanConfig{SpanContext: trace.SpanContext{
   182  				TraceID: trace.NilTraceID,
   183  				SpanID:  trace.NilSpanID,
   184  				Kind:    trace.SpanKindStatement,
   185  			}},
   186  			Name:      "span2",
   187  			StartTime: time.Time{},
   188  			EndTime:   time.Time{},
   189  			Duration:  100,
   190  		})
   191  
   192  		return arr
   193  	}
   194  
   195  	var _1TxnID = [16]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1}
   196  	var _1SesID = [16]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1}
   197  	var genStmtData = func() []table.RowField {
   198  		arr := make([]table.RowField, 0, 128)
   199  		arr = append(arr,
   200  			&motrace.StatementInfo{
   201  				StatementID:          trace.NilTraceID,
   202  				TransactionID:        _1TxnID,
   203  				SessionID:            _1SesID,
   204  				Account:              "MO",
   205  				User:                 "moroot",
   206  				Database:             "system",
   207  				Statement:            "show tables",
   208  				StatementFingerprint: "show tables",
   209  				StatementTag:         "",
   210  				ExecPlan:             nil,
   211  			},
   212  		)
   213  		return arr
   214  	}
   215  
   216  	tests := []struct {
   217  		name   string
   218  		fields fields
   219  		args   args
   220  	}{
   221  		{
   222  			name: "statement",
   223  			fields: fields{
   224  				ctx: ctx,
   225  				fs:  fs,
   226  			},
   227  			args: args{
   228  				tbl:   motrace.SingleStatementTable,
   229  				items: genStmtData,
   230  			},
   231  		},
   232  		{
   233  			name: "span",
   234  			fields: fields{
   235  				ctx: ctx,
   236  				fs:  fs,
   237  			},
   238  			args: args{
   239  				tbl:   motrace.SingleRowLogTable,
   240  				items: genSpanData,
   241  			},
   242  		},
   243  	}
   244  
   245  	for _, tt := range tests {
   246  		t.Run(tt.name, func(t *testing.T) {
   247  
   248  			if tt.name == "span" {
   249  				return
   250  			}
   251  
   252  			cfg := table.FilePathCfg{NodeUUID: "uuid", NodeType: "type", Extension: table.TaeExtension}
   253  			filePath := cfg.LogsFilePathFactory("sys", tt.args.tbl, time.Now())
   254  			writer := NewTAEWriter(tt.fields.ctx, tt.args.tbl, mp, filePath, tt.fields.fs)
   255  			items := tt.args.items()
   256  			for _, item := range items {
   257  				row := item.GetTable().GetRow(tt.fields.ctx)
   258  				item.FillRow(tt.fields.ctx, row)
   259  				writer.WriteRow(row)
   260  			}
   261  			writer.FlushAndClose()
   262  
   263  			folder := path.Dir(filePath)
   264  			entrys, err := fs.List(ctx, folder)
   265  			require.Nil(t, err)
   266  			require.NotEqual(t, 0, len(entrys))
   267  			for _, e := range entrys {
   268  				t.Logf("file: %s, size: %d, is_dir: %v", e.Name, e.Size, e.IsDir)
   269  				require.NotEqual(t, 44, e.Size)
   270  			}
   271  		})
   272  	}
   273  }
   274  
   275  func TestTaeReadFile(t *testing.T) {
   276  	filePath := "rawlog.tae"
   277  
   278  	mp, err := mpool.NewMPool("TestTaeReadFile", 0, mpool.NoFixed)
   279  	require.Nil(t, err)
   280  	ctx := context.TODO()
   281  	fs := testutil.NewETLFS()
   282  
   283  	entrys, err := fs.List(context.TODO(), "etl:/")
   284  	require.Nil(t, err)
   285  	if len(entrys) == 0 {
   286  		t.Skip()
   287  	}
   288  	require.Equal(t, 1, len(entrys))
   289  	require.Equal(t, filePath, entrys[0].Name)
   290  
   291  	fileSize := entrys[0].Size
   292  
   293  	r, err := NewTaeReader(context.TODO(), motrace.SingleRowLogTable, filePath, fileSize, fs, mp)
   294  	require.Nil(t, err)
   295  
   296  	// read data
   297  	batchs, err := r.ReadAll(ctx)
   298  	require.Nil(t, err)
   299  
   300  	// read index
   301  	for _, bbs := range r.bs {
   302  		_, err = r.blockReader.LoadZoneMaps(context.Background(),
   303  			r.idxs, bbs.GetID(), mp)
   304  		require.Nil(t, err)
   305  	}
   306  
   307  	readCnt := 0
   308  	for batIDX, bat := range batchs {
   309  		for _, vec := range bat.Vecs {
   310  			rows, err := GetVectorArrayLen(context.TODO(), vec)
   311  			require.Nil(t, err)
   312  			t.Logf("calculate length: %d, vec.Length: %d, type: %s", rows, vec.Length(), vec.GetType().String())
   313  		}
   314  		rows := bat.Vecs[0].Length()
   315  		ctn := strings.Builder{}
   316  		for rowId := 0; rowId < rows; rowId++ {
   317  			for _, vec := range bat.Vecs {
   318  				val, err := ValToString(context.TODO(), vec, rowId)
   319  				require.Nil(t, err)
   320  				ctn.WriteString(val)
   321  				ctn.WriteString(",")
   322  			}
   323  			ctn.WriteRune('\n')
   324  		}
   325  		t.Logf("batch %d: \n%s", batIDX, ctn.String())
   326  		//t.Logf("read batch %d", batIDX)
   327  		readCnt += rows
   328  	}
   329  }
   330  
   331  func TestTaeReadFile_ReadAll(t *testing.T) {
   332  
   333  	mp, err := mpool.NewMPool("TestTaeReadFile", 0, mpool.NoFixed)
   334  	require.Nil(t, err)
   335  	ctx := context.TODO()
   336  	fs := testutil.NewETLFS()
   337  
   338  	folder := "/sys/logs/2023/01/11/rawlog"
   339  	entrys, err := fs.List(context.TODO(), "etl:"+folder)
   340  	require.Nil(t, err)
   341  	if len(entrys) == 0 {
   342  		t.Skip()
   343  	}
   344  
   345  	itemsCnt := make(map[string]int, 2)
   346  	itemsCnt["span_info"] = 0
   347  	itemsCnt["log_info"] = 0
   348  	readCnt := 0
   349  	for _, e := range entrys {
   350  		t.Logf("file: %s, size: %d", e.Name, e.Size)
   351  
   352  		r, err := NewTaeReader(context.TODO(), motrace.SingleRowLogTable, path.Join(folder, e.Name), e.Size, fs, mp)
   353  		require.Nil(t, err)
   354  
   355  		// read data
   356  		batchs, err := r.ReadAll(ctx)
   357  		require.Nil(t, err)
   358  
   359  		// read index
   360  		for _, bbs := range r.bs {
   361  			_, err = r.blockReader.LoadZoneMaps(context.Background(),
   362  				r.idxs, bbs.GetID(), mp)
   363  			require.Nil(t, err)
   364  		}
   365  
   366  		for batIDX, bat := range batchs {
   367  			for _, vec := range bat.Vecs {
   368  				rows, err := GetVectorArrayLen(context.TODO(), vec)
   369  				require.Nil(t, err)
   370  				t.Logf("calculate length: %d", rows)
   371  				break
   372  				//t.Logf("calculate length: %d, vec.Length: %d, type: %s", rows, vec.Length(), vec.GetType().String())
   373  			}
   374  			rows := bat.Vecs[0].Length()
   375  			ctn := strings.Builder{}
   376  			for rowId := 0; rowId < rows; rowId++ {
   377  				for idx, vec := range bat.Vecs {
   378  					val, err := ValToString(context.TODO(), vec, rowId)
   379  					require.Nil(t, err)
   380  					ctn.WriteString(val)
   381  					ctn.WriteString(",")
   382  					if idx == 0 {
   383  						itemsCnt[val]++
   384  					}
   385  				}
   386  				ctn.WriteRune('\n')
   387  			}
   388  			//t.Logf("batch %d: \n%s", batIDX, ctn.String())
   389  			t.Logf("read batch %d", batIDX)
   390  			readCnt += rows
   391  		}
   392  		t.Logf("cnt: %v", itemsCnt)
   393  	}
   394  	t.Logf("cnt: %v", itemsCnt)
   395  }
   396  
   397  func TestTAEWriter_writeEmpty(t *testing.T) {
   398  	cfg := table.FilePathCfg{NodeUUID: "uuid", NodeType: "type", Extension: table.TaeExtension}
   399  	ctx := context.TODO()
   400  	tbl := motrace.SingleStatementTable
   401  	fs := testutil.NewSharedFS()
   402  	filePath := cfg.LogsFilePathFactory("sys", tbl, time.Now())
   403  	mp, err := mpool.NewMPool("test", 0, mpool.NoFixed)
   404  	require.Nil(t, err)
   405  	writer := NewTAEWriter(ctx, tbl, mp, filePath, fs)
   406  	_, err = writer.FlushAndClose()
   407  	require.NotNil(t, err)
   408  	var e *moerr.Error
   409  	require.True(t, errors.As(err, &e))
   410  	require.Equal(t, moerr.ErrEmptyRange, e.ErrorCode())
   411  }
   412  
   413  func TestTAEWriter_WriteStrings(t *testing.T) {
   414  
   415  	type fields struct {
   416  	}
   417  	type args struct {
   418  		prepare func() (Line []string)
   419  	}
   420  	tests := []struct {
   421  		name    string
   422  		fields  fields
   423  		args    args
   424  		wantErr bool
   425  	}{
   426  		{
   427  			name: "normal",
   428  			args: args{
   429  				prepare: func() (Line []string) {
   430  					rows := genLines(1)
   431  					return rows[0].ToStrings()
   432  				},
   433  			},
   434  			wantErr: false,
   435  		},
   436  	}
   437  
   438  	cfg := table.FilePathCfg{NodeUUID: "uuid", NodeType: "type", Extension: table.TaeExtension}
   439  	ctx := context.TODO()
   440  	tbl := dummyAllTypeTable
   441  	fs := testutil.NewSharedFS()
   442  	filePath := cfg.LogsFilePathFactory("sys", tbl, time.Now())
   443  	mp, err := mpool.NewMPool("test", 0, mpool.NoFixed)
   444  	require.Nil(t, err)
   445  	writer := NewTAEWriter(ctx, tbl, mp, filePath, fs)
   446  	defer writer.FlushAndClose()
   447  
   448  	for _, tt := range tests {
   449  		t.Run(tt.name, func(t *testing.T) {
   450  			if err := writer.WriteStrings(tt.args.prepare()); (err != nil) != tt.wantErr {
   451  				t.Errorf("WriteStrings() error = %v, wantErr %v", err, tt.wantErr)
   452  			}
   453  		})
   454  	}
   455  }