github.com/matrixorigin/matrixone@v1.2.0/pkg/util/export/etl/tae_test.go (about) 1 // Copyright 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package etl 16 17 import ( 18 "context" 19 "errors" 20 "fmt" 21 "path" 22 "strings" 23 "testing" 24 "time" 25 26 "github.com/matrixorigin/matrixone/pkg/common/moerr" 27 "github.com/matrixorigin/matrixone/pkg/common/mpool" 28 "github.com/matrixorigin/matrixone/pkg/fileservice" 29 "github.com/matrixorigin/matrixone/pkg/testutil" 30 "github.com/matrixorigin/matrixone/pkg/util/export/table" 31 "github.com/matrixorigin/matrixone/pkg/util/trace" 32 "github.com/matrixorigin/matrixone/pkg/util/trace/impl/motrace" 33 "github.com/stretchr/testify/require" 34 ) 35 36 var dummyStrColumn = table.Column{Name: "str", ColType: table.TVarchar, Scale: 32, Default: "", Comment: "str column"} 37 var dummyInt64Column = table.Column{Name: "int64", ColType: table.TInt64, Default: "0", Comment: "int64 column"} 38 var dummyFloat64Column = table.Column{Name: "float64", ColType: table.TFloat64, Default: "0.0", Comment: "float64 column"} 39 var dummyUInt64Column = table.Column{Name: "uint64", ColType: table.TUint64, Default: "0", Comment: "uint64 column"} 40 var dummyDatetimeColumn = table.Column{Name: "datetime_6", ColType: table.TDatetime, Default: "", Comment: "datetime.6 column"} 41 var dummyJsonColumn = table.Column{Name: "json_col", ColType: table.TJson, Default: "{}", Comment: "json column"} 42 43 var dummyAllTypeTable = &table.Table{ 44 Account: "test", 45 Database: "db_dummy", 46 Table: "tbl_all_type_dummy", 47 Columns: []table.Column{dummyStrColumn, dummyInt64Column, dummyFloat64Column, dummyUInt64Column, dummyDatetimeColumn, dummyJsonColumn}, 48 PrimaryKeyColumn: []table.Column{dummyStrColumn, dummyInt64Column}, 49 Engine: table.ExternalTableEngine, 50 Comment: "dummy table", 51 PathBuilder: table.NewAccountDatePathBuilder(), 52 TableOptions: nil, 53 } 54 55 func TestTAEWriter_WriteElems(t *testing.T) { 56 t.Logf("local timezone: %v", time.Local.String()) 57 mp, err := mpool.NewMPool("test", 0, mpool.NoFixed) 58 require.Nil(t, err) 59 ctx := context.TODO() 60 fs := testutil.NewSharedFS() 61 defer fs.Close() 62 63 filepath := path.Join(t.TempDir(), "file.tae") 64 writer := NewTAEWriter(ctx, dummyAllTypeTable, mp, filepath, fs) 65 66 cnt := 10240 67 lines := genLines(cnt) 68 for _, row := range lines { 69 err = writer.WriteRow(row) 70 require.Nil(t, err) 71 } 72 _, err = writer.FlushAndClose() 73 require.Nil(t, err) 74 for _, row := range lines { 75 row.Free() 76 } 77 // Done. write 78 79 folder := path.Dir(filepath) 80 files, err := fs.List(ctx, folder) 81 require.Nil(t, err) 82 require.Equal(t, 1, len(files)) 83 84 file := files[0] 85 t.Logf("path: %s, size: %d", file.Name, file.Size) 86 87 // ----- reader ------ 88 89 r, err := NewTaeReader(context.TODO(), dummyAllTypeTable, filepath, file.Size, fs, mp) 90 require.Nil(t, err) 91 defer r.Close() 92 93 // read data 94 batchs, err := r.ReadAll(ctx) 95 require.Nil(t, err) 96 require.Equal(t, (cnt+BatchSize)/BatchSize, len(batchs)) 97 98 _, err = r.ReadLine() 99 require.Nil(t, err) 100 101 // read index 102 for _, bbs := range r.bs { 103 _, err = r.blockReader.LoadZoneMaps(context.Background(), 104 r.idxs, bbs.GetID(), mp) 105 require.Nil(t, err) 106 } 107 108 readCnt := 0 109 for batIDX, bat := range batchs { 110 for _, vec := range bat.Vecs { 111 rows, err := GetVectorArrayLen(context.TODO(), vec) 112 require.Nil(t, err) 113 t.Logf("calculate length: %d, vec.Length: %d, type: %s", rows, vec.Length(), vec.GetType().String()) 114 } 115 rows := bat.Vecs[0].Length() 116 ctn := strings.Builder{} 117 for rowId := 0; rowId < rows; rowId++ { 118 for _, vec := range bat.Vecs { 119 val, err := ValToString(context.TODO(), vec, rowId) 120 require.Nil(t, err) 121 ctn.WriteString(val) 122 ctn.WriteString(",") 123 } 124 ctn.WriteRune('\n') 125 } 126 //t.Logf("batch %d: \n%s", batIDX, ctn.String()) 127 t.Logf("read batch %d", batIDX) 128 readCnt += rows 129 } 130 require.Equal(t, cnt, readCnt) 131 } 132 133 func genLines(cnt int) (lines []*table.Row) { 134 lines = make([]*table.Row, 0, cnt) 135 r := dummyAllTypeTable.GetRow(context.TODO()) 136 defer r.Free() 137 for i := 0; i < cnt; i++ { 138 row := r.Clone() 139 row.SetColumnVal(dummyStrColumn, table.StringField(fmt.Sprintf("str_val_%d", i))) 140 row.SetColumnVal(dummyInt64Column, table.Int64Field(int64(i))) 141 row.SetColumnVal(dummyFloat64Column, table.Float64Field(float64(i))) 142 row.SetColumnVal(dummyUInt64Column, table.Uint64Field(uint64(i))) 143 row.SetColumnVal(dummyDatetimeColumn, table.TimeField(time.Now())) 144 row.SetColumnVal(dummyJsonColumn, table.StringField(fmt.Sprintf(`{"cnt":"%d"}`, i))) 145 lines = append(lines, row) 146 } 147 148 return 149 } 150 151 func TestTAEWriter_WriteRow(t *testing.T) { 152 t.Logf("local timezone: %v", time.Local.String()) 153 mp, err := mpool.NewMPool("test", 0, mpool.NoFixed) 154 require.Nil(t, err) 155 ctx := context.TODO() 156 fs := testutil.NewSharedFS() 157 158 type fields struct { 159 ctx context.Context 160 fs fileservice.FileService 161 } 162 type args struct { 163 tbl *table.Table 164 items func() []table.RowField 165 } 166 167 var genSpanData = func() []table.RowField { 168 arr := make([]table.RowField, 0, 128) 169 arr = append(arr, &motrace.MOSpan{ 170 SpanConfig: trace.SpanConfig{SpanContext: trace.SpanContext{ 171 TraceID: trace.NilTraceID, 172 SpanID: trace.NilSpanID, 173 Kind: trace.SpanKindInternal, 174 }}, 175 Name: "span1", 176 StartTime: time.Time{}, 177 EndTime: time.Time{}, 178 Duration: 0, 179 }) 180 arr = append(arr, &motrace.MOSpan{ 181 SpanConfig: trace.SpanConfig{SpanContext: trace.SpanContext{ 182 TraceID: trace.NilTraceID, 183 SpanID: trace.NilSpanID, 184 Kind: trace.SpanKindStatement, 185 }}, 186 Name: "span2", 187 StartTime: time.Time{}, 188 EndTime: time.Time{}, 189 Duration: 100, 190 }) 191 192 return arr 193 } 194 195 var _1TxnID = [16]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1} 196 var _1SesID = [16]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x1} 197 var genStmtData = func() []table.RowField { 198 arr := make([]table.RowField, 0, 128) 199 arr = append(arr, 200 &motrace.StatementInfo{ 201 StatementID: trace.NilTraceID, 202 TransactionID: _1TxnID, 203 SessionID: _1SesID, 204 Account: "MO", 205 User: "moroot", 206 Database: "system", 207 Statement: "show tables", 208 StatementFingerprint: "show tables", 209 StatementTag: "", 210 ExecPlan: nil, 211 }, 212 ) 213 return arr 214 } 215 216 tests := []struct { 217 name string 218 fields fields 219 args args 220 }{ 221 { 222 name: "statement", 223 fields: fields{ 224 ctx: ctx, 225 fs: fs, 226 }, 227 args: args{ 228 tbl: motrace.SingleStatementTable, 229 items: genStmtData, 230 }, 231 }, 232 { 233 name: "span", 234 fields: fields{ 235 ctx: ctx, 236 fs: fs, 237 }, 238 args: args{ 239 tbl: motrace.SingleRowLogTable, 240 items: genSpanData, 241 }, 242 }, 243 } 244 245 for _, tt := range tests { 246 t.Run(tt.name, func(t *testing.T) { 247 248 if tt.name == "span" { 249 return 250 } 251 252 cfg := table.FilePathCfg{NodeUUID: "uuid", NodeType: "type", Extension: table.TaeExtension} 253 filePath := cfg.LogsFilePathFactory("sys", tt.args.tbl, time.Now()) 254 writer := NewTAEWriter(tt.fields.ctx, tt.args.tbl, mp, filePath, tt.fields.fs) 255 items := tt.args.items() 256 for _, item := range items { 257 row := item.GetTable().GetRow(tt.fields.ctx) 258 item.FillRow(tt.fields.ctx, row) 259 writer.WriteRow(row) 260 } 261 writer.FlushAndClose() 262 263 folder := path.Dir(filePath) 264 entrys, err := fs.List(ctx, folder) 265 require.Nil(t, err) 266 require.NotEqual(t, 0, len(entrys)) 267 for _, e := range entrys { 268 t.Logf("file: %s, size: %d, is_dir: %v", e.Name, e.Size, e.IsDir) 269 require.NotEqual(t, 44, e.Size) 270 } 271 }) 272 } 273 } 274 275 func TestTaeReadFile(t *testing.T) { 276 filePath := "rawlog.tae" 277 278 mp, err := mpool.NewMPool("TestTaeReadFile", 0, mpool.NoFixed) 279 require.Nil(t, err) 280 ctx := context.TODO() 281 fs := testutil.NewETLFS() 282 283 entrys, err := fs.List(context.TODO(), "etl:/") 284 require.Nil(t, err) 285 if len(entrys) == 0 { 286 t.Skip() 287 } 288 require.Equal(t, 1, len(entrys)) 289 require.Equal(t, filePath, entrys[0].Name) 290 291 fileSize := entrys[0].Size 292 293 r, err := NewTaeReader(context.TODO(), motrace.SingleRowLogTable, filePath, fileSize, fs, mp) 294 require.Nil(t, err) 295 296 // read data 297 batchs, err := r.ReadAll(ctx) 298 require.Nil(t, err) 299 300 // read index 301 for _, bbs := range r.bs { 302 _, err = r.blockReader.LoadZoneMaps(context.Background(), 303 r.idxs, bbs.GetID(), mp) 304 require.Nil(t, err) 305 } 306 307 readCnt := 0 308 for batIDX, bat := range batchs { 309 for _, vec := range bat.Vecs { 310 rows, err := GetVectorArrayLen(context.TODO(), vec) 311 require.Nil(t, err) 312 t.Logf("calculate length: %d, vec.Length: %d, type: %s", rows, vec.Length(), vec.GetType().String()) 313 } 314 rows := bat.Vecs[0].Length() 315 ctn := strings.Builder{} 316 for rowId := 0; rowId < rows; rowId++ { 317 for _, vec := range bat.Vecs { 318 val, err := ValToString(context.TODO(), vec, rowId) 319 require.Nil(t, err) 320 ctn.WriteString(val) 321 ctn.WriteString(",") 322 } 323 ctn.WriteRune('\n') 324 } 325 t.Logf("batch %d: \n%s", batIDX, ctn.String()) 326 //t.Logf("read batch %d", batIDX) 327 readCnt += rows 328 } 329 } 330 331 func TestTaeReadFile_ReadAll(t *testing.T) { 332 333 mp, err := mpool.NewMPool("TestTaeReadFile", 0, mpool.NoFixed) 334 require.Nil(t, err) 335 ctx := context.TODO() 336 fs := testutil.NewETLFS() 337 338 folder := "/sys/logs/2023/01/11/rawlog" 339 entrys, err := fs.List(context.TODO(), "etl:"+folder) 340 require.Nil(t, err) 341 if len(entrys) == 0 { 342 t.Skip() 343 } 344 345 itemsCnt := make(map[string]int, 2) 346 itemsCnt["span_info"] = 0 347 itemsCnt["log_info"] = 0 348 readCnt := 0 349 for _, e := range entrys { 350 t.Logf("file: %s, size: %d", e.Name, e.Size) 351 352 r, err := NewTaeReader(context.TODO(), motrace.SingleRowLogTable, path.Join(folder, e.Name), e.Size, fs, mp) 353 require.Nil(t, err) 354 355 // read data 356 batchs, err := r.ReadAll(ctx) 357 require.Nil(t, err) 358 359 // read index 360 for _, bbs := range r.bs { 361 _, err = r.blockReader.LoadZoneMaps(context.Background(), 362 r.idxs, bbs.GetID(), mp) 363 require.Nil(t, err) 364 } 365 366 for batIDX, bat := range batchs { 367 for _, vec := range bat.Vecs { 368 rows, err := GetVectorArrayLen(context.TODO(), vec) 369 require.Nil(t, err) 370 t.Logf("calculate length: %d", rows) 371 break 372 //t.Logf("calculate length: %d, vec.Length: %d, type: %s", rows, vec.Length(), vec.GetType().String()) 373 } 374 rows := bat.Vecs[0].Length() 375 ctn := strings.Builder{} 376 for rowId := 0; rowId < rows; rowId++ { 377 for idx, vec := range bat.Vecs { 378 val, err := ValToString(context.TODO(), vec, rowId) 379 require.Nil(t, err) 380 ctn.WriteString(val) 381 ctn.WriteString(",") 382 if idx == 0 { 383 itemsCnt[val]++ 384 } 385 } 386 ctn.WriteRune('\n') 387 } 388 //t.Logf("batch %d: \n%s", batIDX, ctn.String()) 389 t.Logf("read batch %d", batIDX) 390 readCnt += rows 391 } 392 t.Logf("cnt: %v", itemsCnt) 393 } 394 t.Logf("cnt: %v", itemsCnt) 395 } 396 397 func TestTAEWriter_writeEmpty(t *testing.T) { 398 cfg := table.FilePathCfg{NodeUUID: "uuid", NodeType: "type", Extension: table.TaeExtension} 399 ctx := context.TODO() 400 tbl := motrace.SingleStatementTable 401 fs := testutil.NewSharedFS() 402 filePath := cfg.LogsFilePathFactory("sys", tbl, time.Now()) 403 mp, err := mpool.NewMPool("test", 0, mpool.NoFixed) 404 require.Nil(t, err) 405 writer := NewTAEWriter(ctx, tbl, mp, filePath, fs) 406 _, err = writer.FlushAndClose() 407 require.NotNil(t, err) 408 var e *moerr.Error 409 require.True(t, errors.As(err, &e)) 410 require.Equal(t, moerr.ErrEmptyRange, e.ErrorCode()) 411 } 412 413 func TestTAEWriter_WriteStrings(t *testing.T) { 414 415 type fields struct { 416 } 417 type args struct { 418 prepare func() (Line []string) 419 } 420 tests := []struct { 421 name string 422 fields fields 423 args args 424 wantErr bool 425 }{ 426 { 427 name: "normal", 428 args: args{ 429 prepare: func() (Line []string) { 430 rows := genLines(1) 431 return rows[0].ToStrings() 432 }, 433 }, 434 wantErr: false, 435 }, 436 } 437 438 cfg := table.FilePathCfg{NodeUUID: "uuid", NodeType: "type", Extension: table.TaeExtension} 439 ctx := context.TODO() 440 tbl := dummyAllTypeTable 441 fs := testutil.NewSharedFS() 442 filePath := cfg.LogsFilePathFactory("sys", tbl, time.Now()) 443 mp, err := mpool.NewMPool("test", 0, mpool.NoFixed) 444 require.Nil(t, err) 445 writer := NewTAEWriter(ctx, tbl, mp, filePath, fs) 446 defer writer.FlushAndClose() 447 448 for _, tt := range tests { 449 t.Run(tt.name, func(t *testing.T) { 450 if err := writer.WriteStrings(tt.args.prepare()); (err != nil) != tt.wantErr { 451 t.Errorf("WriteStrings() error = %v, wantErr %v", err, tt.wantErr) 452 } 453 }) 454 } 455 }