github.com/matrixorigin/matrixone@v0.7.0/pkg/util/export/merge_test.go

// Copyright 2022 Matrix Origin
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package export

import (
	"context"
	"errors"
	"path"
	"strings"
	"sync"
	"testing"
	"time"

	"github.com/matrixorigin/matrixone/pkg/common/moerr"
	"github.com/matrixorigin/matrixone/pkg/common/mpool"
	"github.com/matrixorigin/matrixone/pkg/common/runtime"
	"github.com/matrixorigin/matrixone/pkg/defines"
	"github.com/matrixorigin/matrixone/pkg/fileservice"
	"github.com/matrixorigin/matrixone/pkg/logutil"
	"github.com/matrixorigin/matrixone/pkg/pb/metadata"
	"github.com/matrixorigin/matrixone/pkg/pb/task"
	"github.com/matrixorigin/matrixone/pkg/taskservice"
	"github.com/matrixorigin/matrixone/pkg/util/export/table"
	"github.com/matrixorigin/matrixone/pkg/util/trace"
	"github.com/robfig/cron/v3"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"

	"github.com/lni/goutils/leaktest"
	"github.com/matrixorigin/simdcsv"
)

func init() {
	time.Local = time.FixedZone("CST", 0) // set time-zone +0000
	table.RegisterTableDefine(dummyTable)
	runtime.SetupProcessLevelRuntime(runtime.NewRuntime(metadata.ServiceType_CN, "test", logutil.GetGlobalLogger()))
}

var mux sync.Mutex

var dummyStrColumn = table.Column{Name: "str", ColType: table.TVarchar, Precision: 32, Default: "", Comment: "str column"}
var dummyInt64Column = table.Column{Name: "int64", ColType: table.TInt64, Default: "0", Comment: "int64 column"}
var dummyFloat64Column = table.Column{Name: "float64", ColType: table.TFloat64, Default: "0.0", Comment: "float64 column"}

var dummyTable = &table.Table{
	Account:          "test",
	Database:         "db_dummy",
	Table:            "tbl_dummy",
	Columns:          []table.Column{dummyStrColumn, dummyInt64Column, dummyFloat64Column},
	PrimaryKeyColumn: []table.Column{dummyStrColumn, dummyInt64Column},
	Engine:           table.ExternalTableEngine,
	Comment:          "dummy table",
	PathBuilder:      table.NewAccountDatePathBuilder(),
	TableOptions:     nil,
}

func dummyFillTable(str string, i int64, f float64) *table.Row {
	row := dummyTable.GetRow(context.TODO())
	row.SetColumnVal(dummyStrColumn, str)
	row.SetColumnVal(dummyInt64Column, i)
	row.SetColumnVal(dummyFloat64Column, f)
	return row
}

func TestInitCronExpr(t *testing.T) {
	type args struct {
		duration time.Duration
	}
	tests := []struct {
		name           string
		args           args
		wantErr        bool
		wantExpr       string
		expectDuration time.Duration
	}{
		{name: "1h", args: args{duration: 1 * time.Hour}, wantErr: false, wantExpr: MergeTaskCronExprEvery1Hour},
		{name: "2h", args: args{duration: 2 * time.Hour}, wantErr: false, wantExpr: MergeTaskCronExprEvery2Hour},
		{name: "4h", args: args{duration: 4 * time.Hour}, wantErr: false, wantExpr: MergeTaskCronExprEvery4Hour},
		{name: "3h", args: args{duration: 3 * time.Hour}, wantErr: false, wantExpr: "0 0 3,6,9,12,15,18,21 * * *"},
		{name: "5h", args: args{duration: 5 * time.Hour}, wantErr: false, wantExpr: "0 0 5,10,15,20 * * *"},
		{name: "5min", args: args{duration: 5 * time.Minute}, wantErr: false, wantExpr: MergeTaskCronExprEvery05Min},
		{name: "15min", args: args{duration: 15 * time.Minute}, wantErr: false, wantExpr: MergeTaskCronExprEvery15Min},
		{name: "7min", args: args{duration: 7 * time.Minute}, wantErr: false, wantExpr: "@every 10m", expectDuration: 10 * time.Minute},
		{name: "15s", args: args{duration: 15 * time.Second}, wantErr: false, wantExpr: "@every 15s", expectDuration: 15 * time.Second},
		{name: "2min", args: args{duration: 2 * time.Minute}, wantErr: false, wantExpr: "@every 120s", expectDuration: 2 * time.Minute},
		{name: "13h", args: args{duration: 13 * time.Hour}, wantErr: true, wantExpr: ""},
	}

	ctx := context.Background()
	parser := cron.NewParser(
		cron.Second |
			cron.Minute |
			cron.Hour |
			cron.Dom |
			cron.Month |
			cron.Dow |
			cron.Descriptor)
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			err := InitCronExpr(ctx, tt.args.duration)
			if tt.wantErr {
				var e *moerr.Error
				require.True(t, errors.As(err, &e))
				require.True(t, moerr.IsMoErrCode(e, moerr.ErrNotSupported))
			} else {
				require.Equal(t, tt.wantExpr, MergeTaskCronExpr)

				sche, err := parser.Parse(MergeTaskCronExpr)
				require.Nil(t, err)

				now := time.Unix(60, 0)
				next := sche.Next(time.UnixMilli(now.UnixMilli()))
				t.Logf("duration: %v, expr: %s, next: %v", tt.args.duration, MergeTaskCronExpr, next)
				if tt.expectDuration > 0 {
					require.Equal(t, tt.expectDuration, next.Sub(now))
				} else {
					require.Equal(t, tt.args.duration-time.Minute, next.Sub(now))
				}
			}
		})
	}
}
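// The cases above pin down the contract this test expects from InitCronExpr:
// hour-scale intervals up to 12h become fixed hour-list expressions (e.g. 3h
// -> "0 0 3,6,9,12,15,18,21 * * *"), minute/second-scale intervals become
// "@every" specs (with 7min rounded up to "@every 10m"), and intervals beyond
// 12h such as 13h are rejected with moerr.ErrNotSupported. This reading is
// inferred from the table, not from the InitCronExpr implementation itself.

// newFilePath builds the logs-stage CSV path for tbl at ts, using the same
// account/date path layout the merge task later scans.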
var newFilePath = func(tbl *table.Table, ts time.Time) string {
	filename := tbl.PathBuilder.NewLogFilename(tbl.GetName(), "uuid", "node", ts, table.CsvExtension)
	p := tbl.PathBuilder.Build(tbl.Account, table.MergeLogTypeLogs, ts, tbl.Database, tbl.GetName())
	filepath := path.Join(p, filename)
	return filepath
}

func initLogsFile(ctx context.Context, fs fileservice.FileService, tbl *table.Table, ts time.Time) error {
	mux.Lock()
	defer mux.Unlock()

	buf := make([]byte, 0, 4096)

	ts1 := ts
	writer, _ := newETLWriter(ctx, fs, newFilePath(tbl, ts1), buf, nil, nil)
	writer.WriteStrings(dummyFillTable("row1", 1, 1.0).ToStrings())
	writer.WriteStrings(dummyFillTable("row2", 2, 2.0).ToStrings())
	writer.FlushAndClose()

	ts2 := ts.Add(time.Minute)
	writer, _ = newETLWriter(ctx, fs, newFilePath(tbl, ts2), buf, nil, nil)
	writer.WriteStrings(dummyFillTable("row3", 1, 1.0).ToStrings())
	writer.WriteStrings(dummyFillTable("row4", 2, 2.0).ToStrings())
	writer.FlushAndClose()

	ts3 := ts.Add(time.Hour)
	writer, _ = newETLWriter(ctx, fs, newFilePath(tbl, ts3), buf, nil, nil)
	writer.WriteStrings(dummyFillTable("row5", 1, 1.0).ToStrings())
	writer.WriteStrings(dummyFillTable("row6", 2, 2.0).ToStrings())
	writer.FlushAndClose()

	ts1New := ts.Add(time.Hour + time.Minute)
	writer, _ = newETLWriter(ctx, fs, newFilePath(tbl, ts1New), buf, nil, nil)
	writer.WriteStrings(dummyFillTable("row1", 1, 11.0).ToStrings())
	writer.WriteStrings(dummyFillTable("row2", 2, 22.0).ToStrings())
	writer.FlushAndClose()

	return nil
}
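// initEmptyLogFile creates one CSV file for tbl at ts without writing any
// rows, and returns the paths of the files it created; the context-done test
// below reads this file back through newETLReader.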
func initEmptyLogFile(ctx context.Context, fs fileservice.FileService, tbl *table.Table, ts time.Time) ([]string, error) {
	mux.Lock()
	defer mux.Unlock()

	files := []string{}
	buf := make([]byte, 0, 4096)

	ts1 := ts
	filePath := newFilePath(tbl, ts1)
	files = append(files, filePath)
	writer, err := newETLWriter(ctx, fs, filePath, buf, nil, nil)
	if err != nil {
		return nil, err
	}
	_, err = writer.FlushAndClose()
	if err != nil {
		return nil, err
	}

	return files, nil
}

func initSingleLogsFile(ctx context.Context, fs fileservice.FileService, tbl *table.Table, ts time.Time) error {
	mux.Lock()
	defer mux.Unlock()

	var newFilePath = func(ts time.Time) string {
		filename := tbl.PathBuilder.NewLogFilename(tbl.GetName(), "uuid", "node", ts, table.CsvExtension)
		p := tbl.PathBuilder.Build(tbl.Account, table.MergeLogTypeLogs, ts, tbl.Database, tbl.GetName())
		filepath := path.Join(p, filename)
		return filepath
	}

	buf := make([]byte, 0, 4096)

	ts1 := ts
	writer, _ := newETLWriter(ctx, fs, newFilePath(ts1), buf, nil, nil)
	writer.WriteStrings(dummyFillTable("row1", 1, 1.0).ToStrings())
	writer.WriteStrings(dummyFillTable("row2", 2, 2.0).ToStrings())
	writer.FlushAndClose()

	return nil
}

var mergeLock sync.Mutex
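// TestNewMerge drives the merge path end to end: initLogsFile lays down four
// small CSV files (with "row1"/"row2" written twice at different timestamps),
// Merge.Main is expected to collapse them into a single merged file, and the
// CSV read-back asserts that exactly 6 rows survive, i.e. rows sharing the
// primary key (str, int64) keep only one copy.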
func TestNewMerge(t *testing.T) {
	mergeLock.Lock()
	defer mergeLock.Unlock()
	fs, err := fileservice.NewLocalETLFS(defines.ETLFileServiceName, t.TempDir())
	require.Nil(t, err)
	ts, _ := time.Parse("2006-01-02 15:04:05", "2021-01-01 00:00:00")

	ctx := trace.Generate(context.Background())

	// note: WithMaxFileSize appears twice; options are applied in order, so
	// the later 16 MB value is the effective one.
	defaultOpts := []MergeOption{WithFileServiceName(defines.ETLFileServiceName),
		WithFileService(fs), WithTable(dummyTable),
		WithMaxFileSize(1), WithMinFilesMerge(1), WithMaxFileSize(16 * mpool.MB), WithMaxMergeJobs(16)}

	type args struct {
		ctx  context.Context
		opts []MergeOption
		// extension
		logsExt, mergedExt string
	}
	tests := []struct {
		name string
		args args
		want *Merge
	}{
		{
			name: "csv",
			args: args{
				ctx:       ctx,
				opts:      defaultOpts,
				logsExt:   table.CsvExtension,
				mergedExt: table.CsvExtension,
			},
			want: nil,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			err := initLogsFile(tt.args.ctx, fs, dummyTable, ts)
			require.Nil(t, err)

			got, err := NewMerge(tt.args.ctx, tt.args.opts...)
			require.Nil(t, err)
			require.NotNil(t, got)

			err = got.Main(tt.args.ctx, ts)
			require.Nilf(t, err, "err: %v", err)

			files := make([]string, 0, 1)
			dir := []string{"/"}
			for len(dir) > 0 {
				entries, _ := fs.List(tt.args.ctx, dir[0])
				for _, e := range entries {
					p := path.Join(dir[0], e.Name)
					if e.IsDir {
						dir = append(dir, p)
					} else {
						files = append(files, p)
					}
				}
				dir = dir[1:]
			}
			require.Equal(t, 1, len(files))
			t.Logf("%v", files)

			//r, err = newETLReader(tt.args.ctx, m.Table, m.FS, path.FilePath, path.FileSize, m.mp)
			r, err := NewCSVReader(tt.args.ctx, fs, files[0])
			require.Nil(t, err)
			lines := 0
			var line []string
			for line, err = r.ReadLine(); line != nil && err == nil; line, err = r.ReadLine() {
				lines++
				t.Logf("line %d: %s", lines, line)
			}
			require.Nil(t, err)
			require.Equal(t, 6, lines)
		})
	}
}

func TestNewMergeWithContextDone(t *testing.T) {
	if simdcsv.SupportedCPU() {
		t.Skip()
	}
	mergeLock.Lock()
	defer mergeLock.Unlock()
	fs, err := fileservice.NewLocalETLFS(defines.ETLFileServiceName, t.TempDir())
	require.Nil(t, err)
	ts, _ := time.Parse("2006-01-02 15:04:05", "2021-01-01 00:00:00")

	ctx := trace.Generate(context.Background())

	type args struct {
		ctx  context.Context
		opts []MergeOption
	}
	tests := []struct {
		name string
		args args
		want *Merge
	}{
		{
			name: "normal",
			args: args{
				ctx: ctx,
				opts: []MergeOption{WithFileServiceName(defines.ETLFileServiceName),
					WithFileService(fs), WithTable(dummyTable),
					WithMaxFileSize(1), WithMinFilesMerge(1), WithMaxFileSize(16 * mpool.MB), WithMaxMergeJobs(16)},
			},
			want: nil,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			ctx, cancel := context.WithCancel(tt.args.ctx)

			files, err := initEmptyLogFile(ctx, fs, dummyTable, ts)
			require.Nil(t, err)

			got, err := NewMerge(ctx, tt.args.opts...)
			require.Nil(t, err)
			require.NotNil(t, got)

			reader, err := newETLReader(got.ctx, dummyTable, got.FS, files[0], 0, nil)
			require.Nil(t, err)

			// trigger context.Done
			cancel()
			_, err = reader.ReadLine()
			t.Logf("doMergeFiles meet err: %s", err)
			require.Equal(t, "internal error: read files meet context Done", err.Error())
		})
	}
}
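// TestNewMergeNOFiles hands doMergeFiles a FileMeta pointing at a path that
// was never written, and expects the underlying FileService lookup to fail
// with an "is not found" error.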
func TestNewMergeNOFiles(t *testing.T) {
	if simdcsv.SupportedCPU() {
		t.Skip()
	}
	mergeLock.Lock()
	defer mergeLock.Unlock()
	fs, err := fileservice.NewLocalETLFS(defines.ETLFileServiceName, t.TempDir())
	require.Nil(t, err)
	ts, _ := time.Parse("2006-01-02 15:04:05", "2021-01-01 00:00:00")

	ctx := trace.Generate(context.Background())
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	type args struct {
		ctx  context.Context
		opts []MergeOption
	}
	tests := []struct {
		name string
		args args
		want *Merge
	}{
		{
			name: "normal",
			args: args{
				ctx: ctx,
				opts: []MergeOption{WithFileServiceName(defines.ETLFileServiceName),
					WithFileService(fs), WithTable(dummyTable),
					WithMaxFileSize(1), WithMinFilesMerge(1), WithMaxFileSize(16 * mpool.MB), WithMaxMergeJobs(16)},
			},
			want: nil,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			filePath := newFilePath(dummyTable, ts)
			fm := &FileMeta{filePath, 0}
			files := []*FileMeta{fm}

			got, err := NewMerge(tt.args.ctx, tt.args.opts...)
			require.Nil(t, err)
			require.NotNil(t, got)

			err = got.doMergeFiles(ctx, dummyTable.Table, files, 0)
			require.True(t, strings.Contains(err.Error(), "is not found"))
		})
	}
}

func TestMergeTaskExecutorFactory(t *testing.T) {
	defer leaktest.AfterTest(t)()
	t.Logf("tmpDir: %s/%s", t.TempDir(), t.Name())
	fs, err := fileservice.NewLocalETLFS(defines.ETLFileServiceName, path.Join(t.TempDir(), t.Name()))
	require.Nil(t, err)
	targetDate := "2021-01-01"
	ts, err := time.Parse("2006-01-02 15:04:05", targetDate+" 00:00:00")
	require.Nil(t, err)

	type args struct {
		ctx  context.Context
		opts []MergeOption
		task task.Task
	}
	tests := []struct {
		name string
		args args
		want func(ctx context.Context, task task.Task) error
	}{
		{
			name: "normal",
			args: args{
				ctx:  context.Background(),
				opts: []MergeOption{WithFileService(fs), WithMinFilesMerge(1)},
				task: task.Task{
					Metadata: task.TaskMetadata{
						ID:                   "",
						Executor:             0,
						Context:              []byte(strings.Join([]string{dummyTable.GetIdentify(), targetDate}, ParamSeparator)),
						Options:              task.TaskOptions{},
						XXX_NoUnkeyedLiteral: struct{}{},
						XXX_unrecognized:     nil,
						XXX_sizecache:        0,
					},
				},
			},
			want: nil,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			err := initSingleLogsFile(tt.args.ctx, fs, dummyTable, ts)
			require.Nil(t, err)

			got := MergeTaskExecutorFactory(tt.args.opts...)
			require.NotNil(t, got)

			err = got(tt.args.ctx, tt.args.task)
			require.Nilf(t, err, "err: %v", err)

			files := make([]string, 0, 1)
			dir := []string{"/"}
			for len(dir) > 0 {
				entries, _ := fs.List(tt.args.ctx, dir[0])
				for _, e := range entries {
					p := path.Join(dir[0], e.Name)
					if e.IsDir {
						dir = append(dir, p)
					} else {
						files = append(files, p)
					}
				}
				dir = dir[1:]
			}
			require.Equal(t, 1, len(files))
			t.Logf("%v", files)
		})
	}
}

func TestCreateCronTask(t *testing.T) {
	store := taskservice.NewMemTaskStorage()
	s := taskservice.NewTaskService(runtime.DefaultRuntime(), store)
	defer func() {
		assert.NoError(t, s.Close())
	}()
	ctx, cancel := context.WithTimeout(context.TODO(), time.Second*10)
	defer cancel()

	type args struct {
		ctx         context.Context
		executorID  task.TaskCode
		taskService taskservice.TaskService
	}
	tests := []struct {
		name    string
		args    args
		wantErr error
	}{
		{
			name: "name",
			args: args{
				ctx:         ctx,
				executorID:  1,
				taskService: s,
			},
			wantErr: nil,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got := CreateCronTask(tt.args.ctx, tt.args.executorID, tt.args.taskService)
			require.Nil(t, got)
		})
	}
}
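// TestNewMergeService covers the NewMergeService constructor for dummyTable;
// it only asserts that a non-nil *Merge comes back with no error and that the
// returned flag matches the expected false.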
func TestNewMergeService(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.TODO(), time.Minute*5)
	defer cancel()
	fs, err := fileservice.NewLocalETLFS(defines.ETLFileServiceName, path.Join(t.TempDir(), t.Name()))
	require.Nil(t, err)

	type args struct {
		ctx  context.Context
		opts []MergeOption
	}
	tests := []struct {
		name  string
		args  args
		want  *Merge
		want1 bool
	}{
		{
			name: "normal",
			args: args{
				ctx:  ctx,
				opts: []MergeOption{WithFileService(fs), WithMinFilesMerge(1), WithTable(dummyTable)},
			},
			want:  nil,
			want1: false,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got, got1, err := NewMergeService(tt.args.ctx, tt.args.opts...)
			require.Nil(t, err)
			require.NotNil(t, got)
			require.Equal(t, tt.want1, got1)
		})
	}
}
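// listAllFiles is an illustrative sketch, not part of the original file and
// not called by the tests above: it factors out the breadth-first directory
// walk that TestNewMerge and TestMergeTaskExecutorFactory each inline to
// enumerate every file under the ETL FileService root.
func listAllFiles(ctx context.Context, fs fileservice.FileService) []string {
	files := make([]string, 0, 1)
	dir := []string{"/"} // queue of directories still to visit
	for len(dir) > 0 {
		entries, _ := fs.List(ctx, dir[0])
		for _, e := range entries {
			p := path.Join(dir[0], e.Name)
			if e.IsDir {
				dir = append(dir, p) // visit subdirectory later
			} else {
				files = append(files, p)
			}
		}
		dir = dir[1:]
	}
	return files
}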