github.com/matrixorigin/matrixone@v1.2.0/pkg/util/export/merge_test.go

// Copyright 2022 Matrix Origin
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package export

import (
	"context"
	"errors"
	"fmt"
	"path"
	"reflect"
	"strings"
	"sync"
	"testing"
	"time"

	"github.com/matrixorigin/matrixone/pkg/common/moerr"
	"github.com/matrixorigin/matrixone/pkg/common/mpool"
	"github.com/matrixorigin/matrixone/pkg/common/runtime"
	"github.com/matrixorigin/matrixone/pkg/config"
	"github.com/matrixorigin/matrixone/pkg/fileservice"
	"github.com/matrixorigin/matrixone/pkg/logutil"
	"github.com/matrixorigin/matrixone/pkg/pb/metadata"
	"github.com/matrixorigin/matrixone/pkg/pb/task"
	"github.com/matrixorigin/matrixone/pkg/taskservice"
	"github.com/matrixorigin/matrixone/pkg/testutil"
	"github.com/matrixorigin/matrixone/pkg/util/export/etl"
	"github.com/matrixorigin/matrixone/pkg/util/export/table"
	"github.com/matrixorigin/matrixone/pkg/util/trace"

	"github.com/lni/goutils/leaktest"
	"github.com/matrixorigin/simdcsv"
	"github.com/robfig/cron/v3"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

func init() {
	time.Local = time.FixedZone("CST", 0) // set time-zone +0000
	table.RegisterTableDefine(dummyTable)
	runtime.SetupProcessLevelRuntime(runtime.NewRuntime(metadata.ServiceType_CN, "test", logutil.GetGlobalLogger()))
}

var mux sync.Mutex

var dummyStrColumn = table.Column{Name: "str", ColType: table.TVarchar, Scale: 32, Default: "", Comment: "str column"}
var dummyInt64Column = table.Column{Name: "int64", ColType: table.TInt64, Default: "0", Comment: "int64 column"}
var dummyFloat64Column = table.Column{Name: "float64", ColType: table.TFloat64, Default: "0.0", Comment: "float64 column"}

var dummyTable = &table.Table{
	Account:          "test",
	Database:         "db_dummy",
	Table:            "tbl_dummy",
	Columns:          []table.Column{dummyStrColumn, dummyInt64Column, dummyFloat64Column},
	PrimaryKeyColumn: []table.Column{dummyStrColumn, dummyInt64Column},
	Engine:           table.ExternalTableEngine,
	Comment:          "dummy table",
	PathBuilder:      table.NewAccountDatePathBuilder(),
	TableOptions:     nil,
}

func dummyFillTable(str string, i int64, f float64) *table.Row {
	row := dummyTable.GetRow(context.TODO())
	row.SetColumnVal(dummyStrColumn, table.StringField(str))
	row.SetColumnVal(dummyInt64Column, table.Int64Field(i))
	row.SetColumnVal(dummyFloat64Column, table.Float64Field(f))
	return row
}

func TestInitCronExpr(t *testing.T) {
	type args struct {
		duration time.Duration
	}
	tests := []struct {
		name           string
		args           args
		wantErr        bool
		wantExpr       string
		expectDuration time.Duration
	}{
		{name: "1h", args: args{duration: 1 * time.Hour}, wantErr: false, wantExpr: MergeTaskCronExprEvery1Hour},
		{name: "2h", args: args{duration: 2 * time.Hour}, wantErr: false, wantExpr: MergeTaskCronExprEvery2Hour},
		{name: "4h", args: args{duration: 4 * time.Hour}, wantErr: false, wantExpr: MergeTaskCronExprEvery4Hour},
		{name: "3h", args: args{duration: 3 * time.Hour}, wantErr: false, wantExpr: "0 0 3,6,9,12,15,18,21 * * *"},
		{name: "5h", args: args{duration: 5 * time.Hour}, wantErr: false, wantExpr: "0 0 5,10,15,20 * * *"},
		{name: "5min", args: args{duration: 5 * time.Minute}, wantErr: false, wantExpr: MergeTaskCronExprEvery05Min},
		{name: "15min", args: args{duration: 15 * time.Minute}, wantErr: false, wantExpr: MergeTaskCronExprEvery15Min},
		{name: "7min", args: args{duration: 7 * time.Minute}, wantErr: false, wantExpr: "@every 10m", expectDuration: 10 * time.Minute},
		{name: "15s", args: args{duration: 15 * time.Second}, wantErr: false, wantExpr: "@every 15s", expectDuration: 15 * time.Second},
		{name: "2min", args: args{duration: 2 * time.Minute}, wantErr: false, wantExpr: "@every 120s", expectDuration: 2 * time.Minute},
		{name: "13h", args: args{duration: 13 * time.Hour}, wantErr: true, wantExpr: ""},
	}

	ctx := context.Background()
	parser := cron.NewParser(
		cron.Second |
			cron.Minute |
			cron.Hour |
			cron.Dom |
			cron.Month |
			cron.Dow |
			cron.Descriptor)
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			err := InitCronExpr(ctx, tt.args.duration)
			if tt.wantErr {
				var e *moerr.Error
				require.True(t, errors.As(err, &e))
				require.True(t, moerr.IsMoErrCode(e, moerr.ErrNotSupported))
			} else {
				require.Equal(t, tt.wantExpr, MergeTaskCronExpr)

				sche, err := parser.Parse(MergeTaskCronExpr)
				require.Nil(t, err)

				now := time.Unix(60, 0)
				next := sche.Next(time.UnixMilli(now.UnixMilli()))
				t.Logf("duration: %v, expr: %s, next: %v", tt.args.duration, MergeTaskCronExpr, next)
				if tt.expectDuration > 0 {
					require.Equal(t, tt.expectDuration, next.Sub(now))
				} else {
					require.Equal(t, tt.args.duration-time.Minute, next.Sub(now))
				}
			}
		})
	}
}
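// Hedged usage sketch (illustrative only, not part of the original file):
// InitCronExpr validates the merge interval and stores the resulting spec in
// the package-level MergeTaskCronExpr; unsupported intervals (e.g. the 13h
// case above) come back as moerr.ErrNotSupported. A caller would then hand
// the spec to a six-field cron parser configured exactly as in the test:
//
//	if err := InitCronExpr(ctx, 4*time.Hour); err != nil {
//		return err // e.g. moerr.ErrNotSupported
//	}
//	sched, err := cron.NewParser(
//		cron.Second | cron.Minute | cron.Hour | cron.Dom | cron.Month | cron.Dow | cron.Descriptor,
//	).Parse(MergeTaskCronExpr)
//	if err != nil {
//		return err
//	}
//	_ = sched.Next(time.Now()) // next merge trigger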
var newFilePath = func(tbl *table.Table, ts time.Time) string {
	filename := tbl.PathBuilder.NewLogFilename(tbl.GetName(), "uuid", "node", ts, table.CsvExtension)
	p := tbl.PathBuilder.Build(tbl.Account, table.MergeLogTypeLogs, ts, tbl.Database, tbl.GetName())
	filepath := path.Join(p, filename)
	return filepath
}

//func initLogsFile(ctx context.Context, fs fileservice.FileService, tbl *table.Table, ts time.Time) error {
//	mux.Lock()
//	defer mux.Unlock()
//
//	buf := make([]byte, 0, 4096)
//
//	ts1 := ts
//	writer, _ := newETLWriter(ctx, fs, newFilePath(tbl, ts1), buf, nil, nil)
//	writer.WriteStrings(dummyFillTable("row1", 1, 1.0).ToStrings())
//	writer.WriteStrings(dummyFillTable("row2", 2, 2.0).ToStrings())
//	writer.FlushAndClose()
//
//	ts2 := ts.Add(time.Minute)
//	writer, _ = newETLWriter(ctx, fs, newFilePath(tbl, ts2), buf, nil, nil)
//	writer.WriteStrings(dummyFillTable("row3", 1, 1.0).ToStrings())
//	writer.WriteStrings(dummyFillTable("row4", 2, 2.0).ToStrings())
//	writer.FlushAndClose()
//
//	ts3 := ts.Add(time.Hour)
//	writer, _ = newETLWriter(ctx, fs, newFilePath(tbl, ts3), buf, nil, nil)
//	writer.WriteStrings(dummyFillTable("row5", 1, 1.0).ToStrings())
//	writer.WriteStrings(dummyFillTable("row6", 2, 2.0).ToStrings())
//	writer.FlushAndClose()
//
//	ts1New := ts.Add(time.Hour + time.Minute)
//	writer, _ = newETLWriter(ctx, fs, newFilePath(tbl, ts1New), buf, nil, nil)
//	writer.WriteStrings(dummyFillTable("row1", 1, 11.0).ToStrings())
//	writer.WriteStrings(dummyFillTable("row2", 2, 22.0).ToStrings())
//	writer.FlushAndClose()
//
//	return nil
//}

// newETLWriter returns a TAE writer for ".tae" paths and a CSV writer otherwise.
func newETLWriter(ctx context.Context, fs fileservice.FileService, filePath string, buf []byte, tbl *table.Table, mp *mpool.MPool) (ETLWriter, error) {
	if strings.LastIndex(filePath, table.TaeExtension) > 0 {
		writer := etl.NewTAEWriter(ctx, tbl, mp, filePath, fs)
		return writer, nil
	} else {
		// CSV
		fsWriter := etl.NewFSWriter(ctx, fs, etl.WithFilePath(filePath))
		return etl.NewCSVWriter(ctx, fsWriter), nil
	}
}

// initEmptyLogFile creates one log file for tbl at ts without writing any
// rows, tolerating moerr.ErrEmptyRange from the empty flush.
func initEmptyLogFile(ctx context.Context, fs fileservice.FileService, tbl *table.Table, ts time.Time) ([]string, error) {
	mux.Lock()
	defer mux.Unlock()

	files := []string{}
	buf := make([]byte, 0, 4096)

	ts1 := ts
	filePath := newFilePath(tbl, ts1)
	files = append(files, filePath)
	writer, err := newETLWriter(ctx, fs, filePath, buf, tbl, nil)
	if err != nil {
		return nil, err
	}
	_, err = writer.FlushAndClose()
	if err != nil {
		var e *moerr.Error
		if !errors.As(err, &e) || e.ErrorCode() != moerr.ErrEmptyRange {
			return nil, err
		}
	}

	return files, nil
}

// getdummyMpool returns a fresh mpool for the ETL tests, panicking on failure.
func getdummyMpool() *mpool.MPool {
	mp, err := mpool.NewMPool("testETL", 0, mpool.NoFixed)
	if err != nil {
		panic(err)
	}
	return mp
}

// initSingleLogsFile writes two dummy rows into a single log file with the
// given extension and returns the file's path.
func initSingleLogsFile(ctx context.Context, fs fileservice.FileService, tbl *table.Table, ts time.Time, ext string) (string, error) {
	mux.Lock()
	defer mux.Unlock()

	var newFilePath = func(ts time.Time) string {
		filename := tbl.PathBuilder.NewLogFilename(tbl.GetName(), "uuid", "node", ts, ext)
		p := tbl.PathBuilder.Build(tbl.Account, table.MergeLogTypeLogs, ts, tbl.Database, tbl.GetName())
		filepath := path.Join(p, filename)
		return filepath
	}

	buf := make([]byte, 0, 4096)

	ts1 := ts
	path := newFilePath(ts1)
	writer, _ := newETLWriter(ctx, fs, path, buf, tbl, getdummyMpool())
	writer.WriteStrings(dummyFillTable("row1", 1, 1.0).ToStrings())
	writer.WriteStrings(dummyFillTable("row2", 2, 2.0).ToStrings())
	writer.FlushAndClose()

	return path, nil
}
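// Hedged sketch (illustrative only): because newETLWriter dispatches on the
// file extension, one call site covers both formats; only the extension fed
// to the path builder changes. Identifiers are the helpers defined above.
//
//	buf := make([]byte, 0, 4096)
//	w, err := newETLWriter(ctx, fs, newFilePath(dummyTable, time.Now()), buf, dummyTable, getdummyMpool())
//	if err != nil {
//		return err
//	}
//	w.WriteStrings(dummyFillTable("row1", 1, 1.0).ToStrings())
//	if _, err := w.FlushAndClose(); err != nil {
//		return err
//	}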
var mergeLock sync.Mutex

func TestNewMergeNOFiles(t *testing.T) {
	const newSqlWriteLogic = true
	if simdcsv.SupportedCPU() || newSqlWriteLogic {
		t.Skip()
	}
	mergeLock.Lock()
	defer mergeLock.Unlock()
	fs := testutil.NewFS()
	ts, _ := time.Parse("2006-01-02 15:04:05", "2021-01-01 00:00:00")
	dummyFilePath := newFilePath(dummyTable, ts)

	ctx := trace.Generate(context.Background())
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	_, err := initEmptyLogFile(ctx, fs, dummyTable, ts)
	require.Nil(t, err)

	type args struct {
		ctx  context.Context
		opts []MergeOption
		// files
		files []*FileMeta
	}
	tests := []struct {
		name string
		args args
		// wantMsg
		wantMsg string
	}{
		{
			name: "normal",
			args: args{
				ctx: ctx,
				opts: []MergeOption{
					WithFileService(fs),
					WithTable(dummyTable),
					WithMaxFileSize(1),
					WithMaxFileSize(16 * mpool.MB),
					WithMaxMergeJobs(16),
				},
				files: []*FileMeta{{dummyFilePath, 0}},
			},
			wantMsg: "is not found",
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got, err := NewMerge(tt.args.ctx, tt.args.opts...)
			require.Nil(t, err)
			require.NotNil(t, got)

			err = got.doMergeFiles(ctx, tt.args.files)
			require.Equal(t, true, strings.Contains(err.Error(), tt.wantMsg))
		})
	}
}

func TestMergeTaskExecutorFactory(t *testing.T) {
	defer leaktest.AfterTest(t)()
	t.Logf("tmpDir: %s/%s", t.TempDir(), t.Name())
	fs := testutil.NewSharedFS()
	targetDate := "2021-01-01"
	ts, err := time.Parse("2006-01-02 15:04:05", targetDate+" 00:00:00")
	require.Nil(t, err)

	ctx := trace.Generate(context.TODO())

	type args struct {
		ctx  context.Context
		opts []MergeOption
		task *task.AsyncTask
	}
	tests := []struct {
		name string
		args args
		want func(ctx context.Context, task task.Task) error
	}{
		{
			name: "normal",
			args: args{
				ctx:  ctx,
				opts: []MergeOption{WithFileService(fs)},
				task: &task.AsyncTask{
					Metadata: task.TaskMetadata{
						ID:                   "",
						Executor:             0,
						Context:              []byte(strings.Join([]string{dummyTable.GetIdentify(), targetDate}, ParamSeparator)),
						Options:              task.TaskOptions{},
						XXX_NoUnkeyedLiteral: struct{}{},
						XXX_unrecognized:     nil,
						XXX_sizecache:        0,
					},
				},
			},
			want: nil,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			_, err := initSingleLogsFile(tt.args.ctx, fs, dummyTable, ts, table.CsvExtension)
			require.Nil(t, err)

			got := MergeTaskExecutorFactory(tt.args.opts...)
			require.NotNil(t, got)

			err = got(tt.args.ctx, tt.args.task)
			require.Nilf(t, err, "err: %v", err)

			files := make([]string, 0, 1)
			dir := []string{"/"}
			for len(dir) > 0 {
				entrys, _ := fs.List(tt.args.ctx, dir[0])
				for _, e := range entrys {
					p := path.Join(dir[0], e.Name)
					if e.IsDir {
						dir = append(dir, p)
					} else {
						files = append(files, p)
					}
				}
				dir = dir[1:]
			}
			require.Equal(t, 1, len(files))
			t.Logf("%v", files)
		})
	}
}

func TestCreateCronTask(t *testing.T) {
	store := taskservice.NewMemTaskStorage()
	s := taskservice.NewTaskService(runtime.DefaultRuntime(), store)
	defer func() {
		assert.NoError(t, s.Close())
	}()
	ctx, cancel := context.WithTimeout(context.TODO(), time.Second*10)
	defer cancel()

	type args struct {
		ctx         context.Context
		executorID  task.TaskCode
		taskService taskservice.TaskService
	}
	tests := []struct {
		name    string
		args    args
		wantErr error
	}{
		{
			name: "name",
			args: args{
				ctx:         ctx,
				executorID:  1,
				taskService: s,
			},
			wantErr: nil,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got := CreateCronTask(tt.args.ctx, tt.args.executorID, tt.args.taskService)
			require.Nil(t, got)
		})
	}
}
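// Hedged sketch (illustrative only) of the two entry points the tests above
// exercise: a Merge can be driven directly via doMergeFiles, or through the
// task framework, where MergeTaskExecutorFactory builds an executor that
// decodes "<table identity><ParamSeparator><date>" from the task metadata:
//
//	m, err := NewMerge(ctx, WithFileService(fs), WithTable(dummyTable),
//		WithMaxFileSize(16*mpool.MB), WithMaxMergeJobs(16))
//	if err != nil {
//		return err
//	}
//	err = m.doMergeFiles(ctx, []*FileMeta{{filePath, 0}})
//
//	exec := MergeTaskExecutorFactory(WithFileService(fs))
//	err = exec(ctx, asyncTask) // asyncTask: *task.AsyncTask as built above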
func TestNewMergeService(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.TODO(), time.Minute*5)
	defer cancel()
	fs := testutil.NewFS()

	type args struct {
		ctx  context.Context
		opts []MergeOption
	}
	tests := []struct {
		name  string
		args  args
		want  *Merge
		want1 bool
	}{
		{
			name: "normal",
			args: args{
				ctx:  ctx,
				opts: []MergeOption{WithFileService(fs), WithTable(dummyTable)},
			},
			want:  nil,
			want1: false,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got, got1, err := NewMergeService(tt.args.ctx, tt.args.opts...)
			require.Nil(t, err)
			require.NotNil(t, got)
			require.Equal(t, tt.want1, got1)
		})
	}
}

func Test_newETLReader(t *testing.T) {
	ctx := trace.Generate(context.TODO())
	fs := testutil.NewETLFS()
	mp := getdummyMpool()
	require.NotNil(t, mp)

	type args struct {
		ctx  context.Context
		tbl  *table.Table
		fs   fileservice.FileService
		ext  string
		size int64
		mp   *mpool.MPool
	}
	tests := []struct {
		name string
		args args
		want ETLReader
	}{
		{
			name: "csv",
			args: args{
				ctx:  ctx,
				tbl:  dummyTable,
				fs:   fs,
				ext:  table.CsvExtension,
				size: 0,
				mp:   mp,
			},
			want: &ContentReader{},
		},
		{
			name: "tae",
			args: args{
				ctx:  ctx,
				tbl:  dummyTable,
				fs:   fs,
				ext:  table.TaeExtension,
				size: 0,
				mp:   mp,
			},
			want: &etl.TAEReader{},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			path, err := initSingleLogsFile(tt.args.ctx, tt.args.fs, tt.args.tbl, time.Now(), tt.args.ext)
			assert.Nil(t, err)
			got, err := newETLReader(tt.args.ctx, tt.args.tbl, tt.args.fs, path, tt.args.size, tt.args.mp)
			assert.Nil(t, err)
			assert.Equal(t, reflect.TypeOf(tt.want), reflect.TypeOf(got))
			defer got.Close()
		})
	}
}

func TestInitMerge(t *testing.T) {
	type args struct {
		ctx context.Context
		SV  *config.ObservabilityParameters
	}
	tests := []struct {
		name    string
		args    args
		wantErr assert.ErrorAssertionFunc
	}{
		{
			name: "normal",
			args: args{
				ctx: context.TODO(),
				SV:  config.NewObservabilityParameters(),
			},
			wantErr: func(t assert.TestingT, err error, i ...interface{}) bool {
				if err != nil {
					t.Errorf("%v", i)
					return false
				}
				return true
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			tt.wantErr(t, InitMerge(tt.args.ctx, tt.args.SV), fmt.Sprintf("InitMerge(%v, %v)", tt.args.ctx, tt.args.SV))
		})
	}
}
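// Hedged sketch (illustrative only): the read side mirrors newETLWriter's
// extension dispatch, as Test_newETLReader asserts — a CSV path yields a
// *ContentReader and a ".tae" path a *etl.TAEReader, both behind the common
// ETLReader interface:
//
//	r, err := newETLReader(ctx, dummyTable, fs, filePath, 0, getdummyMpool())
//	if err != nil {
//		return err
//	}
//	defer r.Close()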