github.com/pingcap/ticdc@v0.0.0-20220526033649-485a10ef2652/cdc/puller/sorter/sorter_test.go (about) 1 // Copyright 2020 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package sorter 15 16 import ( 17 "context" 18 "math" 19 _ "net/http/pprof" 20 "os" 21 "path/filepath" 22 "sync/atomic" 23 "testing" 24 "time" 25 26 "github.com/pingcap/check" 27 "github.com/pingcap/failpoint" 28 "github.com/pingcap/log" 29 "github.com/pingcap/ticdc/cdc/model" 30 "github.com/pingcap/ticdc/cdc/puller" 31 "github.com/pingcap/ticdc/pkg/config" 32 "github.com/pingcap/ticdc/pkg/util/testleak" 33 "go.uber.org/zap" 34 "go.uber.org/zap/zapcore" 35 "golang.org/x/sync/errgroup" 36 ) 37 38 const ( 39 numProducers = 16 40 ) 41 42 type sorterSuite struct{} 43 44 var _ = check.SerialSuites(&sorterSuite{}) 45 46 func Test(t *testing.T) { check.TestingT(t) } 47 48 func generateMockRawKV(ts uint64) *model.RawKVEntry { 49 return &model.RawKVEntry{ 50 OpType: model.OpTypePut, 51 Key: []byte{}, 52 Value: []byte{}, 53 OldValue: nil, 54 StartTs: ts - 5, 55 CRTs: ts, 56 RegionID: 0, 57 } 58 } 59 60 func (s *sorterSuite) TestSorterBasic(c *check.C) { 61 defer testleak.AfterTest(c)() 62 defer UnifiedSorterCleanUp() 63 64 conf := config.GetDefaultServerConfig() 65 conf.DataDir = c.MkDir() 66 sortDir := filepath.Join(conf.DataDir, config.DefaultSortDir) 67 conf.Sorter = &config.SorterConfig{ 68 NumConcurrentWorker: 8, 69 ChunkSizeLimit: 1 * 1024 * 1024 * 1024, 70 MaxMemoryPressure: 60, 71 MaxMemoryConsumption: 16 * 1024 * 1024 * 1024, 72 NumWorkerPoolGoroutine: 4, 73 SortDir: sortDir, 74 } 75 config.StoreGlobalServerConfig(conf) 76 77 err := os.MkdirAll(conf.Sorter.SortDir, 0o755) 78 c.Assert(err, check.IsNil) 79 sorter, err := NewUnifiedSorter(conf.Sorter.SortDir, "test-cf", "test", 0, "0.0.0.0:0") 80 c.Assert(err, check.IsNil) 81 82 ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) 83 defer cancel() 84 err = testSorter(ctx, c, sorter, 10000) 85 c.Assert(err, check.ErrorMatches, ".*context cancel.*") 86 } 87 88 func (s *sorterSuite) TestSorterCancel(c *check.C) { 89 defer testleak.AfterTest(c)() 90 defer UnifiedSorterCleanUp() 91 92 conf := config.GetDefaultServerConfig() 93 conf.DataDir = c.MkDir() 94 sortDir := filepath.Join(conf.DataDir, config.DefaultSortDir) 95 conf.Sorter = &config.SorterConfig{ 96 NumConcurrentWorker: 8, 97 ChunkSizeLimit: 1 * 1024 * 1024 * 1024, 98 MaxMemoryPressure: 60, 99 MaxMemoryConsumption: 0, 100 NumWorkerPoolGoroutine: 4, 101 SortDir: sortDir, 102 } 103 config.StoreGlobalServerConfig(conf) 104 105 err := os.MkdirAll(conf.Sorter.SortDir, 0o755) 106 c.Assert(err, check.IsNil) 107 sorter, err := NewUnifiedSorter(conf.Sorter.SortDir, "test-cf", "test", 0, "0.0.0.0:0") 108 c.Assert(err, check.IsNil) 109 110 ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) 111 defer cancel() 112 113 finishedCh := make(chan struct{}) 114 go func() { 115 err := testSorter(ctx, c, sorter, 10000000) 116 c.Assert(err, check.ErrorMatches, ".*context deadline exceeded.*") 117 close(finishedCh) 118 }() 119 120 after := time.After(30 * time.Second) 121 select { 122 case <-after: 123 c.Fatal("TestSorterCancel timed out") 124 case <-finishedCh: 125 } 126 127 log.Info("Sorter successfully cancelled") 128 } 129 130 func testSorter(ctx context.Context, c *check.C, sorter puller.EventSorter, count int) error { 131 err := failpoint.Enable("github.com/pingcap/ticdc/cdc/puller/sorter/sorterDebug", "return(true)") 132 if err != nil { 133 log.Panic("Could not enable failpoint", zap.Error(err)) 134 } 135 136 c.Assert(failpoint.Enable("github.com/pingcap/ticdc/pkg/util/InjectCheckDataDirSatisfied", ""), check.IsNil) 137 defer func() { 138 c.Assert(failpoint.Disable("github.com/pingcap/ticdc/pkg/util/InjectCheckDataDirSatisfied"), check.IsNil) 139 }() 140 141 ctx, cancel := context.WithCancel(ctx) 142 errg, ctx := errgroup.WithContext(ctx) 143 errg.Go(func() error { 144 return sorter.Run(ctx) 145 }) 146 errg.Go(func() error { 147 return RunWorkerPool(ctx) 148 }) 149 150 producerProgress := make([]uint64, numProducers) 151 152 // launch the producers 153 for i := 0; i < numProducers; i++ { 154 finalI := i 155 errg.Go(func() error { 156 for j := 1; j <= count; j++ { 157 select { 158 case <-ctx.Done(): 159 return ctx.Err() 160 default: 161 } 162 163 sorter.AddEntry(ctx, model.NewPolymorphicEvent(generateMockRawKV(uint64(j)<<5))) 164 if j%10000 == 0 { 165 atomic.StoreUint64(&producerProgress[finalI], uint64(j)<<5) 166 } 167 } 168 sorter.AddEntry(ctx, model.NewPolymorphicEvent(generateMockRawKV(uint64(count+1)<<5))) 169 atomic.StoreUint64(&producerProgress[finalI], uint64(count+1)<<5) 170 return nil 171 }) 172 } 173 174 // launch the resolver 175 errg.Go(func() error { 176 ticker := time.NewTicker(1 * time.Second) 177 defer ticker.Stop() 178 for { 179 select { 180 case <-ctx.Done(): 181 return ctx.Err() 182 case <-ticker.C: 183 resolvedTs := uint64(math.MaxUint64) 184 for i := range producerProgress { 185 ts := atomic.LoadUint64(&producerProgress[i]) 186 if resolvedTs > ts { 187 resolvedTs = ts 188 } 189 } 190 sorter.AddEntry(ctx, model.NewResolvedPolymorphicEvent(0, resolvedTs)) 191 if resolvedTs == uint64(count)<<5 { 192 return nil 193 } 194 } 195 } 196 }) 197 198 // launch the consumer 199 errg.Go(func() error { 200 counter := 0 201 lastTs := uint64(0) 202 ticker := time.NewTicker(1 * time.Second) 203 defer ticker.Stop() 204 for { 205 select { 206 case <-ctx.Done(): 207 return ctx.Err() 208 case event := <-sorter.Output(): 209 if event.RawKV.OpType != model.OpTypeResolved { 210 if event.CRTs < lastTs { 211 panic("regressed") 212 } 213 lastTs = event.CRTs 214 counter += 1 215 if counter%10000 == 0 { 216 log.Debug("Messages received", zap.Int("counter", counter)) 217 } 218 if counter >= numProducers*count { 219 log.Debug("Unified Sorter test successful") 220 cancel() 221 } 222 } 223 case <-ticker.C: 224 log.Debug("Consumer is alive") 225 } 226 } 227 }) 228 229 return errg.Wait() 230 } 231 232 func (s *sorterSuite) TestSortDirConfigLocal(c *check.C) { 233 defer testleak.AfterTest(c)() 234 defer UnifiedSorterCleanUp() 235 236 poolMu.Lock() 237 // Clean up the back-end pool if one has been created 238 pool = nil 239 poolMu.Unlock() 240 241 baseDir := c.MkDir() 242 dir := filepath.Join(baseDir, "sorter_local") 243 err := os.MkdirAll(dir, 0o755) 244 c.Assert(err, check.IsNil) 245 // We expect the local setting to override the changefeed setting 246 config.GetGlobalServerConfig().Sorter.SortDir = dir 247 248 _, err = NewUnifiedSorter(filepath.Join(baseDir, "sorter"), /* the changefeed setting */ 249 "test-cf", 250 "test", 251 0, 252 "0.0.0.0:0") 253 c.Assert(err, check.IsNil) 254 255 poolMu.Lock() 256 defer poolMu.Unlock() 257 258 c.Assert(pool, check.NotNil) 259 c.Assert(pool.dir, check.Equals, dir) 260 } 261 262 func (s *sorterSuite) TestSortDirConfigChangeFeed(c *check.C) { 263 defer testleak.AfterTest(c)() 264 defer UnifiedSorterCleanUp() 265 266 poolMu.Lock() 267 // Clean up the back-end pool if one has been created 268 pool = nil 269 poolMu.Unlock() 270 271 dir := c.MkDir() 272 // We expect the changefeed setting to take effect 273 config.GetGlobalServerConfig().Sorter.SortDir = "" 274 275 _, err := NewUnifiedSorter(dir, /* the changefeed setting */ 276 "test-cf", 277 "test", 278 0, 279 "0.0.0.0:0") 280 c.Assert(err, check.IsNil) 281 282 poolMu.Lock() 283 defer poolMu.Unlock() 284 285 c.Assert(pool, check.NotNil) 286 c.Assert(pool.dir, check.Equals, dir) 287 } 288 289 // TestSorterCancelRestart tests the situation where the Unified Sorter is repeatedly canceled and 290 // restarted. There should not be any problem, especially file corruptions. 291 func (s *sorterSuite) TestSorterCancelRestart(c *check.C) { 292 defer testleak.AfterTest(c)() 293 defer UnifiedSorterCleanUp() 294 295 conf := config.GetDefaultServerConfig() 296 conf.DataDir = c.MkDir() 297 sortDir := filepath.Join(conf.DataDir, config.DefaultSortDir) 298 conf.Sorter = &config.SorterConfig{ 299 NumConcurrentWorker: 8, 300 ChunkSizeLimit: 1 * 1024 * 1024 * 1024, 301 MaxMemoryPressure: 0, // disable memory sort 302 MaxMemoryConsumption: 0, 303 NumWorkerPoolGoroutine: 4, 304 SortDir: sortDir, 305 } 306 config.StoreGlobalServerConfig(conf) 307 308 err := os.MkdirAll(conf.Sorter.SortDir, 0o755) 309 c.Assert(err, check.IsNil) 310 311 // enable the failpoint to simulate delays 312 err = failpoint.Enable("github.com/pingcap/ticdc/cdc/puller/sorter/asyncFlushStartDelay", "sleep(100)") 313 c.Assert(err, check.IsNil) 314 defer func() { 315 _ = failpoint.Disable("github.com/pingcap/ticdc/cdc/puller/sorter/asyncFlushStartDelay") 316 }() 317 318 // enable the failpoint to simulate delays 319 err = failpoint.Enable("github.com/pingcap/ticdc/cdc/puller/sorter/asyncFlushInProcessDelay", "1%sleep(1)") 320 c.Assert(err, check.IsNil) 321 defer func() { 322 _ = failpoint.Disable("github.com/pingcap/ticdc/cdc/puller/sorter/asyncFlushInProcessDelay") 323 }() 324 325 for i := 0; i < 5; i++ { 326 sorter, err := NewUnifiedSorter(conf.Sorter.SortDir, "test-cf", "test", 0, "0.0.0.0:0") 327 c.Assert(err, check.IsNil) 328 ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) 329 err = testSorter(ctx, c, sorter, 100000000) 330 c.Assert(err, check.ErrorMatches, ".*context deadline exceeded.*") 331 cancel() 332 } 333 } 334 335 func (s *sorterSuite) TestSorterIOError(c *check.C) { 336 defer testleak.AfterTest(c)() 337 defer UnifiedSorterCleanUp() 338 339 log.SetLevel(zapcore.DebugLevel) 340 defer log.SetLevel(zapcore.InfoLevel) 341 342 conf := config.GetDefaultServerConfig() 343 conf.DataDir = c.MkDir() 344 sortDir := filepath.Join(conf.DataDir, config.DefaultSortDir) 345 conf.Sorter = &config.SorterConfig{ 346 NumConcurrentWorker: 8, 347 ChunkSizeLimit: 1 * 1024 * 1024 * 1024, 348 MaxMemoryPressure: 60, 349 MaxMemoryConsumption: 0, 350 NumWorkerPoolGoroutine: 4, 351 SortDir: sortDir, 352 } 353 config.StoreGlobalServerConfig(conf) 354 355 err := os.MkdirAll(conf.Sorter.SortDir, 0o755) 356 c.Assert(err, check.IsNil) 357 sorter, err := NewUnifiedSorter(conf.Sorter.SortDir, "test-cf", "test", 0, "0.0.0.0:0") 358 c.Assert(err, check.IsNil) 359 360 ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) 361 defer cancel() 362 363 // enable the failpoint to simulate backEnd allocation error (usually would happen when creating a file) 364 err = failpoint.Enable("github.com/pingcap/ticdc/cdc/puller/sorter/InjectErrorBackEndAlloc", "return(true)") 365 c.Assert(err, check.IsNil) 366 defer func() { 367 _ = failpoint.Disable("github.com/pingcap/ticdc/cdc/puller/sorter/InjectErrorBackEndAlloc") 368 }() 369 370 finishedCh := make(chan struct{}) 371 go func() { 372 err := testSorter(ctx, c, sorter, 10000) 373 c.Assert(err, check.ErrorMatches, ".*injected alloc error.*") 374 close(finishedCh) 375 }() 376 377 after := time.After(60 * time.Second) 378 select { 379 case <-after: 380 c.Fatal("TestSorterIOError timed out") 381 case <-finishedCh: 382 } 383 384 UnifiedSorterCleanUp() 385 _ = failpoint.Disable("github.com/pingcap/ticdc/cdc/puller/sorter/InjectErrorBackEndAlloc") 386 // enable the failpoint to simulate backEnd write error (usually would happen when writing to a file) 387 err = failpoint.Enable("github.com/pingcap/ticdc/cdc/puller/sorter/InjectErrorBackEndWrite", "return(true)") 388 c.Assert(err, check.IsNil) 389 defer func() { 390 _ = failpoint.Disable("github.com/pingcap/ticdc/cdc/puller/sorter/InjectErrorBackEndWrite") 391 }() 392 393 sorter, err = NewUnifiedSorter(conf.Sorter.SortDir, "test-cf", "test", 0, "0.0.0.0:0") 394 c.Assert(err, check.IsNil) 395 396 finishedCh = make(chan struct{}) 397 go func() { 398 err := testSorter(ctx, c, sorter, 10000) 399 c.Assert(err, check.ErrorMatches, ".*injected write error.*") 400 close(finishedCh) 401 }() 402 403 after = time.After(60 * time.Second) 404 select { 405 case <-after: 406 c.Fatal("TestSorterIOError timed out") 407 case <-finishedCh: 408 } 409 } 410 411 func (s *sorterSuite) TestSorterErrorReportCorrect(c *check.C) { 412 defer testleak.AfterTest(c)() 413 defer UnifiedSorterCleanUp() 414 415 log.SetLevel(zapcore.DebugLevel) 416 defer log.SetLevel(zapcore.InfoLevel) 417 418 conf := config.GetDefaultServerConfig() 419 conf.DataDir = c.MkDir() 420 sortDir := filepath.Join(conf.DataDir, config.DefaultSortDir) 421 conf.Sorter = &config.SorterConfig{ 422 NumConcurrentWorker: 8, 423 ChunkSizeLimit: 1 * 1024 * 1024 * 1024, 424 MaxMemoryPressure: 60, 425 MaxMemoryConsumption: 0, 426 NumWorkerPoolGoroutine: 4, 427 SortDir: sortDir, 428 } 429 config.StoreGlobalServerConfig(conf) 430 431 err := os.MkdirAll(conf.Sorter.SortDir, 0o755) 432 c.Assert(err, check.IsNil) 433 sorter, err := NewUnifiedSorter(conf.Sorter.SortDir, "test-cf", "test", 0, "0.0.0.0:0") 434 c.Assert(err, check.IsNil) 435 436 ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) 437 defer cancel() 438 439 // enable the failpoint to simulate backEnd allocation error (usually would happen when creating a file) 440 err = failpoint.Enable("github.com/pingcap/ticdc/cdc/puller/sorter/InjectHeapSorterExitDelay", "sleep(2000)") 441 c.Assert(err, check.IsNil) 442 defer func() { 443 _ = failpoint.Disable("github.com/pingcap/ticdc/cdc/puller/sorter/InjectHeapSorterExitDelay") 444 }() 445 446 err = failpoint.Enable("github.com/pingcap/ticdc/cdc/puller/sorter/InjectErrorBackEndAlloc", "return(true)") 447 c.Assert(err, check.IsNil) 448 defer func() { 449 _ = failpoint.Disable("github.com/pingcap/ticdc/cdc/puller/sorter/InjectErrorBackEndAlloc") 450 }() 451 452 finishedCh := make(chan struct{}) 453 go func() { 454 err := testSorter(ctx, c, sorter, 10000) 455 c.Assert(err, check.ErrorMatches, ".*injected alloc error.*") 456 close(finishedCh) 457 }() 458 459 after := time.After(60 * time.Second) 460 select { 461 case <-after: 462 c.Fatal("TestSorterIOError timed out") 463 case <-finishedCh: 464 } 465 } 466 467 func (s *sorterSuite) TestSortClosedAddEntry(c *check.C) { 468 defer testleak.AfterTest(c)() 469 defer UnifiedSorterCleanUp() 470 471 sorter, err := NewUnifiedSorter(c.MkDir(), 472 "test-cf", 473 "test", 474 0, 475 "0.0.0.0:0") 476 c.Assert(err, check.IsNil) 477 478 ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*100) 479 defer cancel() 480 err = sorter.Run(ctx) 481 c.Assert(err, check.ErrorMatches, ".*deadline.*") 482 483 ctx1, cancel1 := context.WithTimeout(context.Background(), time.Second*10) 484 defer cancel1() 485 for i := 0; i < 10000; i++ { 486 sorter.AddEntry(ctx1, model.NewPolymorphicEvent(generateMockRawKV(uint64(i)))) 487 } 488 489 select { 490 case <-ctx1.Done(): 491 c.Fatal("TestSortClosedAddEntry timed out") 492 default: 493 } 494 cancel1() 495 } 496 497 func (s *sorterSuite) TestUnifiedSorterFileLockConflict(c *check.C) { 498 defer testleak.AfterTest(c)() 499 defer UnifiedSorterCleanUp() 500 501 dir := c.MkDir() 502 captureAddr := "0.0.0.0:0" 503 _, err := newBackEndPool(dir, captureAddr) 504 c.Assert(err, check.IsNil) 505 506 // GlobalServerConfig overrides dir parameter in NewUnifiedSorter. 507 config.GetGlobalServerConfig().Sorter.SortDir = dir 508 _, err = NewUnifiedSorter(dir, "test-cf", "test", 0, captureAddr) 509 c.Assert(err, check.ErrorMatches, ".*file lock conflict.*") 510 }