github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/pkg/p2p/server_client_integration_test.go (about) 1 // Copyright 2021 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package p2p 15 16 import ( 17 "context" 18 "fmt" 19 "net" 20 "sync" 21 "sync/atomic" 22 "testing" 23 "time" 24 25 "github.com/phayes/freeport" 26 "github.com/pingcap/failpoint" 27 "github.com/pingcap/log" 28 cerror "github.com/pingcap/tiflow/pkg/errors" 29 "github.com/pingcap/tiflow/pkg/security" 30 "github.com/pingcap/tiflow/proto/p2p" 31 "github.com/stretchr/testify/require" 32 "go.uber.org/zap" 33 "google.golang.org/grpc" 34 ) 35 36 // read only 37 var clientConfig4Testing = &MessageClientConfig{ 38 SendChannelSize: 1024, 39 BatchSendInterval: time.Millisecond * 1, // to accelerate testing 40 MaxBatchCount: 128, 41 MaxBatchBytes: 8192, 42 RetryRateLimitPerSecond: 10.0, // using 10.0 instead of 1.0 to accelerate testing 43 DialTimeout: time.Second * 3, 44 MaxRecvMsgSize: 4 * 1024 * 1024, // 4MB 45 } 46 47 type serverConfigOpt = func(config *MessageServerConfig) 48 49 //nolint:unparam 50 func newServerForIntegrationTesting(t *testing.T, serverID string, configOpts ...serverConfigOpt) (server *MessageServer, addr string, cancel func()) { 51 port := freeport.GetPort() 52 addr = fmt.Sprintf("127.0.0.1:%d", port) 53 lis, err := net.Listen("tcp", addr) 54 require.NoError(t, err) 55 56 var opts []grpc.ServerOption 57 grpcServer := grpc.NewServer(opts...) 58 59 serverConfig := *defaultServerConfig4Testing 60 for _, opt := range configOpts { 61 opt(&serverConfig) 62 } 63 64 server = NewMessageServer(serverID, &serverConfig) 65 p2p.RegisterCDCPeerToPeerServer(grpcServer, server) 66 67 var wg sync.WaitGroup 68 wg.Add(1) 69 go func() { 70 defer wg.Done() 71 _ = grpcServer.Serve(lis) 72 }() 73 74 cancel = func() { 75 grpcServer.Stop() 76 wg.Wait() 77 } 78 return 79 } 80 81 func runP2PIntegrationTest( 82 ctx context.Context, 83 t *testing.T, 84 size int, 85 numTopics int, 86 clientConcurrency int, 87 ) { 88 ctx, cancel := context.WithCancel(ctx) 89 defer cancel() 90 91 server, addr, cancelServer := newServerForIntegrationTesting(t, "test-server-1", 92 func(config *MessageServerConfig) { 93 config.AckInterval = time.Millisecond * 1 94 }) 95 defer cancelServer() 96 97 var wg sync.WaitGroup 98 wg.Add(1) 99 go func() { 100 defer wg.Done() 101 for { 102 err := server.Run(ctx, nil) 103 if cerror.ErrPeerMessageInjectedServerRestart.Equal(err) { 104 log.Warn("server restarted") 105 continue 106 } 107 require.Regexp(t, ".*context canceled.*", err.Error()) 108 break 109 } 110 }() 111 112 for j := 0; j < numTopics; j++ { 113 topicName := fmt.Sprintf("test-topic-%d", j) 114 var lastIndex int64 115 errCh := mustAddHandler(ctx, t, server, topicName, &testTopicContent{}, func(senderID string, i interface{}) error { 116 require.Equal(t, "test-client-1", senderID) 117 require.IsType(t, &testTopicContent{}, i) 118 content := i.(*testTopicContent) 119 if clientConcurrency == 1 { 120 require.Equal(t, content.Index-1, lastIndex) 121 lastIndex = content.Index 122 } 123 return nil 124 }) 125 126 wg.Add(1) 127 go func() { 128 defer wg.Done() 129 select { 130 case <-ctx.Done(): 131 case err := <-errCh: 132 require.NoError(t, err) 133 } 134 }() 135 } 136 137 client := NewGrpcMessageClient("test-client-1", clientConfig4Testing) 138 wg.Add(1) 139 go func() { 140 defer wg.Done() 141 err := client.Run(ctx, "tcp", addr, "test-server-1", &security.Credential{}) 142 if err != nil { 143 log.Warn("client returned error", zap.Error(err)) 144 require.Regexp(t, ".*context canceled.*", err.Error()) 145 } 146 }() 147 148 var wg1 sync.WaitGroup 149 wg1.Add(numTopics * clientConcurrency) 150 for j := 0; j < numTopics*clientConcurrency; j++ { 151 topicName := fmt.Sprintf("test-topic-%d", j%numTopics) 152 go func() { 153 defer wg1.Done() 154 var oldSeq Seq 155 for i := 0; i < size; i++ { 156 content := &testTopicContent{Index: int64(i + 1)} 157 seq, err := client.SendMessage(ctx, topicName, content) 158 require.NoError(t, err) 159 160 if clientConcurrency == 1 { 161 require.Equal(t, oldSeq+1, seq) 162 oldSeq = seq 163 } 164 } 165 166 require.Eventuallyf(t, func() bool { 167 seq, ok := client.CurrentAck(topicName) 168 if !ok { 169 return false 170 } 171 return seq >= Seq(size*clientConcurrency) 172 }, time.Second*40, time.Millisecond*20, "failed to wait for ack") 173 }() 174 } 175 176 wg1.Wait() 177 cancel() 178 wg.Wait() 179 } 180 181 func TestMessageClientBasic(t *testing.T) { 182 ctx, cancel := context.WithTimeout(context.TODO(), defaultTimeout) 183 defer cancel() 184 185 runP2PIntegrationTest(ctx, t, defaultMessageBatchSizeLarge, 1, 4) 186 } 187 188 func TestMessageClientBasicMultiTopics(t *testing.T) { 189 ctx, cancel := context.WithTimeout(context.TODO(), defaultTimeout*2) 190 defer cancel() 191 192 runP2PIntegrationTest(ctx, t, defaultMessageBatchSizeLarge, 4, 4) 193 } 194 195 func TestMessageClientServerRestart(t *testing.T) { 196 _ = failpoint.Enable("github.com/pingcap/tiflow/pkg/p2p/ServerInjectServerRestart", "1%return(true)") 197 defer func() { 198 _ = failpoint.Disable("github.com/pingcap/tiflow/pkg/p2p/ServerInjectServerRestart") 199 }() 200 201 ctx, cancel := context.WithTimeout(context.TODO(), defaultTimeout) 202 defer cancel() 203 204 runP2PIntegrationTest(ctx, t, defaultMessageBatchSizeSmall, 1, 1) 205 } 206 207 func TestMessageClientServerRestartMultiTopics(t *testing.T) { 208 _ = failpoint.Enable("github.com/pingcap/tiflow/pkg/p2p/ServerInjectServerRestart", "1%return(true)") 209 defer func() { 210 _ = failpoint.Disable("github.com/pingcap/tiflow/pkg/p2p/ServerInjectServerRestart") 211 }() 212 213 ctx, cancel := context.WithTimeout(context.TODO(), defaultTimeout*4) 214 defer cancel() 215 216 runP2PIntegrationTest(ctx, t, defaultMessageBatchSizeSmall, 4, 1) 217 } 218 219 func TestMessageClientRestart(t *testing.T) { 220 _ = failpoint.Enable("github.com/pingcap/tiflow/pkg/p2p/ClientInjectStreamFailure", "50%return(true)") 221 defer func() { 222 _ = failpoint.Disable("github.com/pingcap/tiflow/pkg/p2p/ClientInjectStreamFailure") 223 }() 224 225 ctx, cancel := context.WithTimeout(context.TODO(), defaultTimeout) 226 defer cancel() 227 228 runP2PIntegrationTest(ctx, t, defaultMessageBatchSizeLarge, 1, 1) 229 } 230 231 func TestMessageClientRestartMultiTopics(t *testing.T) { 232 _ = failpoint.Enable("github.com/pingcap/tiflow/pkg/p2p/ClientInjectStreamFailure", "3%return(true)") 233 defer func() { 234 _ = failpoint.Disable("github.com/pingcap/tiflow/pkg/p2p/ClientInjectStreamFailure") 235 }() 236 237 ctx, cancel := context.WithTimeout(context.TODO(), defaultTimeout) 238 defer cancel() 239 240 runP2PIntegrationTest(ctx, t, defaultMessageBatchSizeSmall, 4, 1) 241 } 242 243 func TestMessageClientSenderErrorsMultiTopics(t *testing.T) { 244 _ = failpoint.Enable("github.com/pingcap/tiflow/pkg/p2p/ClientBatchSenderInjectError", "3*return(true)") 245 defer func() { 246 _ = failpoint.Disable("github.com/pingcap/tiflow/pkg/p2p/ClientBatchSenderInjectError") 247 }() 248 249 ctx, cancel := context.WithTimeout(context.TODO(), defaultTimeout) 250 defer cancel() 251 252 runP2PIntegrationTest(ctx, t, defaultMessageBatchSizeSmall, 4, 1) 253 } 254 255 func TestMessageClientBasicNonblocking(t *testing.T) { 256 ctx, cancel := context.WithTimeout(context.TODO(), defaultTimeout) 257 defer cancel() 258 259 server, addr, cancelServer := newServerForIntegrationTesting(t, "test-server-1") 260 defer cancelServer() 261 262 var wg sync.WaitGroup 263 wg.Add(1) 264 go func() { 265 defer wg.Done() 266 err := server.Run(ctx, nil) 267 if err != nil { 268 require.Regexp(t, ".*context canceled.*", err.Error()) 269 } 270 }() 271 272 var lastIndex int64 273 errCh := mustAddHandler(ctx, t, server, "test-topic-1", &testTopicContent{}, func(senderID string, i interface{}) error { 274 require.Equal(t, "test-client-1", senderID) 275 require.IsType(t, &testTopicContent{}, i) 276 content := i.(*testTopicContent) 277 swapped := atomic.CompareAndSwapInt64(&lastIndex, content.Index-1, content.Index) 278 require.True(t, swapped) 279 return nil 280 }) 281 282 wg.Add(1) 283 go func() { 284 defer wg.Done() 285 select { 286 case <-ctx.Done(): 287 case err := <-errCh: 288 require.NoError(t, err) 289 } 290 }() 291 292 client := NewGrpcMessageClient("test-client-1", clientConfig4Testing) 293 wg.Add(1) 294 go func() { 295 defer wg.Done() 296 err := client.Run(ctx, "tcp", addr, "test-server-1", &security.Credential{}) 297 require.Error(t, err) 298 require.Regexp(t, ".*context canceled.*", err.Error()) 299 }() 300 301 var oldSeq Seq 302 for i := 0; i < defaultMessageBatchSizeSmall; i++ { 303 content := &testTopicContent{Index: int64(i + 1)} 304 var ( 305 seq Seq 306 err error 307 ) 308 require.Eventually(t, func() bool { 309 seq, err = client.TrySendMessage(ctx, "test-topic-1", content) 310 return !cerror.ErrPeerMessageSendTryAgain.Equal(err) 311 }, time.Second*5, time.Millisecond*10) 312 require.NoError(t, err) 313 require.Equal(t, oldSeq+1, seq) 314 oldSeq = seq 315 } 316 317 require.Eventually(t, func() bool { 318 seq, ok := client.CurrentAck("test-topic-1") 319 if !ok { 320 return false 321 } 322 return seq >= defaultMessageBatchSizeSmall 323 }, time.Second*10, time.Millisecond*20) 324 325 cancel() 326 wg.Wait() 327 } 328 329 func TestMessageBackPressure(t *testing.T) { 330 ctx, cancel := context.WithTimeout(context.TODO(), defaultTimeout) 331 defer cancel() 332 333 server, addr, cancelServer := newServerForIntegrationTesting(t, 334 "test-server-1", func(config *MessageServerConfig) { 335 config.MaxPendingTaskCount = 10 336 }) 337 defer cancelServer() 338 339 var wg sync.WaitGroup 340 wg.Add(1) 341 go func() { 342 defer wg.Done() 343 err := server.Run(ctx, nil) 344 if err != nil { 345 require.Regexp(t, ".*context canceled.*", err.Error()) 346 } 347 }() 348 349 // No-op handler. We are only testing for back-pressure. 350 errCh := mustAddHandler(ctx, t, server, "test-topic-1", &testTopicContent{}, func(senderID string, i interface{}) error { 351 return nil 352 }) 353 354 wg.Add(1) 355 go func() { 356 defer wg.Done() 357 select { 358 case <-ctx.Done(): 359 case err := <-errCh: 360 require.NoError(t, err) 361 } 362 }() 363 364 client := NewGrpcMessageClient("test-client-1", clientConfig4Testing) 365 wg.Add(1) 366 go func() { 367 defer wg.Done() 368 err := client.Run(ctx, "tcp", addr, "test-server-1", &security.Credential{}) 369 require.Error(t, err) 370 require.Regexp(t, ".*context canceled.*", err.Error()) 371 }() 372 373 _ = failpoint.Enable("github.com/pingcap/tiflow/pkg/p2p/ServerInjectTaskDelay", "sleep(1)") 374 defer func() { 375 _ = failpoint.Disable("github.com/pingcap/tiflow/pkg/p2p/ServerInjectTaskDelay") 376 }() 377 378 var lastSeq Seq 379 for i := 0; i < defaultMessageBatchSizeLarge; i++ { 380 seq, err := client.SendMessage(ctx, "test-topic-1", &testTopicContent{}) 381 require.NoError(t, err) 382 atomic.StoreInt64(&lastSeq, seq) 383 } 384 385 require.Eventually(t, func() bool { 386 latestAck, ok := client.CurrentAck("test-topic-1") 387 if !ok { 388 return false 389 } 390 log.Info("checked ack", zap.Int64("ack", latestAck)) 391 return latestAck == atomic.LoadInt64(&lastSeq) 392 }, time.Second*10, time.Millisecond*20) 393 cancel() 394 wg.Wait() 395 } 396 397 func TestTopicCongested(t *testing.T) { 398 ctx, cancel := context.WithTimeout(context.TODO(), defaultTimeout) 399 defer cancel() 400 401 server, addr, cancelServer := newServerForIntegrationTesting(t, 402 "test-server-1", func(config *MessageServerConfig) { 403 config.MaxPendingMessageCountPerTopic = 10 404 }) 405 defer cancelServer() 406 407 var wg sync.WaitGroup 408 wg.Add(1) 409 go func() { 410 defer wg.Done() 411 err := server.Run(ctx, nil) 412 if err != nil { 413 require.Regexp(t, ".*context canceled.*", err.Error()) 414 } 415 }() 416 417 newClientConfig := *clientConfig4Testing 418 newClientConfig.MaxBatchCount = 1 419 newClientConfig.RetryRateLimitPerSecond = 100 420 client := NewGrpcMessageClient("test-client-1", clientConfig4Testing) 421 wg.Add(1) 422 go func() { 423 defer wg.Done() 424 err := client.Run(ctx, "tcp", addr, "test-server-1", &security.Credential{}) 425 require.Error(t, err) 426 require.Regexp(t, ".*context canceled.*", err.Error()) 427 }() 428 429 var lastSeq Seq 430 wg.Add(1) 431 go func() { 432 defer wg.Done() 433 434 for i := 0; i < 100; i++ { 435 seq, err := client.SendMessage(ctx, "test-topic-1", &testTopicContent{}) 436 require.NoError(t, err) 437 atomic.StoreInt64(&lastSeq, seq) 438 time.Sleep(10 * time.Millisecond) 439 } 440 }() 441 442 // No-op handler. 443 _ = mustAddHandler(ctx, t, server, "test-topic-1", 444 &testTopicContent{}, func(senderID string, i interface{}) error { 445 return nil 446 }) 447 448 time.Sleep(100 * time.Millisecond) 449 err := server.SyncRemoveHandler(ctx, "test-topic-1") 450 require.NoError(t, err) 451 452 time.Sleep(1000 * time.Millisecond) 453 454 // No-op handler. 455 _ = mustAddHandler(ctx, t, server, "test-topic-1", 456 &testTopicContent{}, func(senderID string, i interface{}) error { 457 return nil 458 }) 459 460 require.Eventually(t, func() bool { 461 latestAck, ok := client.CurrentAck("test-topic-1") 462 if !ok { 463 return false 464 } 465 log.Info("checked ack", zap.Int64("ack", latestAck)) 466 return latestAck == 100 467 }, time.Second*10, time.Millisecond*20) 468 469 cancel() 470 wg.Wait() 471 }