github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/pkg/p2p/server_client_integration_test.go (about)

     1  // Copyright 2021 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package p2p
    15  
    16  import (
    17  	"context"
    18  	"fmt"
    19  	"net"
    20  	"sync"
    21  	"sync/atomic"
    22  	"testing"
    23  	"time"
    24  
    25  	"github.com/phayes/freeport"
    26  	"github.com/pingcap/failpoint"
    27  	"github.com/pingcap/log"
    28  	cerror "github.com/pingcap/tiflow/pkg/errors"
    29  	"github.com/pingcap/tiflow/pkg/security"
    30  	"github.com/pingcap/tiflow/proto/p2p"
    31  	"github.com/stretchr/testify/require"
    32  	"go.uber.org/zap"
    33  	"google.golang.org/grpc"
    34  )
    35  
    36  // read only
    37  var clientConfig4Testing = &MessageClientConfig{
    38  	SendChannelSize:         1024,
    39  	BatchSendInterval:       time.Millisecond * 1, // to accelerate testing
    40  	MaxBatchCount:           128,
    41  	MaxBatchBytes:           8192,
    42  	RetryRateLimitPerSecond: 10.0, // using 10.0 instead of 1.0 to accelerate testing
    43  	DialTimeout:             time.Second * 3,
    44  	MaxRecvMsgSize:          4 * 1024 * 1024, // 4MB
    45  }
    46  
// serverConfigOpt is a function that adjusts a MessageServerConfig in place.
// newServerForIntegrationTesting applies such options to its private copy of
// the default testing configuration before creating the server.
type serverConfigOpt = func(config *MessageServerConfig)
    48  
    49  //nolint:unparam
    50  func newServerForIntegrationTesting(t *testing.T, serverID string, configOpts ...serverConfigOpt) (server *MessageServer, addr string, cancel func()) {
    51  	port := freeport.GetPort()
    52  	addr = fmt.Sprintf("127.0.0.1:%d", port)
    53  	lis, err := net.Listen("tcp", addr)
    54  	require.NoError(t, err)
    55  
    56  	var opts []grpc.ServerOption
    57  	grpcServer := grpc.NewServer(opts...)
    58  
    59  	serverConfig := *defaultServerConfig4Testing
    60  	for _, opt := range configOpts {
    61  		opt(&serverConfig)
    62  	}
    63  
    64  	server = NewMessageServer(serverID, &serverConfig)
    65  	p2p.RegisterCDCPeerToPeerServer(grpcServer, server)
    66  
    67  	var wg sync.WaitGroup
    68  	wg.Add(1)
    69  	go func() {
    70  		defer wg.Done()
    71  		_ = grpcServer.Serve(lis)
    72  	}()
    73  
    74  	cancel = func() {
    75  		grpcServer.Stop()
    76  		wg.Wait()
    77  	}
    78  	return
    79  }
    80  
    81  func runP2PIntegrationTest(
    82  	ctx context.Context,
    83  	t *testing.T,
    84  	size int,
    85  	numTopics int,
    86  	clientConcurrency int,
    87  ) {
    88  	ctx, cancel := context.WithCancel(ctx)
    89  	defer cancel()
    90  
    91  	server, addr, cancelServer := newServerForIntegrationTesting(t, "test-server-1",
    92  		func(config *MessageServerConfig) {
    93  			config.AckInterval = time.Millisecond * 1
    94  		})
    95  	defer cancelServer()
    96  
    97  	var wg sync.WaitGroup
    98  	wg.Add(1)
    99  	go func() {
   100  		defer wg.Done()
   101  		for {
   102  			err := server.Run(ctx, nil)
   103  			if cerror.ErrPeerMessageInjectedServerRestart.Equal(err) {
   104  				log.Warn("server restarted")
   105  				continue
   106  			}
   107  			require.Regexp(t, ".*context canceled.*", err.Error())
   108  			break
   109  		}
   110  	}()
   111  
   112  	for j := 0; j < numTopics; j++ {
   113  		topicName := fmt.Sprintf("test-topic-%d", j)
   114  		var lastIndex int64
   115  		errCh := mustAddHandler(ctx, t, server, topicName, &testTopicContent{}, func(senderID string, i interface{}) error {
   116  			require.Equal(t, "test-client-1", senderID)
   117  			require.IsType(t, &testTopicContent{}, i)
   118  			content := i.(*testTopicContent)
   119  			if clientConcurrency == 1 {
   120  				require.Equal(t, content.Index-1, lastIndex)
   121  				lastIndex = content.Index
   122  			}
   123  			return nil
   124  		})
   125  
   126  		wg.Add(1)
   127  		go func() {
   128  			defer wg.Done()
   129  			select {
   130  			case <-ctx.Done():
   131  			case err := <-errCh:
   132  				require.NoError(t, err)
   133  			}
   134  		}()
   135  	}
   136  
   137  	client := NewGrpcMessageClient("test-client-1", clientConfig4Testing)
   138  	wg.Add(1)
   139  	go func() {
   140  		defer wg.Done()
   141  		err := client.Run(ctx, "tcp", addr, "test-server-1", &security.Credential{})
   142  		if err != nil {
   143  			log.Warn("client returned error", zap.Error(err))
   144  			require.Regexp(t, ".*context canceled.*", err.Error())
   145  		}
   146  	}()
   147  
   148  	var wg1 sync.WaitGroup
   149  	wg1.Add(numTopics * clientConcurrency)
   150  	for j := 0; j < numTopics*clientConcurrency; j++ {
   151  		topicName := fmt.Sprintf("test-topic-%d", j%numTopics)
   152  		go func() {
   153  			defer wg1.Done()
   154  			var oldSeq Seq
   155  			for i := 0; i < size; i++ {
   156  				content := &testTopicContent{Index: int64(i + 1)}
   157  				seq, err := client.SendMessage(ctx, topicName, content)
   158  				require.NoError(t, err)
   159  
   160  				if clientConcurrency == 1 {
   161  					require.Equal(t, oldSeq+1, seq)
   162  					oldSeq = seq
   163  				}
   164  			}
   165  
   166  			require.Eventuallyf(t, func() bool {
   167  				seq, ok := client.CurrentAck(topicName)
   168  				if !ok {
   169  					return false
   170  				}
   171  				return seq >= Seq(size*clientConcurrency)
   172  			}, time.Second*40, time.Millisecond*20, "failed to wait for ack")
   173  		}()
   174  	}
   175  
   176  	wg1.Wait()
   177  	cancel()
   178  	wg.Wait()
   179  }
   180  
   181  func TestMessageClientBasic(t *testing.T) {
   182  	ctx, cancel := context.WithTimeout(context.TODO(), defaultTimeout)
   183  	defer cancel()
   184  
   185  	runP2PIntegrationTest(ctx, t, defaultMessageBatchSizeLarge, 1, 4)
   186  }
   187  
   188  func TestMessageClientBasicMultiTopics(t *testing.T) {
   189  	ctx, cancel := context.WithTimeout(context.TODO(), defaultTimeout*2)
   190  	defer cancel()
   191  
   192  	runP2PIntegrationTest(ctx, t, defaultMessageBatchSizeLarge, 4, 4)
   193  }
   194  
   195  func TestMessageClientServerRestart(t *testing.T) {
   196  	_ = failpoint.Enable("github.com/pingcap/tiflow/pkg/p2p/ServerInjectServerRestart", "1%return(true)")
   197  	defer func() {
   198  		_ = failpoint.Disable("github.com/pingcap/tiflow/pkg/p2p/ServerInjectServerRestart")
   199  	}()
   200  
   201  	ctx, cancel := context.WithTimeout(context.TODO(), defaultTimeout)
   202  	defer cancel()
   203  
   204  	runP2PIntegrationTest(ctx, t, defaultMessageBatchSizeSmall, 1, 1)
   205  }
   206  
   207  func TestMessageClientServerRestartMultiTopics(t *testing.T) {
   208  	_ = failpoint.Enable("github.com/pingcap/tiflow/pkg/p2p/ServerInjectServerRestart", "1%return(true)")
   209  	defer func() {
   210  		_ = failpoint.Disable("github.com/pingcap/tiflow/pkg/p2p/ServerInjectServerRestart")
   211  	}()
   212  
   213  	ctx, cancel := context.WithTimeout(context.TODO(), defaultTimeout*4)
   214  	defer cancel()
   215  
   216  	runP2PIntegrationTest(ctx, t, defaultMessageBatchSizeSmall, 4, 1)
   217  }
   218  
   219  func TestMessageClientRestart(t *testing.T) {
   220  	_ = failpoint.Enable("github.com/pingcap/tiflow/pkg/p2p/ClientInjectStreamFailure", "50%return(true)")
   221  	defer func() {
   222  		_ = failpoint.Disable("github.com/pingcap/tiflow/pkg/p2p/ClientInjectStreamFailure")
   223  	}()
   224  
   225  	ctx, cancel := context.WithTimeout(context.TODO(), defaultTimeout)
   226  	defer cancel()
   227  
   228  	runP2PIntegrationTest(ctx, t, defaultMessageBatchSizeLarge, 1, 1)
   229  }
   230  
   231  func TestMessageClientRestartMultiTopics(t *testing.T) {
   232  	_ = failpoint.Enable("github.com/pingcap/tiflow/pkg/p2p/ClientInjectStreamFailure", "3%return(true)")
   233  	defer func() {
   234  		_ = failpoint.Disable("github.com/pingcap/tiflow/pkg/p2p/ClientInjectStreamFailure")
   235  	}()
   236  
   237  	ctx, cancel := context.WithTimeout(context.TODO(), defaultTimeout)
   238  	defer cancel()
   239  
   240  	runP2PIntegrationTest(ctx, t, defaultMessageBatchSizeSmall, 4, 1)
   241  }
   242  
   243  func TestMessageClientSenderErrorsMultiTopics(t *testing.T) {
   244  	_ = failpoint.Enable("github.com/pingcap/tiflow/pkg/p2p/ClientBatchSenderInjectError", "3*return(true)")
   245  	defer func() {
   246  		_ = failpoint.Disable("github.com/pingcap/tiflow/pkg/p2p/ClientBatchSenderInjectError")
   247  	}()
   248  
   249  	ctx, cancel := context.WithTimeout(context.TODO(), defaultTimeout)
   250  	defer cancel()
   251  
   252  	runP2PIntegrationTest(ctx, t, defaultMessageBatchSizeSmall, 4, 1)
   253  }
   254  
   255  func TestMessageClientBasicNonblocking(t *testing.T) {
   256  	ctx, cancel := context.WithTimeout(context.TODO(), defaultTimeout)
   257  	defer cancel()
   258  
   259  	server, addr, cancelServer := newServerForIntegrationTesting(t, "test-server-1")
   260  	defer cancelServer()
   261  
   262  	var wg sync.WaitGroup
   263  	wg.Add(1)
   264  	go func() {
   265  		defer wg.Done()
   266  		err := server.Run(ctx, nil)
   267  		if err != nil {
   268  			require.Regexp(t, ".*context canceled.*", err.Error())
   269  		}
   270  	}()
   271  
   272  	var lastIndex int64
   273  	errCh := mustAddHandler(ctx, t, server, "test-topic-1", &testTopicContent{}, func(senderID string, i interface{}) error {
   274  		require.Equal(t, "test-client-1", senderID)
   275  		require.IsType(t, &testTopicContent{}, i)
   276  		content := i.(*testTopicContent)
   277  		swapped := atomic.CompareAndSwapInt64(&lastIndex, content.Index-1, content.Index)
   278  		require.True(t, swapped)
   279  		return nil
   280  	})
   281  
   282  	wg.Add(1)
   283  	go func() {
   284  		defer wg.Done()
   285  		select {
   286  		case <-ctx.Done():
   287  		case err := <-errCh:
   288  			require.NoError(t, err)
   289  		}
   290  	}()
   291  
   292  	client := NewGrpcMessageClient("test-client-1", clientConfig4Testing)
   293  	wg.Add(1)
   294  	go func() {
   295  		defer wg.Done()
   296  		err := client.Run(ctx, "tcp", addr, "test-server-1", &security.Credential{})
   297  		require.Error(t, err)
   298  		require.Regexp(t, ".*context canceled.*", err.Error())
   299  	}()
   300  
   301  	var oldSeq Seq
   302  	for i := 0; i < defaultMessageBatchSizeSmall; i++ {
   303  		content := &testTopicContent{Index: int64(i + 1)}
   304  		var (
   305  			seq Seq
   306  			err error
   307  		)
   308  		require.Eventually(t, func() bool {
   309  			seq, err = client.TrySendMessage(ctx, "test-topic-1", content)
   310  			return !cerror.ErrPeerMessageSendTryAgain.Equal(err)
   311  		}, time.Second*5, time.Millisecond*10)
   312  		require.NoError(t, err)
   313  		require.Equal(t, oldSeq+1, seq)
   314  		oldSeq = seq
   315  	}
   316  
   317  	require.Eventually(t, func() bool {
   318  		seq, ok := client.CurrentAck("test-topic-1")
   319  		if !ok {
   320  			return false
   321  		}
   322  		return seq >= defaultMessageBatchSizeSmall
   323  	}, time.Second*10, time.Millisecond*20)
   324  
   325  	cancel()
   326  	wg.Wait()
   327  }
   328  
   329  func TestMessageBackPressure(t *testing.T) {
   330  	ctx, cancel := context.WithTimeout(context.TODO(), defaultTimeout)
   331  	defer cancel()
   332  
   333  	server, addr, cancelServer := newServerForIntegrationTesting(t,
   334  		"test-server-1", func(config *MessageServerConfig) {
   335  			config.MaxPendingTaskCount = 10
   336  		})
   337  	defer cancelServer()
   338  
   339  	var wg sync.WaitGroup
   340  	wg.Add(1)
   341  	go func() {
   342  		defer wg.Done()
   343  		err := server.Run(ctx, nil)
   344  		if err != nil {
   345  			require.Regexp(t, ".*context canceled.*", err.Error())
   346  		}
   347  	}()
   348  
   349  	// No-op handler. We are only testing for back-pressure.
   350  	errCh := mustAddHandler(ctx, t, server, "test-topic-1", &testTopicContent{}, func(senderID string, i interface{}) error {
   351  		return nil
   352  	})
   353  
   354  	wg.Add(1)
   355  	go func() {
   356  		defer wg.Done()
   357  		select {
   358  		case <-ctx.Done():
   359  		case err := <-errCh:
   360  			require.NoError(t, err)
   361  		}
   362  	}()
   363  
   364  	client := NewGrpcMessageClient("test-client-1", clientConfig4Testing)
   365  	wg.Add(1)
   366  	go func() {
   367  		defer wg.Done()
   368  		err := client.Run(ctx, "tcp", addr, "test-server-1", &security.Credential{})
   369  		require.Error(t, err)
   370  		require.Regexp(t, ".*context canceled.*", err.Error())
   371  	}()
   372  
   373  	_ = failpoint.Enable("github.com/pingcap/tiflow/pkg/p2p/ServerInjectTaskDelay", "sleep(1)")
   374  	defer func() {
   375  		_ = failpoint.Disable("github.com/pingcap/tiflow/pkg/p2p/ServerInjectTaskDelay")
   376  	}()
   377  
   378  	var lastSeq Seq
   379  	for i := 0; i < defaultMessageBatchSizeLarge; i++ {
   380  		seq, err := client.SendMessage(ctx, "test-topic-1", &testTopicContent{})
   381  		require.NoError(t, err)
   382  		atomic.StoreInt64(&lastSeq, seq)
   383  	}
   384  
   385  	require.Eventually(t, func() bool {
   386  		latestAck, ok := client.CurrentAck("test-topic-1")
   387  		if !ok {
   388  			return false
   389  		}
   390  		log.Info("checked ack", zap.Int64("ack", latestAck))
   391  		return latestAck == atomic.LoadInt64(&lastSeq)
   392  	}, time.Second*10, time.Millisecond*20)
   393  	cancel()
   394  	wg.Wait()
   395  }
   396  
   397  func TestTopicCongested(t *testing.T) {
   398  	ctx, cancel := context.WithTimeout(context.TODO(), defaultTimeout)
   399  	defer cancel()
   400  
   401  	server, addr, cancelServer := newServerForIntegrationTesting(t,
   402  		"test-server-1", func(config *MessageServerConfig) {
   403  			config.MaxPendingMessageCountPerTopic = 10
   404  		})
   405  	defer cancelServer()
   406  
   407  	var wg sync.WaitGroup
   408  	wg.Add(1)
   409  	go func() {
   410  		defer wg.Done()
   411  		err := server.Run(ctx, nil)
   412  		if err != nil {
   413  			require.Regexp(t, ".*context canceled.*", err.Error())
   414  		}
   415  	}()
   416  
   417  	newClientConfig := *clientConfig4Testing
   418  	newClientConfig.MaxBatchCount = 1
   419  	newClientConfig.RetryRateLimitPerSecond = 100
   420  	client := NewGrpcMessageClient("test-client-1", clientConfig4Testing)
   421  	wg.Add(1)
   422  	go func() {
   423  		defer wg.Done()
   424  		err := client.Run(ctx, "tcp", addr, "test-server-1", &security.Credential{})
   425  		require.Error(t, err)
   426  		require.Regexp(t, ".*context canceled.*", err.Error())
   427  	}()
   428  
   429  	var lastSeq Seq
   430  	wg.Add(1)
   431  	go func() {
   432  		defer wg.Done()
   433  
   434  		for i := 0; i < 100; i++ {
   435  			seq, err := client.SendMessage(ctx, "test-topic-1", &testTopicContent{})
   436  			require.NoError(t, err)
   437  			atomic.StoreInt64(&lastSeq, seq)
   438  			time.Sleep(10 * time.Millisecond)
   439  		}
   440  	}()
   441  
   442  	// No-op handler.
   443  	_ = mustAddHandler(ctx, t, server, "test-topic-1",
   444  		&testTopicContent{}, func(senderID string, i interface{}) error {
   445  			return nil
   446  		})
   447  
   448  	time.Sleep(100 * time.Millisecond)
   449  	err := server.SyncRemoveHandler(ctx, "test-topic-1")
   450  	require.NoError(t, err)
   451  
   452  	time.Sleep(1000 * time.Millisecond)
   453  
   454  	// No-op handler.
   455  	_ = mustAddHandler(ctx, t, server, "test-topic-1",
   456  		&testTopicContent{}, func(senderID string, i interface{}) error {
   457  			return nil
   458  		})
   459  
   460  	require.Eventually(t, func() bool {
   461  		latestAck, ok := client.CurrentAck("test-topic-1")
   462  		if !ok {
   463  			return false
   464  		}
   465  		log.Info("checked ack", zap.Int64("ack", latestAck))
   466  		return latestAck == 100
   467  	}, time.Second*10, time.Millisecond*20)
   468  
   469  	cancel()
   470  	wg.Wait()
   471  }