github.com/matrixorigin/matrixone@v1.2.0/pkg/logservice/service_commands_test.go (about)

     1  // Copyright 2021 - 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package logservice
    16  
    17  import (
    18  	"context"
    19  	"sync/atomic"
    20  	"testing"
    21  	"time"
    22  
    23  	"github.com/google/uuid"
    24  	"github.com/lni/dragonboat/v4"
    25  	"github.com/lni/goutils/leaktest"
    26  	"github.com/lni/vfs"
    27  	"github.com/stretchr/testify/assert"
    28  	"github.com/stretchr/testify/require"
    29  
    30  	"github.com/matrixorigin/matrixone/pkg/common/morpc"
    31  	"github.com/matrixorigin/matrixone/pkg/common/runtime"
    32  	pb "github.com/matrixorigin/matrixone/pkg/pb/logservice"
    33  )
    34  
    35  func TestBackgroundTickAndHeartbeat(t *testing.T) {
    36  	defer leaktest.AfterTest(t)()
    37  	cfg := DefaultConfig()
    38  	cfg.UUID = uuid.New().String()
    39  	cfg.FS = vfs.NewStrictMem()
    40  	cfg.DeploymentID = 1
    41  	cfg.RTTMillisecond = 5
    42  	cfg.DataDir = "data-1"
    43  	cfg.LogServicePort = 9002
    44  	cfg.RaftPort = 9000
    45  	cfg.GossipPort = 9001
    46  	// below is an unreachable address intentionally set
    47  	cfg.GossipSeedAddresses = []string{"127.0.0.1:9010"}
    48  	cfg.HeartbeatInterval.Duration = 5 * time.Millisecond
    49  	cfg.HAKeeperTickInterval.Duration = 5 * time.Millisecond
    50  	cfg.HAKeeperClientConfig.ServiceAddresses = []string{"127.0.0.1:9002"}
    51  	service, err := NewService(cfg,
    52  		newFS(),
    53  		nil,
    54  		WithBackendFilter(func(msg morpc.Message, backendAddr string) bool {
    55  			return true
    56  		}),
    57  	)
    58  	require.NoError(t, err)
    59  	defer func() {
    60  		assert.NoError(t, service.Close())
    61  	}()
    62  	peers := make(map[uint64]dragonboat.Target)
    63  	peers[1] = service.ID()
    64  	require.NoError(t, service.store.startHAKeeperReplica(1, peers, false))
    65  
    66  	for i := 0; i < 500; i++ {
    67  		done := true
    68  		state, err := service.store.getCheckerState()
    69  		require.NoError(t, err)
    70  		if state.Tick < 10 {
    71  			done = false
    72  		}
    73  		si, ok := state.LogState.Stores[service.ID()]
    74  		if !ok {
    75  			done = false
    76  		} else {
    77  			if si.Tick < 10 {
    78  				done = false
    79  			}
    80  		}
    81  		if done {
    82  			return
    83  		} else {
    84  			time.Sleep(5 * time.Millisecond)
    85  		}
    86  	}
    87  	t.Fatalf("failed to tick/heartbeat")
    88  }
    89  
    90  func TestHandleKillZombie(t *testing.T) {
    91  	fn := func(t *testing.T, s *Service) {
    92  		has, err := hasMetadataRec(s.store.cfg.DataDir, logMetadataFilename, 1, 1, s.store.cfg.FS)
    93  		require.NoError(t, err)
    94  		assert.True(t, has)
    95  
    96  		cmd := pb.ScheduleCommand{
    97  			ConfigChange: &pb.ConfigChange{
    98  				ChangeType: pb.KillZombie,
    99  				Replica: pb.Replica{
   100  					ShardID:   1,
   101  					ReplicaID: 1,
   102  				},
   103  			},
   104  		}
   105  		mustHaveReplica(t, s.store, 1, 1)
   106  		s.handleCommands([]pb.ScheduleCommand{cmd})
   107  		assert.False(t, hasReplica(s.store, 1, 1))
   108  
   109  		has, err = hasMetadataRec(s.store.cfg.DataDir, logMetadataFilename, 1, 1, s.store.cfg.FS)
   110  		require.NoError(t, err)
   111  		assert.False(t, has)
   112  	}
   113  	runServiceTest(t, false, true, fn)
   114  }
   115  
   116  func TestHandleStartReplica(t *testing.T) {
   117  	fn := func(t *testing.T, s *Service) {
   118  		cmd := pb.ScheduleCommand{
   119  			ConfigChange: &pb.ConfigChange{
   120  				ChangeType: pb.StartReplica,
   121  				Replica: pb.Replica{
   122  					ShardID:   1,
   123  					ReplicaID: 1,
   124  				},
   125  				InitialMembers: map[uint64]string{1: s.ID()},
   126  			},
   127  		}
   128  		s.handleCommands([]pb.ScheduleCommand{cmd})
   129  		mustHaveReplica(t, s.store, 1, 1)
   130  
   131  		has, err := hasMetadataRec(s.store.cfg.DataDir, logMetadataFilename, 1, 1, s.store.cfg.FS)
   132  		require.NoError(t, err)
   133  		assert.True(t, has)
   134  	}
   135  	runServiceTest(t, false, false, fn)
   136  }
   137  
   138  func TestHandleStopReplica(t *testing.T) {
   139  	fn := func(t *testing.T, s *Service) {
   140  		cmd := pb.ScheduleCommand{
   141  			ConfigChange: &pb.ConfigChange{
   142  				ChangeType: pb.StartReplica,
   143  				Replica: pb.Replica{
   144  					ShardID:   1,
   145  					ReplicaID: 1,
   146  				},
   147  				InitialMembers: map[uint64]string{1: s.ID()},
   148  			},
   149  		}
   150  		s.handleCommands([]pb.ScheduleCommand{cmd})
   151  		mustHaveReplica(t, s.store, 1, 1)
   152  
   153  		cmd = pb.ScheduleCommand{
   154  			ConfigChange: &pb.ConfigChange{
   155  				ChangeType: pb.StopReplica,
   156  				Replica: pb.Replica{
   157  					ShardID:   1,
   158  					ReplicaID: 1,
   159  				},
   160  			},
   161  		}
   162  		s.handleCommands([]pb.ScheduleCommand{cmd})
   163  		assert.False(t, hasReplica(s.store, 1, 1))
   164  
   165  		has, err := hasMetadataRec(s.store.cfg.DataDir, logMetadataFilename, 1, 1, s.store.cfg.FS)
   166  		require.NoError(t, err)
   167  		assert.True(t, has)
   168  	}
   169  	runServiceTest(t, false, false, fn)
   170  }
   171  
   172  func TestHandleAddReplica(t *testing.T) {
   173  	store1, store2, err := getTestStores()
   174  	require.NoError(t, err)
   175  	defer func() {
   176  		require.NoError(t, store1.close())
   177  		require.NoError(t, store2.close())
   178  	}()
   179  
   180  	service1 := Service{
   181  		store:   store1,
   182  		runtime: runtime.DefaultRuntime(),
   183  	}
   184  	cmd := pb.ScheduleCommand{
   185  		ConfigChange: &pb.ConfigChange{
   186  			ChangeType: pb.AddReplica,
   187  			Replica: pb.Replica{
   188  				UUID:      uuid.New().String(),
   189  				ShardID:   1,
   190  				ReplicaID: 3,
   191  				Epoch:     2,
   192  			},
   193  		},
   194  	}
   195  	service1.handleCommands([]pb.ScheduleCommand{cmd})
   196  	count, ok := checkReplicaCount(store1, 1)
   197  	require.True(t, ok)
   198  	assert.Equal(t, 3, count)
   199  }
   200  
   201  func TestHandleRemoveReplica(t *testing.T) {
   202  	store1, store2, err := getTestStores()
   203  	require.NoError(t, err)
   204  	defer func() {
   205  		require.NoError(t, store1.close())
   206  		require.NoError(t, store2.close())
   207  	}()
   208  
   209  	service1 := Service{
   210  		store:   store1,
   211  		runtime: runtime.DefaultRuntime(),
   212  	}
   213  	cmd := pb.ScheduleCommand{
   214  		ConfigChange: &pb.ConfigChange{
   215  			ChangeType: pb.RemoveReplica,
   216  			Replica: pb.Replica{
   217  				ShardID:   1,
   218  				ReplicaID: 2,
   219  				Epoch:     2,
   220  			},
   221  		},
   222  	}
   223  	service1.handleCommands([]pb.ScheduleCommand{cmd})
   224  	count, ok := checkReplicaCount(store1, 1)
   225  	require.True(t, ok)
   226  	assert.Equal(t, 1, count)
   227  }
   228  
   229  func checkReplicaCount(s *store, shardID uint64) (int, bool) {
   230  	hb := s.getHeartbeatMessage()
   231  	for _, info := range hb.Replicas {
   232  		if info.ShardID == shardID {
   233  			return len(info.Replicas), true
   234  		}
   235  	}
   236  	return 0, false
   237  }
   238  
   239  func TestHandleShutdown(t *testing.T) {
   240  	fn := func(t *testing.T, s *Service) {
   241  		cmd := pb.ScheduleCommand{
   242  			UUID: s.ID(),
   243  			ShutdownStore: &pb.ShutdownStore{
   244  				StoreID: s.ID(),
   245  			},
   246  			ServiceType: pb.LogService,
   247  		}
   248  
   249  		shutdownC := make(chan struct{})
   250  		exit := atomic.Bool{}
   251  		go func() {
   252  			ctx, cancel := context.WithTimeout(context.Background(), time.Second)
   253  			defer func() {
   254  				cancel()
   255  				exit.Store(true)
   256  			}()
   257  
   258  			select {
   259  			case <-ctx.Done():
   260  				panic("deadline reached")
   261  			case <-shutdownC:
   262  				runtime.DefaultRuntime().Logger().Info("received shutdown command")
   263  			}
   264  		}()
   265  
   266  		s.shutdownC = shutdownC
   267  
   268  		for !exit.Load() {
   269  			s.handleCommands([]pb.ScheduleCommand{cmd})
   270  			time.Sleep(time.Millisecond)
   271  		}
   272  
   273  	}
   274  	runServiceTest(t, false, true, fn)
   275  }