github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/aggregator/integration/placement_change_test.go

//go:build integration

// Copyright (c) 2018 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package integration

import (
	"sort"
	"testing"
	"time"

	aggclient "github.com/m3db/m3/src/aggregator/client"
	"github.com/m3db/m3/src/cluster/kv"
	memcluster "github.com/m3db/m3/src/cluster/mem"
	"github.com/m3db/m3/src/cluster/placement"
	maggregation "github.com/m3db/m3/src/metrics/aggregation"
	"github.com/m3db/m3/src/metrics/metadata"
	"github.com/m3db/m3/src/metrics/metric/aggregated"
	"github.com/m3db/m3/src/metrics/policy"
	"github.com/m3db/m3/src/x/instrument"
	xtest "github.com/m3db/m3/src/x/test"
	xtime "github.com/m3db/m3/src/x/time"

	"github.com/google/go-cmp/cmp"
	"github.com/stretchr/testify/require"
	"go.uber.org/zap"
)

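// TestPlacementChange verifies that the aggregator keeps producing correct
// results across a placement change: timed metrics are written under an
// initial two-instance placement, the placement is then updated to
// redistribute the shards, more metrics are written, and the flushed output
// of both servers is compared against the expected aggregations.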
func TestPlacementChange(t *testing.T) {
	if testing.Short() {
		t.SkipNow()
	}

	// Clock setup.
	clock := newTestClock(time.Now().Truncate(time.Hour))

	// Placement setup.
	var (
		numTotalShards = 4
		placementKey   = "/placement"
	)
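	// Two aggregator instances, each exposing raw TCP, HTTP, and m3msg
	// listen addresses.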
	multiServerSetup := []struct {
		rawTCPAddr string
		httpAddr   string
		m3MsgAddr  string
	}{
		{
			rawTCPAddr: "localhost:6000",
			httpAddr:   "localhost:16000",
			m3MsgAddr:  "localhost:26000",
		},
		{
			rawTCPAddr: "localhost:6001",
			httpAddr:   "localhost:16001",
			m3MsgAddr:  "localhost:26001",
		},
	}
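	// Initial placement: with 4 total shards, the first instance owns shards
	// [0, 3) and the second owns only shard 3.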
	initialInstanceConfig := []placementInstanceConfig{
		{
			shardSetID:          1,
			shardStartInclusive: 0,
			shardEndExclusive:   uint32(numTotalShards) - 1,
		},
		{
			shardSetID:          2,
			shardStartInclusive: uint32(numTotalShards) - 1,
			shardEndExclusive:   uint32(numTotalShards),
		},
	}
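	// Final placement: the shards are rebalanced evenly across the two
	// instances, [0, 2) and [2, 4).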
	finalInstanceConfig := []placementInstanceConfig{
		{
			shardSetID:          1,
			shardStartInclusive: 0,
			shardEndExclusive:   uint32(numTotalShards / 2),
		},
		{
			shardSetID:          2,
			shardStartInclusive: uint32(numTotalShards / 2),
			shardEndExclusive:   uint32(numTotalShards),
		},
	}

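	// The placement instance IDs are set to the server addresses; which
	// address is used depends on whether the raw TCP or m3msg aggregator
	// client type is configured via the environment.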
	aggregatorClientType, err := getAggregatorClientTypeFromEnv()
	require.NoError(t, err)
	for i, mss := range multiServerSetup {
		initialInstanceConfig[i].instanceID = mss.rawTCPAddr
		finalInstanceConfig[i].instanceID = mss.rawTCPAddr
		if aggregatorClientType == aggclient.M3MsgAggregatorClient {
			initialInstanceConfig[i].instanceID = mss.m3MsgAddr
			finalInstanceConfig[i].instanceID = mss.m3MsgAddr
		}
	}

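	// Store the initial placement in an in-memory cluster KV store and
	// initialize the m3msg topic.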
	clusterClient := memcluster.New(kv.NewOverrideOptions())
	initialPlacement := makePlacement(initialInstanceConfig, numTotalShards)
	finalPlacement := makePlacement(finalInstanceConfig, numTotalShards)
	setPlacement(t, placementKey, clusterClient, initialPlacement)
	topicService, err := initializeTopic(defaultTopicName, clusterClient, numTotalShards)
	require.NoError(t, err)

	// Election cluster setup.
	electionCluster := newTestCluster(t)

	// Admin client connection options setup.
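	// Note: the low reconnect thresholds and short timeouts below are
	// presumably chosen so that a dropped connection fails fast rather than
	// stalling the test.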
	connectionOpts := aggclient.NewConnectionOptions().
		SetInitReconnectThreshold(1).
		SetMaxReconnectThreshold(1).
		SetMaxReconnectDuration(2 * time.Second).
		SetWriteTimeout(time.Second)

	// Create servers.
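	// All servers share the same election cluster, cluster client, and topic
	// service; each one gets its own addresses, instance ID, and shard set ID
	// from the initial placement.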
	servers := make(testServerSetups, 0, len(multiServerSetup))
	for i, mss := range multiServerSetup {
		instrumentOpts := instrument.NewOptions()
		logger := instrumentOpts.Logger().With(
			zap.String("serverAddr", mss.rawTCPAddr),
		)
		instrumentOpts = instrumentOpts.SetLogger(logger)
		serverOpts := newTestServerOptions(t).
			SetClockOptions(clock.Options()).
			SetInstrumentOptions(instrumentOpts).
			SetElectionCluster(electionCluster).
			SetRawTCPAddr(mss.rawTCPAddr).
			SetHTTPAddr(mss.httpAddr).
			SetM3MsgAddr(mss.m3MsgAddr).
			SetInstanceID(initialInstanceConfig[i].instanceID).
			SetClusterClient(clusterClient).
			SetTopicService(topicService).
			SetShardSetID(initialInstanceConfig[i].shardSetID).
			SetClientConnectionOptions(connectionOpts)
		server := newTestServerSetup(t, serverOpts)
		servers = append(servers, server)
	}

	// Start the servers.
	log := xtest.NewLogger(t)
	for i, server := range servers {
		require.NoError(t, server.startServer())
		log.Sugar().Infof("server %d is now up", i)
	}

	// Create a client for writing to the servers.
	client := servers.newClient(t)
	require.NoError(t, client.connect())

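	// Wait until every server has become leader for its shard set before
	// writing any data.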
	for _, server := range servers {
		require.NoError(t, server.waitUntilLeader())
	}

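	// Timeline for the test: two 10s write windows separated by just over a
	// minute, with finalTime far enough past the second window for all
	// pending aggregations to be flushed.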
	var (
		idPrefix = "metric.id"
		numIDs   = 100

		start1    = clock.Now()
		stop1     = start1.Add(10 * time.Second)
		start2    = stop1.Add(time.Minute + 2*time.Second)
		stop2     = start2.Add(10 * time.Second)
		finalTime = stop2.Add(time.Minute + 2*time.Second)
		interval  = 2 * time.Second

		sleepDuration = time.Second
	)
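	// Every datapoint is a timed metric aggregated with Sum under a
	// 2s-resolution, 1h-retention storage policy.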
	ids := generateTestIDs(idPrefix, numIDs)
	testTimedMetadataTemplate := metadata.TimedMetadata{
		AggregationID: maggregation.MustCompressTypes(maggregation.Sum),
		StoragePolicy: policy.NewStoragePolicy(2*time.Second, xtime.Second, time.Hour),
	}
	metadataFn := func(idx int) metadataUnion {
		timedMetadata := testTimedMetadataTemplate
		return metadataUnion{
			mType:         timedMetadataType,
			timedMetadata: timedMetadata,
		}
	}
	datasets := []testDataset{
		mustGenerateTestDataset(t, datasetGenOpts{
			start:        start1,
			stop:         stop1,
			interval:     interval,
			ids:          ids,
			category:     timedMetric,
			typeFn:       roundRobinMetricTypeFn,
			valueGenOpts: defaultValueGenOpts,
			metadataFn:   metadataFn,
		}),
		mustGenerateTestDataset(t, datasetGenOpts{
			start:        start2,
			stop:         stop2,
			interval:     interval,
			ids:          ids,
			category:     timedMetric,
			typeFn:       roundRobinMetricTypeFn,
			valueGenOpts: defaultValueGenOpts,
			metadataFn:   metadataFn,
		}),
	}

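	// Write the first dataset under the initial placement, advancing the test
	// clock to each datapoint's timestamp before writing it.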
	for _, data := range datasets[0] {
		clock.SetNow(data.timestamp)

		for _, mm := range data.metricWithMetadatas {
			require.NoError(t, client.writeTimedMetricWithMetadata(mm.metric.timed, mm.metadata.timedMetadata))
		}
		require.NoError(t, client.flush())

		// Give the servers some time to process the incoming packets.
		time.Sleep(sleepDuration)
	}

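	// Let the first dataset flush under the initial placement, then swap in
	// the final placement and give the servers time to pick up the new shard
	// assignments.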
	clock.SetNow(start2)
	time.Sleep(waitForDataToFlush)
	setPlacement(t, placementKey, clusterClient, finalPlacement)
	time.Sleep(waitForDataToFlush)

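	// Write the second dataset under the final placement.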
	for _, data := range datasets[1] {
		clock.SetNow(data.timestamp)

		for _, mm := range data.metricWithMetadatas {
			require.NoError(t, client.writeTimedMetricWithMetadata(mm.metric.timed, mm.metadata.timedMetadata))
		}
		require.NoError(t, client.flush())

		// Give the servers some time to process the incoming packets.
		time.Sleep(sleepDuration)
	}

	// Move time forward and wait for flushing to happen.
	clock.SetNow(finalTime)
	time.Sleep(waitForDataToFlush)

	// Remove all the topic consumers before closing clients and servers. This
	// allows the connections between servers to be closed while the servers are
	// still running. Otherwise, during server shutdown, the yet-to-be-closed
	// servers would repeatedly try to reconnect to recently closed ones,
	// resulting in longer shutdown times.
	require.NoError(t, removeAllTopicConsumers(topicService, defaultTopicName))

	require.NoError(t, client.close())

	// Stop the servers.
	for i, server := range servers {
		require.NoError(t, server.stopServer())
		log.Sugar().Infof("server %d is now down", i)
	}

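	// Validate results: collect the flushed output from both servers and
	// compare it against the aggregations expected from both datasets.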
	actual := make([]aggregated.MetricWithStoragePolicy, 0)
	for _, server := range servers {
		actual = append(actual, server.sortedResults()...)
	}
	sort.Sort(byTimeIDPolicyAscending(actual))
	expected := make([]aggregated.MetricWithStoragePolicy, 0)
	for _, dataset := range datasets {
		results := mustComputeExpectedResults(t, finalTime, dataset, servers[0].aggregatorOpts)
		expected = append(expected, results...)
	}
	sort.Sort(byTimeIDPolicyAscending(expected))
	require.True(t, cmp.Equal(expected, actual, testCmpOpts...), cmp.Diff(expected, actual, testCmpOpts...))
}

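// makePlacement builds a placement that spreads numShards shards across the
// instances described by instanceConfig.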
func makePlacement(instanceConfig []placementInstanceConfig, numShards int) placement.Placement {
	instances := make([]placement.Instance, 0, len(instanceConfig))
	for _, config := range instanceConfig {
		instance := config.newPlacementInstance()
		instances = append(instances, instance)
	}
	return newPlacement(numShards, instances)
}