github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/aggregator/integration/placement_change_test.go

//go:build integration

// Copyright (c) 2018 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package integration

import (
	"sort"
	"testing"
	"time"

	aggclient "github.com/m3db/m3/src/aggregator/client"
	"github.com/m3db/m3/src/cluster/kv"
	memcluster "github.com/m3db/m3/src/cluster/mem"
	"github.com/m3db/m3/src/cluster/placement"
	maggregation "github.com/m3db/m3/src/metrics/aggregation"
	"github.com/m3db/m3/src/metrics/metadata"
	"github.com/m3db/m3/src/metrics/metric/aggregated"
	"github.com/m3db/m3/src/metrics/policy"
	"github.com/m3db/m3/src/x/instrument"
	xtest "github.com/m3db/m3/src/x/test"
	xtime "github.com/m3db/m3/src/x/time"

	"github.com/google/go-cmp/cmp"
	"github.com/stretchr/testify/require"
	"go.uber.org/zap"
)

func TestPlacementChange(t *testing.T) {
	if testing.Short() {
		t.SkipNow()
	}

	// Clock setup.
	clock := newTestClock(time.Now().Truncate(time.Hour))

	// Placement setup.
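	// The initial placement gives the first instance shards 0-2 and the second instance
	// shard 3; the final placement rebalances them to shards 0-1 and 2-3 respectively.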
	var (
		numTotalShards = 4
		placementKey   = "/placement"
	)
	multiServerSetup := []struct {
		rawTCPAddr string
		httpAddr   string
		m3MsgAddr  string
	}{
		{
			rawTCPAddr: "localhost:6000",
			httpAddr:   "localhost:16000",
			m3MsgAddr:  "localhost:26000",
		},
		{
			rawTCPAddr: "localhost:6001",
			httpAddr:   "localhost:16001",
			m3MsgAddr:  "localhost:26001",
		},
	}
	initialInstanceConfig := []placementInstanceConfig{
		{
			shardSetID:          1,
			shardStartInclusive: 0,
			shardEndExclusive:   uint32(numTotalShards) - 1,
		},
		{
			shardSetID:          2,
			shardStartInclusive: uint32(numTotalShards) - 1,
			shardEndExclusive:   uint32(numTotalShards),
		},
	}
	finalInstanceConfig := []placementInstanceConfig{
		{
			shardSetID:          1,
			shardStartInclusive: 0,
			shardEndExclusive:   uint32(numTotalShards / 2),
		},
		{
			shardSetID:          2,
			shardStartInclusive: uint32(numTotalShards / 2),
			shardEndExclusive:   uint32(numTotalShards),
		},
	}

	aggregatorClientType, err := getAggregatorClientTypeFromEnv()
	require.NoError(t, err)
	for i, mss := range multiServerSetup {
		initialInstanceConfig[i].instanceID = mss.rawTCPAddr
		finalInstanceConfig[i].instanceID = mss.rawTCPAddr
		if aggregatorClientType == aggclient.M3MsgAggregatorClient {
			initialInstanceConfig[i].instanceID = mss.m3MsgAddr
			finalInstanceConfig[i].instanceID = mss.m3MsgAddr
		}
	}

	clusterClient := memcluster.New(kv.NewOverrideOptions())
	initialPlacement := makePlacement(initialInstanceConfig, numTotalShards)
	finalPlacement := makePlacement(finalInstanceConfig, numTotalShards)
	setPlacement(t, placementKey, clusterClient, initialPlacement)
	topicService, err := initializeTopic(defaultTopicName, clusterClient, numTotalShards)
	require.NoError(t, err)

	// Election cluster setup.
	electionCluster := newTestCluster(t)

	// Admin client connection options setup.
	connectionOpts := aggclient.NewConnectionOptions().
		SetInitReconnectThreshold(1).
		SetMaxReconnectThreshold(1).
		SetMaxReconnectDuration(2 * time.Second).
		SetWriteTimeout(time.Second)

	// Create servers.
	servers := make(testServerSetups, 0, len(multiServerSetup))
	for i, mss := range multiServerSetup {
		instrumentOpts := instrument.NewOptions()
		logger := instrumentOpts.Logger().With(
			zap.String("serverAddr", mss.rawTCPAddr),
		)
		instrumentOpts = instrumentOpts.SetLogger(logger)
		serverOpts := newTestServerOptions(t).
			SetClockOptions(clock.Options()).
			SetInstrumentOptions(instrumentOpts).
			SetElectionCluster(electionCluster).
			SetRawTCPAddr(mss.rawTCPAddr).
			SetHTTPAddr(mss.httpAddr).
			SetM3MsgAddr(mss.m3MsgAddr).
			SetInstanceID(initialInstanceConfig[i].instanceID).
			SetClusterClient(clusterClient).
			SetTopicService(topicService).
			SetShardSetID(initialInstanceConfig[i].shardSetID).
			SetClientConnectionOptions(connectionOpts)
		server := newTestServerSetup(t, serverOpts)
		servers = append(servers, server)
	}

	// Start the servers.
	log := xtest.NewLogger(t)
	for i, server := range servers {
		require.NoError(t, server.startServer())
		log.Sugar().Infof("server %d is now up", i)
	}

	// Create client for writing to the servers.
	client := servers.newClient(t)
	require.NoError(t, client.connect())

	for _, server := range servers {
		require.NoError(t, server.waitUntilLeader())
	}

	var (
		idPrefix = "metric.id"
		numIDs   = 100

		start1    = clock.Now()
		stop1     = start1.Add(10 * time.Second)
		start2    = stop1.Add(time.Minute + 2*time.Second)
		stop2     = start2.Add(10 * time.Second)
		finalTime = stop2.Add(time.Minute + 2*time.Second)
		interval  = 2 * time.Second

		sleepDuration = time.Second
	)
	ids := generateTestIDs(idPrefix, numIDs)
	testTimedMetadataTemplate := metadata.TimedMetadata{
		AggregationID: maggregation.MustCompressTypes(maggregation.Sum),
		StoragePolicy: policy.NewStoragePolicy(2*time.Second, xtime.Second, time.Hour),
	}
	metadataFn := func(idx int) metadataUnion {
		timedMetadata := testTimedMetadataTemplate
		return metadataUnion{
			mType:         timedMetadataType,
			timedMetadata: timedMetadata,
		}
	}
	datasets := []testDataset{
		mustGenerateTestDataset(t, datasetGenOpts{
			start:        start1,
			stop:         stop1,
			interval:     interval,
			ids:          ids,
			category:     timedMetric,
			typeFn:       roundRobinMetricTypeFn,
			valueGenOpts: defaultValueGenOpts,
			metadataFn:   metadataFn,
		}),
		mustGenerateTestDataset(t, datasetGenOpts{
			start:        start2,
			stop:         stop2,
			interval:     interval,
			ids:          ids,
			category:     timedMetric,
			typeFn:       roundRobinMetricTypeFn,
			valueGenOpts: defaultValueGenOpts,
			metadataFn:   metadataFn,
		}),
	}

	for _, data := range datasets[0] {
		clock.SetNow(data.timestamp)

		for _, mm := range data.metricWithMetadatas {
			require.NoError(t, client.writeTimedMetricWithMetadata(mm.metric.timed, mm.metadata.timedMetadata))
		}
		require.NoError(t, client.flush())

		// Give server some time to process the incoming packets.
		time.Sleep(sleepDuration)
	}

	clock.SetNow(start2)
	time.Sleep(waitForDataToFlush)
	setPlacement(t, placementKey, clusterClient, finalPlacement)
	time.Sleep(waitForDataToFlush)

	for _, data := range datasets[1] {
		clock.SetNow(data.timestamp)

		for _, mm := range data.metricWithMetadatas {
			require.NoError(t, client.writeTimedMetricWithMetadata(mm.metric.timed, mm.metadata.timedMetadata))
		}
		require.NoError(t, client.flush())

		// Give server some time to process the incoming packets.
		time.Sleep(sleepDuration)
	}

	// Move time forward and wait for flushing to happen.
	clock.SetNow(finalTime)
	time.Sleep(waitForDataToFlush)

	// Remove all the topic consumers before closing clients and servers. This allows the
	// connections between servers to be closed while the servers are still running. Otherwise,
	// during server shutdown, the yet-to-be-closed servers would repeatedly try to reconnect
	// to recently closed ones, which results in longer shutdown times.
	require.NoError(t, removeAllTopicConsumers(topicService, defaultTopicName))

	require.NoError(t, client.close())

	// Stop the servers.
	for i, server := range servers {
		require.NoError(t, server.stopServer())
		log.Sugar().Infof("server %d is now down", i)
	}

	actual := make([]aggregated.MetricWithStoragePolicy, 0)
	for _, server := range servers {
		actual = append(actual, server.sortedResults()...)
	}
	sort.Sort(byTimeIDPolicyAscending(actual))
	expected := make([]aggregated.MetricWithStoragePolicy, 0)
	for _, dataset := range datasets {
		results := mustComputeExpectedResults(t, finalTime, dataset, servers[0].aggregatorOpts)
		expected = append(expected, results...)
	}
	sort.Sort(byTimeIDPolicyAscending(expected))
	require.True(t, cmp.Equal(expected, actual, testCmpOpts...), cmp.Diff(expected, actual, testCmpOpts...))
}

func makePlacement(instanceConfig []placementInstanceConfig, numShards int) placement.Placement {
	instances := make([]placement.Instance, 0, len(instanceConfig))
	for _, config := range instanceConfig {
		instance := config.newPlacementInstance()
		instances = append(instances, instance)
	}
	return newPlacement(numShards, instances)
}