github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/aggregator/integration/multi_server_forwarding_pipeline_test.go

//go:build integration
// +build integration

// Copyright (c) 2018 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package integration

import (
	"math"
	"sort"
	"sync"
	"sync/atomic"
	"testing"
	"time"

	"github.com/m3db/m3/src/aggregator/aggregation"
	maggregation "github.com/m3db/m3/src/metrics/aggregation"
	"github.com/m3db/m3/src/metrics/metadata"
	"github.com/m3db/m3/src/metrics/metric"
	"github.com/m3db/m3/src/metrics/metric/aggregated"
	"github.com/m3db/m3/src/metrics/pipeline"
	"github.com/m3db/m3/src/metrics/pipeline/applied"
	"github.com/m3db/m3/src/metrics/policy"
	"github.com/m3db/m3/src/metrics/transformation"
	xtest "github.com/m3db/m3/src/x/test"
	xtime "github.com/m3db/m3/src/x/time"

	"github.com/google/go-cmp/cmp"
	"github.com/stretchr/testify/require"
)

func TestMultiServerForwardingPipelineKeepNaNAggregatedValues(t *testing.T) {
	testMultiServerForwardingPipeline(t, false)
}

func TestMultiServerForwardingPipelineDiscardNaNAggregatedValues(t *testing.T) {
	testMultiServerForwardingPipeline(t, true)
}

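// testMultiServerForwardingPipeline drives a two-stage forwarding pipeline: untimed gauges
// are written and aggregated on one set of servers, transformed with PerSecond and rolled
// up into a single Sum metric, then forwarded to a second set of servers for final
// aggregation. discardNaNAggregatedValues controls whether NaN aggregated values (such as
// the first PerSecond window, which has no prior value) are dropped instead of flushed.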
func testMultiServerForwardingPipeline(t *testing.T, discardNaNAggregatedValues bool) {
	if testing.Short() {
		t.SkipNow()
	}

	testParams := newTestServerSetups(t, func(opts testServerOptions) testServerOptions {
		return opts.SetDiscardNaNAggregatedValues(discardNaNAggregatedValues)
	})
	servers := testParams.servers
	clock := testParams.clock
	topicService := testParams.topicService

	// Start the servers.
	log := xtest.NewLogger(t)
	log.Info("test forwarding pipeline")
	for i, server := range servers {
		require.NoError(t, server.startServer())
		log.Sugar().Infof("server %d is now up", i)
	}

	// Create a client for writing to the servers.
	client := servers.newClient(t)
	require.NoError(t, client.connect())

	// Wait for two leaders to come up.
	var (
		leaders    = make(map[int]struct{})
		leaderCh   = make(chan int, len(servers)/2)
		numLeaders int32
		wg         sync.WaitGroup
	)
	wg.Add(len(servers) / 2)
	for i, server := range servers {
		i, server := i, server
		go func() {
			if err := server.waitUntilLeader(); err == nil {
				res := int(atomic.AddInt32(&numLeaders, 1))
				if res <= len(servers)/2 {
					leaderCh <- i
					wg.Done()
				}
			}
		}()
	}
	wg.Wait()
	close(leaderCh)

	for i := range leaderCh {
		leaders[i] = struct{}{}
		log.Sugar().Infof("server %d has become the leader", i)
	}
	log.Sugar().Infof("%d servers have become leaders", len(leaders))
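	// The destination-server selection further below assumes the four test servers form two
	// leader-elected pairs: servers 0 and 1 aggregate the raw metrics, while servers 2 and 3
	// aggregate the forwarded rollup metric, hence one leader is expected per pair.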

	var (
		idPrefix        = "foo"
		numIDs          = 2
		start           = clock.Now()
		stop            = start.Add(12 * time.Second)
		interval        = time.Second
		storagePolicies = policy.StoragePolicies{
			policy.NewStoragePolicy(2*time.Second, xtime.Second, time.Hour),
			policy.NewStoragePolicy(4*time.Second, xtime.Second, 24*time.Hour),
		}
	)

	ids := generateTestIDs(idPrefix, numIDs)
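	// The staged metadatas below describe the forwarding pipeline applied to every incoming
	// gauge: a PerSecond transformation followed by a rollup into a single Sum-aggregated
	// metric (pipelineRollupID), kept under both storage policies defined above.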
	stagedMetadatas := metadata.StagedMetadatas{
		{
			CutoverNanos: 0,
			Tombstoned:   false,
			Metadata: metadata.Metadata{
				Pipelines: []metadata.PipelineMetadata{
					{
						AggregationID:   maggregation.DefaultID,
						StoragePolicies: storagePolicies,
						Pipeline: applied.NewPipeline([]applied.OpUnion{
							{
								Type:           pipeline.TransformationOpType,
								Transformation: pipeline.TransformationOp{Type: transformation.PerSecond},
							},
							{
								Type: pipeline.RollupOpType,
								Rollup: applied.RollupOp{
									ID:            []byte(pipelineRollupID),
									AggregationID: maggregation.MustCompressTypes(maggregation.Sum),
								},
							},
						}),
					},
				},
			},
		},
	}
	metricTypeFn := constantMetricTypeFnFactory(metric.GaugeType)
	valueGenOpts := valueGenOpts{
		untimed: untimedValueGenOpts{
			gaugeValueGenFn: func(intervalIdx, idIdx int) float64 {
				// Each gauge will have two datapoints within the same aggregation window.
				// The first value is 0.0 and should be ignored; the second value is used
				// to compute the `PerSecond` value and should yield a `PerSecond` value
				// of 1, which is then forwarded to the next aggregation server.
				if intervalIdx%2 == 0 {
					return 0.0
				}
				return float64(intervalIdx + idIdx)
			},
		},
	}
	metadataFn := func(idx int) metadataUnion {
		return metadataUnion{
			mType:           stagedMetadatasType,
			stagedMetadatas: stagedMetadatas,
		}
	}
	dataset := mustGenerateTestDataset(t, datasetGenOpts{
		start:        start,
		stop:         stop,
		interval:     interval,
		ids:          ids,
		category:     untimedMetric,
		typeFn:       metricTypeFn,
		valueGenOpts: valueGenOpts,
		metadataFn:   metadataFn,
	})
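	// With a 1s interval over the 12s span, every ID emits a datapoint each second. Per the
	// metadata above, each ID should contribute a PerSecond value of 1 per aggregation
	// window, so the rolled-up Sum for each complete window is expected to equal numIDs.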

	for _, data := range dataset {
		clock.SetNow(data.timestamp)

		for _, mm := range data.metricWithMetadatas {
			require.NoError(t, client.writeUntimedMetricWithMetadatas(mm.metric.untimed, mm.metadata.stagedMetadatas))
		}
		require.NoError(t, client.flush())

		// Give the servers some time to process the incoming packets.
		time.Sleep(time.Second)
	}

	// Move time forward using the larger resolution and wait for flushing to happen
	// at the originating server (where the raw metrics are aggregated).
	originatingServerFlushTime := stop.Add(2 * storagePolicies[1].Resolution().Window)
	for currTime := stop; !currTime.After(originatingServerFlushTime); currTime = currTime.Add(time.Second) {
		clock.SetNow(currTime)
		time.Sleep(time.Second)
	}

	// Move time forward using the larger resolution again and wait for flushing to
	// happen at the destination server (where the rollup metrics are aggregated).
	destinationServerFlushTime := originatingServerFlushTime.Add(2 * storagePolicies[1].Resolution().Window)
	for currTime := originatingServerFlushTime; !currTime.After(destinationServerFlushTime); currTime = currTime.Add(time.Second) {
		clock.SetNow(currTime)
		time.Sleep(time.Second)
	}

	// Remove all the topic consumers before closing clients and servers. This allows the
	// connections between servers to be closed while the servers are still running. Otherwise,
	// during server shutdown, the yet-to-be-closed servers would repeatedly try to reconnect
	// to recently closed ones, resulting in longer shutdown times.
	require.NoError(t, removeAllTopicConsumers(topicService, defaultTopicName))

	// Stop the client.
	require.NoError(t, client.close())

	// Stop the servers.
	for i, server := range servers {
		require.NoError(t, server.stopServer())
		log.Sugar().Infof("server %d is now down", i)
	}

	// Validate results.
	var destinationServer *testServerSetup
	if _, exists := leaders[2]; exists {
		destinationServer = servers[2]
	} else if _, exists = leaders[3]; exists {
		destinationServer = servers[3]
	} else {
		require.Fail(t, "either server 2 or server 3 must be a leader")
	}
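	// Results are validated against whichever destination server won the leader election,
	// since that instance is the one expected to have flushed the forwarded rollup metric.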

	aggregatorOpts := destinationServer.aggregatorOpts
	expectedMetricKeyList := []metricKey{
		{
			category:      forwardedMetric,
			typ:           metric.GaugeType,
			id:            pipelineRollupID,
			storagePolicy: storagePolicies[0],
		},
		{
			category:      forwardedMetric,
			typ:           metric.GaugeType,
			id:            pipelineRollupID,
			storagePolicy: storagePolicies[1],
		},
	}
	// Expected values by time, one map per storage policy (2s:1h and 4s:24h).
	expectedValuesByTimeList := []valuesByTime{
		make(valuesByTime),
		make(valuesByTime),
	}
	expectedValuesList := [][]float64{
		{
			math.NaN(),
			float64(numIDs),
			float64(numIDs),
			float64(numIDs),
			float64(numIDs),
			float64(numIDs),
		},
		{
			math.NaN(),
			float64(numIDs),
			float64(numIDs),
		},
	}
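	// 12s of data yields 6 aggregation windows at the 2s resolution and 3 windows at the
	// 4s resolution. The first window of each is NaN because the PerSecond transformation
	// has no earlier value to difference against; when discardNaNAggregatedValues is set,
	// those NaN entries are skipped below.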
	for spIdx := 0; spIdx < len(storagePolicies); spIdx++ {
		storagePolicy := storagePolicies[spIdx]
		for i := 0; i < len(expectedValuesList[spIdx]); i++ {
			if discardNaNAggregatedValues && math.IsNaN(expectedValuesList[spIdx][i]) {
				continue
			}
			currTime := start.Add(time.Duration(i+1) * storagePolicy.Resolution().Window)
			instrumentOpts := aggregatorOpts.InstrumentOptions()
			agg := aggregation.NewGauge(aggregation.NewOptions(instrumentOpts))
			expectedAnnotation := generateAnnotation(metric.GaugeType, numIDs-1)
			agg.Update(time.Now(), expectedValuesList[spIdx][i], expectedAnnotation)
			expectedValuesByTimeList[spIdx][currTime.UnixNano()] = agg
		}
	}

	var expectedResultsFlattened []aggregated.MetricWithStoragePolicy
	for i := 0; i < len(storagePolicies); i++ {
		expectedDatapointsByID := datapointsByID{
			expectedMetricKeyList[i]: expectedValuesByTimeList[i],
		}
		expectedBuckets := []aggregationBucket{
			{
				key: aggregationKey{
					aggregationID: maggregation.MustCompressTypes(maggregation.Sum),
					storagePolicy: storagePolicies[i],
				},
				data: expectedDatapointsByID,
			},
		}
		expectedResults, err := computeExpectedAggregationOutput(
			destinationServerFlushTime,
			expectedBuckets,
			aggregatorOpts,
		)
		require.NoError(t, err)
		expectedResultsFlattened = append(expectedResultsFlattened, expectedResults...)
	}
	sort.Sort(byTimeIDPolicyAscending(expectedResultsFlattened))
	actual := destinationServer.sortedResults()
	require.True(t, cmp.Equal(expectedResultsFlattened, actual, testCmpOpts...))
}