github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/integration/aggregator/aggregator.go (about)

     1  // Copyright (c) 2021  Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  // Package aggregator contains integration tests for aggregators.
    22  package aggregator
    23  
    24  import (
    25  	"errors"
    26  	"fmt"
    27  	"testing"
    28  	"time"
    29  
    30  	"github.com/m3db/m3/src/dbnode/generated/thrift/rpc"
    31  	"github.com/m3db/m3/src/integration/resources"
    32  	"github.com/m3db/m3/src/query/generated/proto/prompb"
    33  	"github.com/m3db/m3/src/x/headers"
    34  
    35  	"github.com/stretchr/testify/require"
    36  )
    37  
const (
	// TestAggregatorDBNodeConfig is the test config for the dbnode.
	// It declares empty `db` and `coordinator` sections and nothing else.
	TestAggregatorDBNodeConfig = `
db: {}
coordinator: {}
`

	// TestAggregatorCoordinatorConfig is the test config for the coordinator.
	//
	// Sections that the helpers in this file visibly line up with:
	//   - carbon.ingester listens on port 7204 (the value of defaultCarbonPort)
	//     and applies a mean aggregation with a 5s:6h policy to every pattern.
	//   - downsample.rules.rollupRules turns http_requests series carrying the
	//     app, status_code and endpoint tags into a per-second
	//     http_requests_by_status_code series stored at 5s:6h.
	//   - downsample.remoteAggregator forwards metrics over m3msg using the
	//     aggregator_ingest topic.
	//   - storeMetricsType is enabled; presumably this is what
	//     testMetricTypeSurvivesAggregation relies on — TODO confirm.
	TestAggregatorCoordinatorConfig = `
listenAddress: 0.0.0.0:7202
metrics:
  scope:
    prefix: "coordinator"
  prometheus:
    handlerPath: /metrics
    listenAddress: 0.0.0.0:7303
  sanitization: prometheus
  samplingRate: 1.0
  extended: none
carbon:
  ingester:
    listenAddress: "0.0.0.0:7204"
    rules:
      - pattern: .*
        aggregation:
          type: mean
        policies:
          - resolution: 5s
            retention: 6h
downsample:
  rules:
    rollupRules:
      - name: "requests per second by status code"
        filter: "__name__:http_requests app:* status_code:* endpoint:*"
        transforms:
          - transform:
              type: "PerSecond"
          - rollup:
              metricName: "http_requests_by_status_code"
              groupBy: ["app", "status_code", "endpoint"]
              aggregations: ["Sum"]
        storagePolicies:
          - resolution: 5s
            retention: 6h
  remoteAggregator:
    client:
      type: m3msg
      m3msg:
        producer:
          writer:
            topicName: aggregator_ingest
            topicServiceOverride:
              zone: embedded
              environment: default_env
            placement:
              isStaged: true
            placementServiceOverride:
              namespaces:
                placement: /placement
            connection:
              numConnections: 4
            messagePool:
              size: 16384
              watermark:
                low: 0.2
                high: 0.5
ingest:
  ingester:
    workerPoolSize: 10000
    opPool:
      size: 10000
    retry:
      maxRetries: 3
      jitter: true
    logSampleRate: 0.01
  m3msg:
    server:
      listenAddress: "0.0.0.0:7507"
      retry:
        maxBackoff: 10s
        jitter: true
storeMetricsType: true

clusterManagement:
  etcd:
    env: default_env
    zone: embedded
    service: m3db
    cacheDir: /var/lib/m3kv
    etcdClusters:
    - zone: embedded
      endpoints:
      - 127.0.0.1:2379
`

	// TestAggregatorAggregatorConfig is the test config for the aggregators.
	// It currently contains only a newline.
	TestAggregatorAggregatorConfig = `
`

	// defaultCarbonPort is the default port of coordinator to receive carbon metrics.
	// It matches the port in carbon.ingester.listenAddress of
	// TestAggregatorCoordinatorConfig.
	defaultCarbonPort = 7204
)
   140  
   141  var (
   142  	errEmptyResult = errors.New("empty query result")
   143  	errQueryResult = errors.New("wrong query result")
   144  )
   145  
   146  // RunTest contains the logic for running the aggregator test.
   147  func RunTest(t *testing.T, m3 resources.M3Resources) {
   148  	t.Run("test_aggregated_graphite_metric", func(t *testing.T) {
   149  		testAggregatedGraphiteMetric(t, m3)
   150  	})
   151  
   152  	t.Run("test_rollup_rule", func(t *testing.T) {
   153  		testRollupRule(t, m3)
   154  	})
   155  
   156  	t.Run("test_metric_type_survives_aggregation", func(t *testing.T) {
   157  		testMetricTypeSurvivesAggregation(t, m3)
   158  	})
   159  }
   160  
   161  // testAggregatedGraphiteMetric tests the write and read of aggregated graphtie metrics.
   162  func testAggregatedGraphiteMetric(t *testing.T, m3 resources.M3Resources) {
   163  	var (
   164  		carbonName         = "foo.bar.baz"
   165  		carbonTarget       = "foo.bar.*"
   166  		carbonLow          = float64(40)
   167  		carbonHigh         = float64(44)
   168  		expectedCarbonMean = float64(42)
   169  	)
   170  
   171  	doneCh := make(chan struct{})
   172  	defer func() {
   173  		doneCh <- struct{}{}
   174  		close(doneCh)
   175  	}()
   176  	go func() {
   177  		for {
   178  			select {
   179  			case <-doneCh:
   180  				return
   181  			default:
   182  				require.NoError(t, m3.Coordinator().WriteCarbon(defaultCarbonPort, carbonName, carbonLow, time.Now()))
   183  				require.NoError(t, m3.Coordinator().WriteCarbon(defaultCarbonPort, carbonName, carbonHigh, time.Now()))
   184  				time.Sleep(1 * time.Second)
   185  			}
   186  		}
   187  	}()
   188  
   189  	require.NoError(t, resources.RetryWithMaxTime(func() error {
   190  		return verifyGraphiteQuery(m3, carbonTarget, expectedCarbonMean)
   191  	}, 2*time.Minute))
   192  }
   193  
   194  func verifyGraphiteQuery(m3 resources.M3Resources, target string, expected float64) error {
   195  	datapoints, err := m3.Coordinator().GraphiteQuery(resources.GraphiteQueryRequest{
   196  		Target: target,
   197  		From:   time.Now().Add(-1000 * time.Second),
   198  		Until:  time.Now(),
   199  	})
   200  	if err != nil {
   201  		return err
   202  	}
   203  	nonNullDPs := filterNull(datapoints)
   204  	if len(nonNullDPs) == 0 {
   205  		return errEmptyResult
   206  	}
   207  	if v := *nonNullDPs[0].Value; v != expected {
   208  		return fmt.Errorf("wrong datapoint result: expected=%f, actual=%f", expected, v)
   209  	}
   210  	return nil
   211  }
   212  
   213  func filterNull(datapoints []resources.Datapoint) []resources.Datapoint {
   214  	nonNull := make([]resources.Datapoint, 0, len(datapoints))
   215  	for _, dp := range datapoints {
   216  		if dp.Value != nil {
   217  			nonNull = append(nonNull, dp)
   218  		}
   219  	}
   220  	return nonNull
   221  }
   222  
   223  // testRollupRule tests metrics aggregated with a rollup rule.
   224  func testRollupRule(t *testing.T, m3 resources.M3Resources) {
   225  	var (
   226  		numDatapoints = 5
   227  		resolutionSec = 5
   228  		nowTime       = time.Now()
   229  		initWriteTime = nowTime.Truncate(time.Duration(resolutionSec) * time.Second)
   230  		metricName    = "http_requests"
   231  
   232  		initVal1 = 42
   233  		valRate1 = 22
   234  		valInc1  = valRate1 * resolutionSec
   235  		tags1    = map[string]string{
   236  			"app":         "nginx_edge",
   237  			"status_code": "500",
   238  			"endpoint":    "/foo/bar",
   239  		}
   240  
   241  		initVal2 = 84
   242  		valRate2 = 4
   243  		valInc2  = valRate2 * resolutionSec
   244  		tags2    = map[string]string{
   245  			"app":         "nginx_edge",
   246  			"status_code": "500",
   247  			"endpoint":    "/foo/baz",
   248  		}
   249  	)
   250  
   251  	for i := 0; i < numDatapoints; i++ {
   252  		err := m3.Coordinator().WriteProm(
   253  			metricName,
   254  			tags1,
   255  			[]prompb.Sample{{
   256  				Value:     float64(initVal1 + i*valInc1),
   257  				Timestamp: initWriteTime.Add(time.Duration(i*resolutionSec)*time.Second).Unix() * 1000,
   258  			}},
   259  			resources.Headers{headers.PromTypeHeader: []string{"counter"}},
   260  		)
   261  		require.NoError(t, err)
   262  	}
   263  
   264  	for i := 0; i < numDatapoints; i++ {
   265  		err := m3.Coordinator().WriteProm(
   266  			metricName,
   267  			tags2,
   268  			[]prompb.Sample{{
   269  				Value:     float64(initVal2 + i*valInc2),
   270  				Timestamp: initWriteTime.Add(time.Duration(i*resolutionSec)*time.Second).Unix() * 1000,
   271  			}},
   272  			resources.Headers{headers.PromTypeHeader: []string{"gauge"}},
   273  		)
   274  		require.NoError(t, err)
   275  	}
   276  
   277  	require.NoError(t, resources.RetryWithMaxTime(func() error {
   278  		return verifyPromQuery(
   279  			m3,
   280  			`http_requests_by_status_code{endpoint="/foo/bar"}`,
   281  			float64(valRate1),
   282  		)
   283  	}, 2*time.Minute))
   284  
   285  	require.NoError(t, resources.RetryWithMaxTime(func() error {
   286  		return verifyPromQuery(
   287  			m3,
   288  			`http_requests_by_status_code{endpoint="/foo/baz"}`,
   289  			float64(valRate2),
   290  		)
   291  	}, 2*time.Minute))
   292  }
   293  
   294  func verifyPromQuery(
   295  	m3 resources.M3Resources,
   296  	queryStr string,
   297  	expected float64,
   298  ) error {
   299  	results, err := m3.Coordinator().RangeQuery(
   300  		resources.RangeQueryRequest{
   301  			Query: queryStr,
   302  			Start: time.Now().Add(-1 * time.Hour),
   303  			End:   time.Now().Add(1 * time.Hour),
   304  			Step:  30 * time.Second,
   305  		},
   306  		map[string][]string{
   307  			"M3-Metrics-Type":   {"aggregated"},
   308  			"M3-Storage-Policy": {"5s:6h"},
   309  		},
   310  	)
   311  	if err != nil {
   312  		return err
   313  	}
   314  	if len(results) == 0 {
   315  		return errEmptyResult
   316  	}
   317  	if len(results) > 1 {
   318  		return errors.New("more results than expected")
   319  	}
   320  	if v := float64(results[0].Values[0].Value); v != expected {
   321  		return fmt.Errorf("wrong datapoint result: expected=%f, actual=%f", expected, v)
   322  	}
   323  	return nil
   324  }
   325  
   326  // testMetricTypeSurvivesAggregation verifies that the metric type information
   327  // is stored in db after the aggregation.
   328  func testMetricTypeSurvivesAggregation(t *testing.T, m3 resources.M3Resources) {
   329  	nowTime := time.Now()
   330  	value := 42
   331  	metricName := "metric_type_test"
   332  
   333  	require.NoError(t, m3.Coordinator().WriteProm(
   334  		metricName,
   335  		map[string]string{
   336  			"label0": "label0",
   337  			"label1": "label1",
   338  			"label2": "label2",
   339  		},
   340  		[]prompb.Sample{{
   341  			Value:     float64(value),
   342  			Timestamp: nowTime.Unix() * 1000,
   343  		}},
   344  		resources.Headers{headers.PromTypeHeader: []string{"counter"}},
   345  	))
   346  
   347  	node := m3.Nodes()[0]
   348  	require.NoError(t, resources.Retry(func() error {
   349  		res, err := node.Fetch(&rpc.FetchRequest{
   350  			NameSpace:  "aggregated",
   351  			ID:         `{__name__="metric_type_test",label0="label0",label1="label1",label2="label2"}`,
   352  			RangeStart: nowTime.Add(-1 * time.Hour).Unix(),
   353  			RangeEnd:   nowTime.Add(time.Hour).Unix(),
   354  		})
   355  		if err != nil {
   356  			return err
   357  		}
   358  		if len(res.Datapoints) == 0 {
   359  			return errEmptyResult
   360  		}
   361  		if len(res.Datapoints[0].Annotation) == 0 {
   362  			return errQueryResult
   363  		}
   364  		if res.Datapoints[0].Value != float64(value) {
   365  			return errQueryResult
   366  		}
   367  		return nil
   368  	}))
   369  }