github.com/m3db/m3@v1.5.0/src/integration/aggregator/aggregator.go (about)

     1  // Copyright (c) 2021  Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  // Package aggregator contains integration tests for aggregators.
    22  package aggregator
    23  
    24  import (
    25  	"errors"
    26  	"fmt"
    27  	"testing"
    28  	"time"
    29  
    30  	"github.com/stretchr/testify/require"
    31  
    32  	"github.com/m3db/m3/src/dbnode/generated/thrift/rpc"
    33  	"github.com/m3db/m3/src/integration/resources"
    34  	"github.com/m3db/m3/src/query/generated/proto/prompb"
    35  	"github.com/m3db/m3/src/x/headers"
    36  )
    37  
const (
	// TestAggregatorDBNodeConfig is the test config for the dbnode.
	// It is a YAML document that declares empty `db` and `coordinator` sections.
	TestAggregatorDBNodeConfig = `
db: {}
coordinator: {}
`

	// TestAggregatorCoordinatorConfig is the test config for the coordinator.
	//
	// The YAML document configures:
	//   - a carbon ingester listening on port 7204 that aggregates every
	//     pattern with a mean at a 5s:6h policy;
	//   - a rollup rule that turns http_requests into the per-second
	//     http_requests_by_status_code metric, grouped by app, status_code
	//     and endpoint, stored at 5s:6h;
	//   - a remote aggregator client that publishes over m3msg to the
	//     aggregator_ingest topic;
	//   - an m3msg ingest server listening on port 7507;
	//   - storeMetricsType set to true.
	TestAggregatorCoordinatorConfig = `
listenAddress: 0.0.0.0:7202
metrics:
  scope:
    prefix: "coordinator"
  prometheus:
    handlerPath: /metrics
    listenAddress: 0.0.0.0:7303
  sanitization: prometheus
  samplingRate: 1.0
  extended: none
carbon:
  ingester:
    listenAddress: "0.0.0.0:7204"
    rules:
      - pattern: .*
        aggregation:
          type: mean
        policies:
          - resolution: 5s
            retention: 6h
downsample:
  rules:
    rollupRules:
      - name: "requests per second by status code"
        filter: "__name__:http_requests app:* status_code:* endpoint:*"
        transforms:
          - transform:
              type: "PerSecond"
          - rollup:
              metricName: "http_requests_by_status_code"
              groupBy: ["app", "status_code", "endpoint"]
              aggregations: ["Sum"]
        storagePolicies:
          - resolution: 5s
            retention: 6h
  remoteAggregator:
    client:
      type: m3msg
      m3msg:
        producer:
          writer:
            topicName: aggregator_ingest
            topicServiceOverride:
              zone: embedded
              environment: default_env
            placement:
              isStaged: true
            placementServiceOverride:
              namespaces:
                placement: /placement
            connection:
              numConnections: 4
            messagePool:
              size: 16384
              watermark:
                low: 0.2
                high: 0.5
ingest:
  ingester:
    workerPoolSize: 10000
    opPool:
      size: 10000
    retry:
      maxRetries: 3
      jitter: true
    logSampleRate: 0.01
  m3msg:
    server:
      listenAddress: "0.0.0.0:7507"
      retry:
        maxBackoff: 10s
        jitter: true
storeMetricsType: true
`

	// TestAggregatorAggregatorConfig is the test config for the aggregators.
	// The document contains nothing but a newline.
	// NOTE(review): presumably the aggregator then runs with its defaults — confirm.
	TestAggregatorAggregatorConfig = `
`

	// defaultCarbonPort is the default port of coordinator to receive carbon metrics.
	// It matches the port of carbon.ingester.listenAddress in
	// TestAggregatorCoordinatorConfig.
	defaultCarbonPort = 7204
)
   129  
   130  var (
   131  	errEmptyResult = errors.New("empty query result")
   132  	errQueryResult = errors.New("wrong query result")
   133  )
   134  
   135  // RunTest contains the logic for running the aggregator test.
   136  func RunTest(t *testing.T, m3 resources.M3Resources) {
   137  	t.Run("test_aggregated_graphite_metric", func(t *testing.T) {
   138  		testAggregatedGraphiteMetric(t, m3)
   139  	})
   140  
   141  	t.Run("test_rollup_rule", func(t *testing.T) {
   142  		testRollupRule(t, m3)
   143  	})
   144  
   145  	t.Run("test_metric_type_survives_aggregation", func(t *testing.T) {
   146  		testMetricTypeSurvivesAggregation(t, m3)
   147  	})
   148  }
   149  
   150  // testAggregatedGraphiteMetric tests the write and read of aggregated graphtie metrics.
   151  func testAggregatedGraphiteMetric(t *testing.T, m3 resources.M3Resources) {
   152  	var (
   153  		carbonName         = "foo.bar.baz"
   154  		carbonTarget       = "foo.bar.*"
   155  		carbonLow          = float64(40)
   156  		carbonHigh         = float64(44)
   157  		expectedCarbonMean = float64(42)
   158  	)
   159  
   160  	doneCh := make(chan struct{})
   161  	defer func() {
   162  		doneCh <- struct{}{}
   163  		close(doneCh)
   164  	}()
   165  	go func() {
   166  		for {
   167  			select {
   168  			case <-doneCh:
   169  				return
   170  			default:
   171  				require.NoError(t, m3.Coordinator().WriteCarbon(defaultCarbonPort, carbonName, carbonLow, time.Now()))
   172  				require.NoError(t, m3.Coordinator().WriteCarbon(defaultCarbonPort, carbonName, carbonHigh, time.Now()))
   173  				time.Sleep(1 * time.Second)
   174  			}
   175  		}
   176  	}()
   177  
   178  	require.NoError(t, resources.RetryWithMaxTime(func() error {
   179  		return verifyGraphiteQuery(m3, carbonTarget, expectedCarbonMean)
   180  	}, 2*time.Minute))
   181  }
   182  
   183  func verifyGraphiteQuery(m3 resources.M3Resources, target string, expected float64) error {
   184  	datapoints, err := m3.Coordinator().GraphiteQuery(resources.GraphiteQueryRequest{
   185  		Target: target,
   186  		From:   time.Now().Add(-1000 * time.Second),
   187  		Until:  time.Now(),
   188  	})
   189  	if err != nil {
   190  		return err
   191  	}
   192  	nonNullDPs := filterNull(datapoints)
   193  	if len(nonNullDPs) == 0 {
   194  		return errEmptyResult
   195  	}
   196  	if v := *nonNullDPs[0].Value; v != expected {
   197  		return fmt.Errorf("wrong datapoint result: expected=%f, actual=%f", expected, v)
   198  	}
   199  	return nil
   200  }
   201  
   202  func filterNull(datapoints []resources.Datapoint) []resources.Datapoint {
   203  	nonNull := make([]resources.Datapoint, 0, len(datapoints))
   204  	for _, dp := range datapoints {
   205  		if dp.Value != nil {
   206  			nonNull = append(nonNull, dp)
   207  		}
   208  	}
   209  	return nonNull
   210  }
   211  
   212  // testRollupRule tests metrics aggregated with a rollup rule.
   213  func testRollupRule(t *testing.T, m3 resources.M3Resources) {
   214  	var (
   215  		numDatapoints = 5
   216  		resolutionSec = 5
   217  		nowTime       = time.Now()
   218  		initWriteTime = nowTime.Truncate(time.Duration(resolutionSec) * time.Second)
   219  		metricName    = "http_requests"
   220  
   221  		initVal1 = 42
   222  		valRate1 = 22
   223  		valInc1  = valRate1 * resolutionSec
   224  		tags1    = map[string]string{
   225  			"app":         "nginx_edge",
   226  			"status_code": "500",
   227  			"endpoint":    "/foo/bar",
   228  		}
   229  
   230  		initVal2 = 84
   231  		valRate2 = 4
   232  		valInc2  = valRate2 * resolutionSec
   233  		tags2    = map[string]string{
   234  			"app":         "nginx_edge",
   235  			"status_code": "500",
   236  			"endpoint":    "/foo/baz",
   237  		}
   238  	)
   239  
   240  	for i := 0; i < numDatapoints; i++ {
   241  		err := m3.Coordinator().WriteProm(
   242  			metricName,
   243  			tags1,
   244  			[]prompb.Sample{{
   245  				Value:     float64(initVal1 + i*valInc1),
   246  				Timestamp: initWriteTime.Add(time.Duration(i*resolutionSec)*time.Second).Unix() * 1000,
   247  			}},
   248  			resources.Headers{headers.PromTypeHeader: []string{"counter"}},
   249  		)
   250  		require.NoError(t, err)
   251  	}
   252  
   253  	for i := 0; i < numDatapoints; i++ {
   254  		err := m3.Coordinator().WriteProm(
   255  			metricName,
   256  			tags2,
   257  			[]prompb.Sample{{
   258  				Value:     float64(initVal2 + i*valInc2),
   259  				Timestamp: initWriteTime.Add(time.Duration(i*resolutionSec)*time.Second).Unix() * 1000,
   260  			}},
   261  			resources.Headers{headers.PromTypeHeader: []string{"gauge"}},
   262  		)
   263  		require.NoError(t, err)
   264  	}
   265  
   266  	require.NoError(t, resources.RetryWithMaxTime(func() error {
   267  		return verifyPromQuery(
   268  			m3,
   269  			`http_requests_by_status_code{endpoint="/foo/bar"}`,
   270  			float64(valRate1),
   271  		)
   272  	}, 2*time.Minute))
   273  
   274  	require.NoError(t, resources.RetryWithMaxTime(func() error {
   275  		return verifyPromQuery(
   276  			m3,
   277  			`http_requests_by_status_code{endpoint="/foo/baz"}`,
   278  			float64(valRate2),
   279  		)
   280  	}, 2*time.Minute))
   281  }
   282  
   283  func verifyPromQuery(
   284  	m3 resources.M3Resources,
   285  	queryStr string,
   286  	expected float64,
   287  ) error {
   288  	results, err := m3.Coordinator().RangeQuery(
   289  		resources.RangeQueryRequest{
   290  			Query: queryStr,
   291  			Start: time.Now().Add(-1 * time.Hour),
   292  			End:   time.Now().Add(1 * time.Hour),
   293  			Step:  30 * time.Second,
   294  		},
   295  		map[string][]string{
   296  			"M3-Metrics-Type":   {"aggregated"},
   297  			"M3-Storage-Policy": {"5s:6h"},
   298  		},
   299  	)
   300  	if err != nil {
   301  		return err
   302  	}
   303  	if len(results) == 0 {
   304  		return errEmptyResult
   305  	}
   306  	if len(results) > 1 {
   307  		return errors.New("more results than expected")
   308  	}
   309  	if v := float64(results[0].Values[0].Value); v != expected {
   310  		return fmt.Errorf("wrong datapoint result: expected=%f, actual=%f", expected, v)
   311  	}
   312  	return nil
   313  }
   314  
   315  // testMetricTypeSurvivesAggregation verifies that the metric type information
   316  // is stored in db after the aggregation.
   317  func testMetricTypeSurvivesAggregation(t *testing.T, m3 resources.M3Resources) {
   318  	nowTime := time.Now()
   319  	value := 42
   320  	metricName := "metric_type_test"
   321  
   322  	require.NoError(t, m3.Coordinator().WriteProm(
   323  		metricName,
   324  		map[string]string{
   325  			"label0": "label0",
   326  			"label1": "label1",
   327  			"label2": "label2",
   328  		},
   329  		[]prompb.Sample{{
   330  			Value:     float64(value),
   331  			Timestamp: nowTime.Unix() * 1000,
   332  		}},
   333  		resources.Headers{headers.PromTypeHeader: []string{"counter"}},
   334  	))
   335  
   336  	node := m3.Nodes()[0]
   337  	require.NoError(t, resources.Retry(func() error {
   338  		res, err := node.Fetch(&rpc.FetchRequest{
   339  			NameSpace:  "aggregated",
   340  			ID:         `{__name__="metric_type_test",label0="label0",label1="label1",label2="label2"}`,
   341  			RangeStart: nowTime.Add(-1 * time.Hour).Unix(),
   342  			RangeEnd:   nowTime.Add(time.Hour).Unix(),
   343  		})
   344  		if err != nil {
   345  			return err
   346  		}
   347  		if len(res.Datapoints) == 0 {
   348  			return errEmptyResult
   349  		}
   350  		if len(res.Datapoints[0].Annotation) == 0 {
   351  			return errQueryResult
   352  		}
   353  		if res.Datapoints[0].Value != float64(value) {
   354  			return errQueryResult
   355  		}
   356  		return nil
   357  	}))
   358  }