github.com/m3db/m3@v1.5.0/src/integration/aggregator/aggregator.go

// Copyright (c) 2021 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

// Package aggregator contains integration tests for aggregators.
package aggregator

import (
	"errors"
	"fmt"
	"testing"
	"time"

	"github.com/stretchr/testify/require"

	"github.com/m3db/m3/src/dbnode/generated/thrift/rpc"
	"github.com/m3db/m3/src/integration/resources"
	"github.com/m3db/m3/src/query/generated/proto/prompb"
	"github.com/m3db/m3/src/x/headers"
)

const (
	// TestAggregatorDBNodeConfig is the test config for the dbnode.
	TestAggregatorDBNodeConfig = `
db: {}
coordinator: {}
`

	// TestAggregatorCoordinatorConfig is the test config for the coordinator.
	TestAggregatorCoordinatorConfig = `
listenAddress: 0.0.0.0:7202
metrics:
  scope:
    prefix: "coordinator"
  prometheus:
    handlerPath: /metrics
    listenAddress: 0.0.0.0:7303
  sanitization: prometheus
  samplingRate: 1.0
  extended: none
carbon:
  ingester:
    listenAddress: "0.0.0.0:7204"
    rules:
      - pattern: .*
        aggregation:
          type: mean
        policies:
          - resolution: 5s
            retention: 6h
downsample:
  rules:
    rollupRules:
      - name: "requests per second by status code"
        filter: "__name__:http_requests app:* status_code:* endpoint:*"
        transforms:
          - transform:
              type: "PerSecond"
          - rollup:
              metricName: "http_requests_by_status_code"
              groupBy: ["app", "status_code", "endpoint"]
              aggregations: ["Sum"]
        storagePolicies:
          - resolution: 5s
            retention: 6h
  remoteAggregator:
    client:
      type: m3msg
      m3msg:
        producer:
          writer:
            topicName: aggregator_ingest
            topicServiceOverride:
              zone: embedded
              environment: default_env
            placement:
              isStaged: true
            placementServiceOverride:
              namespaces:
                placement: /placement
            connection:
              numConnections: 4
            messagePool:
              size: 16384
              watermark:
                low: 0.2
                high: 0.5
ingest:
  ingester:
    workerPoolSize: 10000
    opPool:
      size: 10000
    retry:
      maxRetries: 3
      jitter: true
    logSampleRate: 0.01
  m3msg:
    server:
      listenAddress: "0.0.0.0:7507"
      retry:
        maxBackoff: 10s
        jitter: true
storeMetricsType: true
`

	// TestAggregatorAggregatorConfig is the test config for the aggregators.
	TestAggregatorAggregatorConfig = `
`

	// defaultCarbonPort is the default port on which the coordinator receives carbon metrics.
	defaultCarbonPort = 7204
)

var (
	errEmptyResult = errors.New("empty query result")
	errQueryResult = errors.New("wrong query result")
)

// RunTest contains the logic for running the aggregator test.
func RunTest(t *testing.T, m3 resources.M3Resources) {
	t.Run("test_aggregated_graphite_metric", func(t *testing.T) {
		testAggregatedGraphiteMetric(t, m3)
	})

	t.Run("test_rollup_rule", func(t *testing.T) {
		testRollupRule(t, m3)
	})

	t.Run("test_metric_type_survives_aggregation", func(t *testing.T) {
		testMetricTypeSurvivesAggregation(t, m3)
	})
}

// testAggregatedGraphiteMetric tests the write and read of aggregated graphite metrics.
func testAggregatedGraphiteMetric(t *testing.T, m3 resources.M3Resources) {
	var (
		carbonName         = "foo.bar.baz"
		carbonTarget       = "foo.bar.*"
		carbonLow          = float64(40)
		carbonHigh         = float64(44)
		expectedCarbonMean = float64(42)
	)

	doneCh := make(chan struct{})
	defer func() {
		doneCh <- struct{}{}
		close(doneCh)
	}()
	// Continuously write a low/high pair of carbon datapoints until the test
	// finishes, so the carbon rule's "mean" aggregation produces the expected
	// value of 42.
	go func() {
		for {
			select {
			case <-doneCh:
				return
			default:
				require.NoError(t, m3.Coordinator().WriteCarbon(defaultCarbonPort, carbonName, carbonLow, time.Now()))
				require.NoError(t, m3.Coordinator().WriteCarbon(defaultCarbonPort, carbonName, carbonHigh, time.Now()))
				time.Sleep(1 * time.Second)
			}
		}
	}()

	require.NoError(t, resources.RetryWithMaxTime(func() error {
		return verifyGraphiteQuery(m3, carbonTarget, expectedCarbonMean)
	}, 2*time.Minute))
}

func verifyGraphiteQuery(m3 resources.M3Resources, target string, expected float64) error {
	datapoints, err := m3.Coordinator().GraphiteQuery(resources.GraphiteQueryRequest{
		Target: target,
		From:   time.Now().Add(-1000 * time.Second),
		Until:  time.Now(),
	})
	if err != nil {
		return err
	}
	nonNullDPs := filterNull(datapoints)
	if len(nonNullDPs) == 0 {
		return errEmptyResult
	}
	if v := *nonNullDPs[0].Value; v != expected {
		return fmt.Errorf("wrong datapoint result: expected=%f, actual=%f", expected, v)
	}
	return nil
}

func filterNull(datapoints []resources.Datapoint) []resources.Datapoint {
	nonNull := make([]resources.Datapoint, 0, len(datapoints))
	for _, dp := range datapoints {
		if dp.Value != nil {
			nonNull = append(nonNull, dp)
		}
	}
	return nonNull
}

// testRollupRule tests metrics aggregated with a rollup rule.
func testRollupRule(t *testing.T, m3 resources.M3Resources) {
	var (
		numDatapoints = 5
		resolutionSec = 5
		nowTime       = time.Now()
		initWriteTime = nowTime.Truncate(time.Duration(resolutionSec) * time.Second)
		metricName    = "http_requests"

		initVal1 = 42
		valRate1 = 22
		valInc1  = valRate1 * resolutionSec
		tags1    = map[string]string{
			"app":         "nginx_edge",
			"status_code": "500",
			"endpoint":    "/foo/bar",
		}

		initVal2 = 84
		valRate2 = 4
		valInc2  = valRate2 * resolutionSec
		tags2    = map[string]string{
			"app":         "nginx_edge",
			"status_code": "500",
			"endpoint":    "/foo/baz",
		}
	)

	// Each series increases by valRate per second (valInc per 5s datapoint), so
	// after the PerSecond transform and Sum rollup, http_requests_by_status_code
	// should report valRate for each endpoint.
	for i := 0; i < numDatapoints; i++ {
		err := m3.Coordinator().WriteProm(
			metricName,
			tags1,
			[]prompb.Sample{{
				Value:     float64(initVal1 + i*valInc1),
				Timestamp: initWriteTime.Add(time.Duration(i*resolutionSec)*time.Second).Unix() * 1000,
			}},
			resources.Headers{headers.PromTypeHeader: []string{"counter"}},
		)
		require.NoError(t, err)
	}

	for i := 0; i < numDatapoints; i++ {
		err := m3.Coordinator().WriteProm(
			metricName,
			tags2,
			[]prompb.Sample{{
				Value:     float64(initVal2 + i*valInc2),
				Timestamp: initWriteTime.Add(time.Duration(i*resolutionSec)*time.Second).Unix() * 1000,
			}},
			resources.Headers{headers.PromTypeHeader: []string{"gauge"}},
		)
		require.NoError(t, err)
	}

	require.NoError(t, resources.RetryWithMaxTime(func() error {
		return verifyPromQuery(
			m3,
			`http_requests_by_status_code{endpoint="/foo/bar"}`,
			float64(valRate1),
		)
	}, 2*time.Minute))

	require.NoError(t, resources.RetryWithMaxTime(func() error {
		return verifyPromQuery(
			m3,
			`http_requests_by_status_code{endpoint="/foo/baz"}`,
			float64(valRate2),
		)
	}, 2*time.Minute))
}

func verifyPromQuery(
	m3 resources.M3Resources,
	queryStr string,
	expected float64,
) error {
	results, err := m3.Coordinator().RangeQuery(
		resources.RangeQueryRequest{
			Query: queryStr,
			Start: time.Now().Add(-1 * time.Hour),
			End:   time.Now().Add(1 * time.Hour),
			Step:  30 * time.Second,
		},
		map[string][]string{
			"M3-Metrics-Type":   {"aggregated"},
			"M3-Storage-Policy": {"5s:6h"},
		},
	)
	if err != nil {
		return err
	}
	if len(results) == 0 {
		return errEmptyResult
	}
	if len(results) > 1 {
		return errors.New("more results than expected")
	}
	if v := float64(results[0].Values[0].Value); v != expected {
		return fmt.Errorf("wrong datapoint result: expected=%f, actual=%f", expected, v)
	}
	return nil
}

// testMetricTypeSurvivesAggregation verifies that the metric type information
// is stored in the database after aggregation.
func testMetricTypeSurvivesAggregation(t *testing.T, m3 resources.M3Resources) {
	nowTime := time.Now()
	value := 42
	metricName := "metric_type_test"

	require.NoError(t, m3.Coordinator().WriteProm(
		metricName,
		map[string]string{
			"label0": "label0",
			"label1": "label1",
			"label2": "label2",
		},
		[]prompb.Sample{{
			Value:     float64(value),
			Timestamp: nowTime.Unix() * 1000,
		}},
		resources.Headers{headers.PromTypeHeader: []string{"counter"}},
	))

	node := m3.Nodes()[0]
	require.NoError(t, resources.Retry(func() error {
		res, err := node.Fetch(&rpc.FetchRequest{
			NameSpace:  "aggregated",
			ID:         `{__name__="metric_type_test",label0="label0",label1="label1",label2="label2"}`,
			RangeStart: nowTime.Add(-1 * time.Hour).Unix(),
			RangeEnd:   nowTime.Add(time.Hour).Unix(),
		})
		if err != nil {
			return err
		}
		if len(res.Datapoints) == 0 {
			return errEmptyResult
		}
		if len(res.Datapoints[0].Annotation) == 0 {
			return errQueryResult
		}
		if res.Datapoints[0].Value != float64(value) {
			return errQueryResult
		}
		return nil
	}))
}