github.com/m3db/m3@v1.5.0/src/aggregator/integration/setup.go

// Copyright (c) 2016 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package integration

import (
	"encoding/json"
	"errors"
	"fmt"
	"io/ioutil"
	"net/http"
	"sort"
	"sync"
	"testing"
	"time"

	"github.com/stretchr/testify/require"
	"github.com/uber-go/tally"

	"github.com/m3db/m3/src/aggregator/aggregator"
	"github.com/m3db/m3/src/aggregator/aggregator/handler"
	"github.com/m3db/m3/src/aggregator/aggregator/handler/writer"
	aggclient "github.com/m3db/m3/src/aggregator/client"
	"github.com/m3db/m3/src/aggregator/runtime"
	httpserver "github.com/m3db/m3/src/aggregator/server/http"
	m3msgserver "github.com/m3db/m3/src/aggregator/server/m3msg"
	rawtcpserver "github.com/m3db/m3/src/aggregator/server/rawtcp"
	"github.com/m3db/m3/src/cluster/placement"
	"github.com/m3db/m3/src/cluster/services"
	"github.com/m3db/m3/src/cmd/services/m3aggregator/serve"
	"github.com/m3db/m3/src/metrics/metric/aggregated"
	"github.com/m3db/m3/src/metrics/policy"
	"github.com/m3db/m3/src/msg/consumer"
	"github.com/m3db/m3/src/msg/producer"
	"github.com/m3db/m3/src/msg/producer/buffer"
	msgwriter "github.com/m3db/m3/src/msg/producer/writer"
	"github.com/m3db/m3/src/x/instrument"
	xio "github.com/m3db/m3/src/x/io"
	"github.com/m3db/m3/src/x/retry"
	xserver "github.com/m3db/m3/src/x/server"
	xsync "github.com/m3db/m3/src/x/sync"
)

var (
	errServerStartTimedOut   = errors.New("server took too long to start")
	errLeaderElectionTimeout = errors.New("took too long to become leader")
)

type testServerSetups []*testServerSetup

type testServerSetup struct {
	opts             testServerOptions
	m3msgAddr        string
	rawTCPAddr       string
	httpAddr         string
	clientOptions    aggclient.Options
	m3msgServerOpts  m3msgserver.Options
	rawTCPServerOpts rawtcpserver.Options
	httpServerOpts   httpserver.Options
	aggregator       aggregator.Aggregator
	aggregatorOpts   aggregator.Options
	handler          handler.Handler
	electionKey      string
	leaderValue      string
	leaderService    services.LeaderService
	electionCluster  *testCluster
	workerPool       xsync.WorkerPool
	results          map[resultKey]aggregated.MetricWithStoragePolicy
	resultLock       *sync.Mutex

	// Signals.
	doneCh   chan struct{}
	closedCh chan struct{}
}
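
// Typical lifecycle, sketched as a hypothetical test (not part of this file):
// construct a setup, start the server, wait for leadership, drive it through a
// client built from the same setup, then tear everything down.
//
//	func TestAggregatorLifecycle(t *testing.T) {
//		setup := newTestServerSetup(t, nil)
//		defer setup.close()
//
//		require.NoError(t, setup.startServer())
//		require.NoError(t, setup.waitUntilLeader())
//
//		c := setup.newClient(t)
//		// ... write metrics via c, wait for flushes, then assert with
//		// setup.value(...) or setup.sortedResults().
//
//		require.NoError(t, setup.stopServer())
//	}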

func newTestServerSetup(t *testing.T, opts testServerOptions) *testServerSetup {
	if opts == nil {
		opts = newTestServerOptions(t)
	}

	// TODO: based on an environment variable, use the M3MSG aggregator as the
	// default server and client, and run both the legacy and M3MSG tests by
	// selecting the type in the Makefile.

	// Set up the worker pool.
	workerPool := xsync.NewWorkerPool(opts.WorkerPoolSize())
	workerPool.Init()

	// Create the server options.
	rwOpts := xio.NewOptions()
	rawTCPServerOpts := rawtcpserver.NewOptions().SetRWOptions(rwOpts)
	m3msgServerOpts := m3msgserver.NewOptions().
		SetInstrumentOptions(opts.InstrumentOptions()).
		SetServerOptions(xserver.NewOptions()).
		SetConsumerOptions(consumer.NewOptions())
	httpServerOpts := httpserver.NewOptions().
		// Use a new mux per test to avoid collisions when registering the same
		// handlers across tests.
		SetMux(http.NewServeMux())

	// Create the aggregator options.
	clockOpts := opts.ClockOptions()
	aggregatorOpts := aggregator.NewOptions(clockOpts).
		SetInstrumentOptions(opts.InstrumentOptions()).
		SetAggregationTypesOptions(opts.AggregationTypesOptions()).
		SetEntryCheckInterval(opts.EntryCheckInterval()).
		SetMaxAllowedForwardingDelayFn(opts.MaxAllowedForwardingDelayFn()).
		SetBufferForPastTimedMetric(opts.BufferForPastTimedMetric()).
		// Accept timed metrics up to one resolution older than the configured buffer.
		SetBufferForPastTimedMetricFn(func(resolution time.Duration) time.Duration {
			return resolution + opts.BufferForPastTimedMetric()
		}).
		SetDiscardNaNAggregatedValues(opts.DiscardNaNAggregatedValues()).
		SetEntryTTL(opts.EntryTTL())

	// Set up the placement manager.
	kvStore, err := opts.ClusterClient().KV()
	require.NoError(t, err)
	placementWatcherOpts := placement.NewWatcherOptions().
		SetInstrumentOptions(opts.InstrumentOptions()).
		SetStagedPlacementKey(opts.PlacementKVKey()).
		SetStagedPlacementStore(kvStore)
	placementManagerOpts := aggregator.NewPlacementManagerOptions().
		SetInstrumentOptions(opts.InstrumentOptions()).
		SetInstanceID(opts.InstanceID()).
		SetWatcherOptions(placementWatcherOpts)
	placementManager := aggregator.NewPlacementManager(placementManagerOpts)
	aggregatorOpts = aggregatorOpts.
		SetInstrumentOptions(opts.InstrumentOptions()).
		SetShardFn(opts.ShardFn()).
		SetPlacementManager(placementManager)

	// Set up the flush times manager.
	flushTimesManagerOpts := aggregator.NewFlushTimesManagerOptions().
		SetClockOptions(clockOpts).
		SetInstrumentOptions(opts.InstrumentOptions()).
		SetFlushTimesKeyFmt(opts.FlushTimesKeyFmt()).
		SetFlushTimesStore(kvStore)
	flushTimesManager := aggregator.NewFlushTimesManager(flushTimesManagerOpts)
	aggregatorOpts = aggregatorOpts.SetFlushTimesManager(flushTimesManager)

	// Set up the election manager.
	leaderValue := opts.InstanceID()
	campaignOpts, err := services.NewCampaignOptions()
	require.NoError(t, err)
	campaignOpts = campaignOpts.SetLeaderValue(leaderValue)
	electionKey := fmt.Sprintf(opts.ElectionKeyFmt(), opts.ShardSetID())
	electionCluster := opts.ElectionCluster()
	if electionCluster == nil {
		electionCluster = newTestCluster(t)
	}
	leaderService := electionCluster.LeaderService()
	electionManagerOpts := aggregator.NewElectionManagerOptions().
		SetClockOptions(clockOpts).
		SetInstrumentOptions(opts.InstrumentOptions()).
		SetCampaignOptions(campaignOpts).
		SetElectionKeyFmt(opts.ElectionKeyFmt()).
		SetLeaderService(leaderService).
		SetPlacementManager(placementManager).
		SetFlushTimesManager(flushTimesManager)
	electionManager := aggregator.NewElectionManager(electionManagerOpts)
	aggregatorOpts = aggregatorOpts.SetElectionManager(electionManager)
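
	// A worked illustration of the election key computed above (the format
	// string here is hypothetical, not taken from this file):
	//
	//	fmt.Sprintf("/m3agg/election/shardset/%d", 0) // "/m3agg/election/shardset/0"
	//
	// Instances sharing a shard set campaign under the same key, so exactly one
	// of them becomes leader for that shard set while other shard sets elect
	// leaders independently.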

	// Set up the flush manager.
	flushManagerOpts := aggregator.NewFlushManagerOptions().
		SetClockOptions(clockOpts).
		SetInstrumentOptions(opts.InstrumentOptions()).
		SetPlacementManager(placementManager).
		SetFlushTimesManager(flushTimesManager).
		SetElectionManager(electionManager).
		SetJitterEnabled(opts.JitterEnabled()).
		SetMaxJitterFn(opts.MaxJitterFn()).
		SetBufferForPastTimedMetric(aggregatorOpts.BufferForPastTimedMetric())
	flushManager := aggregator.NewFlushManager(flushManagerOpts)
	aggregatorOpts = aggregatorOpts.SetFlushManager(flushManager)

	// Set up the admin client.
	m3msgOpts := aggclient.NewM3MsgOptions()
	if opts.AggregatorClientType() == aggclient.M3MsgAggregatorClient {
		producer, err := newM3MsgProducer(opts)
		require.NoError(t, err)
		m3msgOpts = m3msgOpts.SetProducer(producer)
	}

	clientOpts := aggclient.NewOptions().
		SetClockOptions(clockOpts).
		SetConnectionOptions(opts.ClientConnectionOptions()).
		SetShardFn(opts.ShardFn()).
		SetWatcherOptions(placementWatcherOpts).
		SetRWOptions(rwOpts).
		SetM3MsgOptions(m3msgOpts).
		SetAggregatorClientType(opts.AggregatorClientType())
	c, err := aggclient.NewClient(clientOpts)
	require.NoError(t, err)
	adminClient, ok := c.(aggclient.AdminClient)
	require.True(t, ok)
	require.NoError(t, adminClient.Init())
	aggregatorOpts = aggregatorOpts.SetAdminClient(adminClient)

	testClientOpts := clientOpts.SetAggregatorClientType(opts.AggregatorClientType())

	// Set up the handler that captures flushed metrics for test assertions.
	var resultLock sync.Mutex
	results := make(map[resultKey]aggregated.MetricWithStoragePolicy)
	handler := &capturingHandler{results: results, resultLock: &resultLock}
	pw, err := handler.NewWriter(tally.NoopScope)
	require.NoError(t, err)
	aggregatorOpts = aggregatorOpts.SetFlushHandler(handler).SetPassthroughWriter(pw)

	// Set up the entry pool.
	runtimeOpts := runtime.NewOptions()
	entryPool := aggregator.NewEntryPool(nil)
	entryPool.Init(func() *aggregator.Entry {
		return aggregator.NewEntry(nil, runtimeOpts, aggregatorOpts)
	})
	aggregatorOpts = aggregatorOpts.SetEntryPool(entryPool)
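
	// Pooling contract, as a hedged sketch: the entry pool above and the elem
	// pools below are generated object pools that expose Get/Put, though this
	// file only ever calls Init.
	//
	//	e := entryPool.Get() // invokes the allocator above when the pool is empty
	//	// ... use e ...
	//	entryPool.Put(e)     // return it so later Gets can reuse the instance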

	// Set up the elem pools.
	counterElemPool := aggregator.NewCounterElemPool(nil)
	aggregatorOpts = aggregatorOpts.SetCounterElemPool(counterElemPool)
	elemOpts := aggregator.NewElemOptions(aggregatorOpts)
	counterElemPool.Init(func() *aggregator.CounterElem {
		return aggregator.MustNewCounterElem(aggregator.ElemData{}, elemOpts)
	})

	timerElemPool := aggregator.NewTimerElemPool(nil)
	aggregatorOpts = aggregatorOpts.SetTimerElemPool(timerElemPool)
	timerElemPool.Init(func() *aggregator.TimerElem {
		return aggregator.MustNewTimerElem(aggregator.ElemData{}, elemOpts)
	})

	gaugeElemPool := aggregator.NewGaugeElemPool(nil)
	aggregatorOpts = aggregatorOpts.SetGaugeElemPool(gaugeElemPool)
	gaugeElemPool.Init(func() *aggregator.GaugeElem {
		return aggregator.MustNewGaugeElem(aggregator.ElemData{}, elemOpts)
	})

	return &testServerSetup{
		opts:             opts,
		rawTCPAddr:       opts.RawTCPAddr(),
		httpAddr:         opts.HTTPAddr(),
		m3msgAddr:        opts.M3MsgAddr(),
		clientOptions:    testClientOpts,
		rawTCPServerOpts: rawTCPServerOpts,
		m3msgServerOpts:  m3msgServerOpts,
		httpServerOpts:   httpServerOpts,
		aggregatorOpts:   aggregatorOpts,
		handler:          handler,
		electionKey:      electionKey,
		leaderValue:      leaderValue,
		leaderService:    leaderService,
		electionCluster:  electionCluster,
		workerPool:       workerPool,
		results:          results,
		resultLock:       &resultLock,
		doneCh:           make(chan struct{}),
		closedCh:         make(chan struct{}),
	}
}

func (ts *testServerSetup) newClient(t *testing.T) *client {
	clientType := ts.opts.AggregatorClientType()
	clientOpts := ts.clientOptions.SetAggregatorClientType(clientType)

	if clientType == aggclient.M3MsgAggregatorClient {
		producer, err := newM3MsgProducer(ts.opts)
		require.NoError(t, err)
		m3msgOpts := aggclient.NewM3MsgOptions().SetProducer(producer)
		clientOpts = clientOpts.SetM3MsgOptions(m3msgOpts)
	}

	testClient, err := aggclient.NewClient(clientOpts)
	require.NoError(t, err)
	testAdminClient, ok := testClient.(aggclient.AdminClient)
	require.True(t, ok)
	return newClient(testAdminClient)
}

func (ts *testServerSetup) getStatusResponse(path string, response interface{}) error {
	resp, err := http.Get("http://" + ts.httpAddr + path) //nolint
	if err != nil {
		return err
	}

	defer resp.Body.Close() //nolint:errcheck
	b, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return err
	}
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("got a non-200 status code: %v", resp.StatusCode)
	}
	return json.Unmarshal(b, response)
}

func (ts *testServerSetup) waitUntilServerIsUp() error {
	isUp := func() bool {
		var resp httpserver.Response
		if err := ts.getStatusResponse(httpserver.HealthPath, &resp); err != nil {
			return false
		}
		return resp.State == "OK"
	}

	if waitUntil(isUp, ts.opts.ServerStateChangeTimeout()) {
		return nil
	}

	return errServerStartTimedOut
}
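
// waitUntil is the polling helper used above and in waitUntilLeader; it is
// defined elsewhere in this package. A minimal sketch of the contract it is
// assumed to satisfy (the real implementation may differ):
//
//	func waitUntil(fn func() bool, timeout time.Duration) bool {
//		deadline := time.Now().Add(timeout)
//		for time.Now().Before(deadline) {
//			if fn() {
//				return true
//			}
//			time.Sleep(10 * time.Millisecond)
//		}
//		return false
//	}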

func (ts *testServerSetup) startServer() error {
	errCh := make(chan error, 1)

	// Create the aggregator.
	ts.aggregator = aggregator.NewAggregator(ts.aggregatorOpts)
	if err := ts.aggregator.Open(); err != nil {
		return err
	}

	instrumentOpts := instrument.NewOptions()
	serverOpts := serve.NewOptions(instrumentOpts).
		SetM3MsgAddr(ts.m3msgAddr).
		SetM3MsgServerOpts(ts.m3msgServerOpts).
		SetRawTCPAddr(ts.rawTCPAddr).
		SetRawTCPServerOpts(ts.rawTCPServerOpts).
		SetHTTPAddr(ts.httpAddr).
		SetHTTPServerOpts(ts.httpServerOpts).
		SetRWOptions(xio.NewOptions())

	// Serve in the background; whichever of a serve error or the health-check
	// result arrives first unblocks the return below.
	go func() {
		if err := serve.Serve(
			ts.aggregator,
			ts.doneCh,
			serverOpts,
		); err != nil {
			select {
			case errCh <- err:
			default:
			}
		}
		close(ts.closedCh)
	}()

	go func() {
		select {
		case errCh <- ts.waitUntilServerIsUp():
		default:
		}
	}()

	return <-errCh
}

func (ts *testServerSetup) waitUntilLeader() error {
	isLeader := func() bool {
		var resp httpserver.StatusResponse
		if err := ts.getStatusResponse(httpserver.StatusPath, &resp); err != nil {
			return false
		}
		return resp.Status.FlushStatus.ElectionState == aggregator.LeaderState
	}

	if waitUntil(isLeader, ts.opts.ElectionStateChangeTimeout()) {
		return nil
	}

	return errLeaderElectionTimeout
}

// value returns the captured metric value for the provided params.
func (ts *testServerSetup) value(timeNanos int64, metricID string, sp policy.StoragePolicy) float64 {
	ts.resultLock.Lock()
	defer ts.resultLock.Unlock()
	return ts.results[resultKey{
		timeNanos:     timeNanos,
		metricID:      metricID,
		storagePolicy: sp,
	}].Value
}

// removeIf removes the entry from the results if it matches the provided value.
func (ts *testServerSetup) removeIf(timeNanos int64, metricID string, sp policy.StoragePolicy, value float64) bool {
	ts.resultLock.Lock()
	defer ts.resultLock.Unlock()

	key := resultKey{
		timeNanos:     timeNanos,
		metricID:      metricID,
		storagePolicy: sp,
	}
	m, ok := ts.results[key]
	if !ok {
		return false
	}
	if m.Value != value {
		return false
	}
	delete(ts.results, key)
	return true
}

type resultKey struct {
	timeNanos     int64
	metricID      string
	storagePolicy policy.StoragePolicy
}

func (ts *testServerSetup) sortedResults() []aggregated.MetricWithStoragePolicy {
	ts.resultLock.Lock()
	defer ts.resultLock.Unlock()
	metrics := make([]aggregated.MetricWithStoragePolicy, 0, len(ts.results))
	for _, r := range ts.results {
		metrics = append(metrics, r)
	}
	sort.Sort(byTimeIDPolicyAscending(metrics))
	return metrics
}

func (ts *testServerSetup) stopServer() error {
	if err := ts.aggregator.Close(); err != nil {
		return err
	}
	close(ts.doneCh)

	// Wait for graceful server shutdown.
	<-ts.closedCh
	return nil
}

func (ts *testServerSetup) close() {
	ts.electionCluster.Close()
}

func (tss testServerSetups) newClient(t *testing.T) *client {
	require.NotEmpty(t, tss)
	// NB: the client can be constructed from any of the setups; it does the
	// routing itself and sends each write to the server that owns the related
	// shard.
	return tss[0].newClient(t)
}
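
// Hypothetical multi-instance sketch (the per-instance options are
// placeholders): tests that exercise placement across several aggregators
// build one setup per instance and write through a single shared client.
//
//	setups := testServerSetups{
//		newTestServerSetup(t, instance0Opts),
//		newTestServerSetup(t, instance1Opts),
//	}
//	c := setups.newClient(t) // routes each write to the instance owning its shard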

func newM3MsgProducer(opts testServerOptions) (producer.Producer, error) {
	svcs, err := opts.ClusterClient().Services(nil)
	if err != nil {
		return nil, err
	}

	bufferOpts := buffer.NewOptions().
		// NB: the default cleanup retry options cause very slow m3msg client
		// shutdowns in some of the tests; the values below avoid that.
		SetCleanupRetryOptions(retry.NewOptions().SetInitialBackoff(100 * time.Millisecond).SetMaxRetries(0))
	buf, err := buffer.NewBuffer(bufferOpts)
	if err != nil {
		return nil, err
	}
	connectionOpts := msgwriter.NewConnectionOptions().
		SetNumConnections(1).
		SetFlushInterval(1 * time.Millisecond)
	writerOpts := msgwriter.NewOptions().
		SetInstrumentOptions(opts.InstrumentOptions()).
		SetTopicName(opts.TopicName()).
		SetTopicService(opts.TopicService()).
		SetServiceDiscovery(svcs).
		SetMessageQueueNewWritesScanInterval(10 * time.Millisecond).
		SetMessageQueueFullScanInterval(100 * time.Millisecond).
		SetConnectionOptions(connectionOpts)
	producerOpts := producer.NewOptions().
		SetBuffer(buf).
		SetWriter(msgwriter.NewWriter(writerOpts))
	return producer.NewProducer(producerOpts), nil
}

type capturingWriter struct {
	results    map[resultKey]aggregated.MetricWithStoragePolicy
	resultLock *sync.Mutex
}

func (w *capturingWriter) Write(mp aggregated.ChunkedMetricWithStoragePolicy) error {
	w.resultLock.Lock()
	defer w.resultLock.Unlock()
	// Reassemble the full metric ID from its chunks and clone the annotation so
	// the captured copies outlive the caller's buffers.
	var fullID []byte
	fullID = append(fullID, mp.ChunkedID.Prefix...)
	fullID = append(fullID, mp.ChunkedID.Data...)
	fullID = append(fullID, mp.ChunkedID.Suffix...)
	var clonedAnnotation []byte
	clonedAnnotation = append(clonedAnnotation, mp.Annotation...)
	metric := aggregated.Metric{
		ID:         fullID,
		TimeNanos:  mp.TimeNanos,
		Value:      mp.Value,
		Annotation: clonedAnnotation,
	}
	key := resultKey{
		timeNanos:     metric.TimeNanos,
		metricID:      string(fullID),
		storagePolicy: mp.StoragePolicy,
	}
	w.results[key] = aggregated.MetricWithStoragePolicy{
		Metric:        metric,
		StoragePolicy: mp.StoragePolicy,
	}
	return nil
}

func (w *capturingWriter) Flush() error { return nil }
func (w *capturingWriter) Close() error { return nil }

type capturingHandler struct {
	results    map[resultKey]aggregated.MetricWithStoragePolicy
	resultLock *sync.Mutex
}

func (h *capturingHandler) NewWriter(tally.Scope) (writer.Writer, error) {
	return &capturingWriter{results: h.results, resultLock: h.resultLock}, nil
}

func (h *capturingHandler) Close() {}
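
// Capture semantics, illustrated with hypothetical values: results are keyed
// by (time, metric ID, storage policy), so a second Write for the same key
// overwrites the first (last write wins), and the captured metricID is the
// concatenation of the ID chunks:
//
//	// ChunkedID{Prefix: "stats.counter.", Data: "requests", Suffix: "+"}
//	// yields the captured metricID "stats.counter.requests+".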