github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/cmd/services/m3aggregator/config/aggregator.go (about) 1 // Copyright (c) 2017 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 
package config

import (
	"errors"
	"fmt"
	"math"
	"net"
	"os"
	"runtime"
	"sort"
	"strings"
	"time"

	"github.com/m3db/m3/src/aggregator/aggregation/quantile/cm"
	"github.com/m3db/m3/src/aggregator/aggregator"
	"github.com/m3db/m3/src/aggregator/aggregator/handler"
	"github.com/m3db/m3/src/aggregator/aggregator/handler/writer"
	aggclient "github.com/m3db/m3/src/aggregator/client"
	aggruntime "github.com/m3db/m3/src/aggregator/runtime"
	"github.com/m3db/m3/src/aggregator/sharding"
	"github.com/m3db/m3/src/cluster/client"
	"github.com/m3db/m3/src/cluster/kv"
	"github.com/m3db/m3/src/cluster/placement"
	"github.com/m3db/m3/src/cluster/services"
	"github.com/m3db/m3/src/cmd/services/m3aggregator/serve"
	"github.com/m3db/m3/src/metrics/aggregation"
	"github.com/m3db/m3/src/metrics/policy"
	"github.com/m3db/m3/src/x/clock"
	"github.com/m3db/m3/src/x/config/hostid"
	"github.com/m3db/m3/src/x/instrument"
	xio "github.com/m3db/m3/src/x/io"
	"github.com/m3db/m3/src/x/pool"
	"github.com/m3db/m3/src/x/retry"
	"github.com/m3db/m3/src/x/sync"
)

var (
	// errNoKVClientConfiguration reports a missing KV client configuration.
	errNoKVClientConfiguration = errors.New("no kv client configuration")
	// errEmptyJitterBucketList is returned by jitterBuckets.NewMaxJitterFn
	// when it is asked to build a jitter function from zero buckets.
	errEmptyJitterBucketList = errors.New("empty jitter bucket list")
)

var (
	// defaultNumPassthroughWriters is the number of passthrough writers
	// created when passthrough.numWriters is left at zero.
	defaultNumPassthroughWriters = 8
	// defaultHostID is the host ID value used by HostIDOrDefault when no
	// hostID section is configured.
	defaultHostID = "m3aggregator_local"
)

// AggregatorConfiguration contains aggregator configuration.
type AggregatorConfiguration struct {
	// HostID is the local host ID configuration.
	HostID *hostid.Configuration `yaml:"hostID"`

	// InstanceID is the instance ID configuration.
	InstanceID InstanceIDConfiguration `yaml:"instanceID"`

	// VerboseErrors sets whether or not to use verbose errors when a
	// value arrives too early, too late, or for other bad-request-like operations.
	VerboseErrors bool `yaml:"verboseErrors"`

	// AggregationTypes configures the aggregation types.
	AggregationTypes aggregation.TypesConfiguration `yaml:"aggregationTypes"`

	// Common metric prefix.
	MetricPrefix *string `yaml:"metricPrefix"`

	// Counter metric prefix.
	CounterPrefix *string `yaml:"counterPrefix"`

	// Timer metric prefix.
	TimerPrefix *string `yaml:"timerPrefix"`

	// Gauge metric prefix.
	GaugePrefix *string `yaml:"gaugePrefix"`

	// Stream configuration for computing quantiles.
	Stream streamConfiguration `yaml:"stream"`

	// Client configuration.
	Client aggclient.Configuration `yaml:"client"`

	// Placement manager.
	PlacementManager placementManagerConfiguration `yaml:"placementManager"`

	// Hash type used for sharding. When nil, sharding.DefaultHash is used.
	HashType *sharding.HashType `yaml:"hashType"`

	// Amount of time we buffer writes before shard cutover.
	BufferDurationBeforeShardCutover time.Duration `yaml:"bufferDurationBeforeShardCutover"`

	// Amount of time we buffer writes after shard cutoff.
	BufferDurationAfterShardCutoff time.Duration `yaml:"bufferDurationAfterShardCutoff"`

	// Amount of time we buffer timed metrics in the past.
	BufferDurationForPastTimedMetric time.Duration `yaml:"bufferDurationForPastTimedMetric"`

	// Amount of time we buffer timed metrics in the future.
	BufferDurationForFutureTimedMetric time.Duration `yaml:"bufferDurationForFutureTimedMetric"`

	// Resign timeout.
	ResignTimeout time.Duration `yaml:"resignTimeout"`

	// ShutdownWaitTimeout if non-zero will be how long the aggregator waits from
	// receiving a shutdown signal to exit. This can make coordinating graceful
	// shutdowns between two replicas safer.
	ShutdownWaitTimeout time.Duration `yaml:"shutdownWaitTimeout"`

	// Flush times manager.
	FlushTimesManager flushTimesManagerConfiguration `yaml:"flushTimesManager"`

	// Election manager.
	ElectionManager electionManagerConfiguration `yaml:"electionManager"`

	// Flush manager.
	FlushManager flushManagerConfiguration `yaml:"flushManager"`

	// Flushing handler configuration.
	Flush handler.FlushConfiguration `yaml:"flush"`

	// Passthrough controls the passthrough knobs. When nil or disabled, a
	// blackhole writer is used for passthrough writes.
	Passthrough *passthroughConfiguration `yaml:"passthrough"`

	// Forwarding configuration.
	Forwarding forwardingConfiguration `yaml:"forwarding"`

	// EntryTTL determines how long an entry remains alive before it may be expired due to inactivity.
	EntryTTL time.Duration `yaml:"entryTTL"`

	// EntryCheckInterval determines how often entries are checked for expiration.
	EntryCheckInterval time.Duration `yaml:"entryCheckInterval"`

	// EntryCheckBatchPercent determines the percentage of entries checked in a batch.
	EntryCheckBatchPercent float64 `yaml:"entryCheckBatchPercent" validate:"min=0.0,max=1.0"`

	// MaxTimerBatchSizePerWrite determines the maximum timer batch size for each batched write.
	MaxTimerBatchSizePerWrite int `yaml:"maxTimerBatchSizePerWrite" validate:"min=0"`

	// Default storage policies.
	DefaultStoragePolicies []policy.StoragePolicy `yaml:"defaultStoragePolicies"`

	// Maximum number of cached source sets.
	MaxNumCachedSourceSets *int `yaml:"maxNumCachedSourceSets"`

	// Whether to discard NaN aggregated values.
	DiscardNaNAggregatedValues *bool `yaml:"discardNaNAggregatedValues"`

	// Pool of counter elements.
	CounterElemPool pool.ObjectPoolConfiguration `yaml:"counterElemPool"`

	// Pool of timer elements.
	TimerElemPool pool.ObjectPoolConfiguration `yaml:"timerElemPool"`

	// Pool of gauge elements.
	GaugeElemPool pool.ObjectPoolConfiguration `yaml:"gaugeElemPool"`

	// Pool of entries.
	EntryPool pool.ObjectPoolConfiguration `yaml:"entryPool"`

	// AddToReset is the yaml config for aggregator.Options.AddToReset.
	AddToReset bool `yaml:"addToReset"`

	// TimedMetricsFlushOffsetEnabled enables using FlushOffset for timed metrics.
	TimedMetricsFlushOffsetEnabled bool `yaml:"timedMetricsFlushOffsetEnabled"`

	// FeatureFlags are feature flags to apply.
	FeatureFlags aggregator.FeatureFlagConfigurations `yaml:"featureFlags"`

	// WritesIgnoreCutoffCutover allows accepting writes ignoring cutoff/cutover timestamp.
	// Must be in sync with m3msg WriterConfiguration.IgnoreCutoffCutover.
	WritesIgnoreCutoffCutover bool `yaml:"writesIgnoreCutoffCutover"`
}

// InstanceIDType is the instance ID type that defines how the
// instance ID is constructed, which is then used to look up the
// aggregator instance in the placement.
type InstanceIDType uint

const (
	// HostIDPortInstanceIDType specifies to use the host ID
	// concatenated with the port for lookup in the placement.
	// NB: this is the legacy instance ID type. It is how the instance
	// ID used to be constructed, which imposed the strange requirement
	// that the instance ID in the topology include the port
	// concatenated with the host ID.
	HostIDPortInstanceIDType InstanceIDType = iota
	// HostIDInstanceIDType specifies to just use the host ID
	// as the instance ID for lookup in the placement.
	HostIDInstanceIDType

	// defaultInstanceIDType must remain the legacy instance ID type so
	// the config stays backwards compatible: otherwise, for those not
	// explicitly specifying the instance ID type, existing placements
	// would stop working with newer versions of the aggregator.
	defaultInstanceIDType = HostIDPortInstanceIDType
)

// String returns the name of the instance ID type as used in YAML
// ("host_id" or "host_id_port"), or "unknown" for any other value.
func (t InstanceIDType) String() string {
	switch t {
	case HostIDInstanceIDType:
		return "host_id"
	case HostIDPortInstanceIDType:
		return "host_id_port"
	}
	return "unknown"
}

// validInstanceIDTypes lists the types accepted by UnmarshalYAML; it is also
// used to build the list of valid names in the error for an unknown value.
var validInstanceIDTypes = []InstanceIDType{
	HostIDInstanceIDType,
	HostIDPortInstanceIDType,
}

// MarshalYAML returns the YAML representation of the InstanceIDType.
func (t InstanceIDType) MarshalYAML() (interface{}, error) {
	return t.String(), nil
}

// UnmarshalYAML unmarshals an InstanceIDType into a valid type from string.
// An empty string selects defaultInstanceIDType; a string matching none of
// validInstanceIDTypes yields an error that lists the valid names.
func (t *InstanceIDType) UnmarshalYAML(unmarshal func(interface{}) error) error {
	var str string
	if err := unmarshal(&str); err != nil {
		return err
	}
	if str == "" {
		*t = defaultInstanceIDType
		return nil
	}
	// Collect the quoted valid names while searching so they are available
	// for the error message if no match is found.
	strs := make([]string, 0, len(validInstanceIDTypes))
	for _, valid := range validInstanceIDTypes {
		if str == valid.String() {
			*t = valid
			return nil
		}
		strs = append(strs, "'"+valid.String()+"'")
	}
	return fmt.Errorf(
		"invalid InstanceIDType '%s' valid types are: %s", str, strings.Join(strs, ", "))
}

// InstanceIDConfiguration is the instance ID configuration.
type InstanceIDConfiguration struct {
	// InstanceIDType specifies how to construct the instance ID
	// that is used for lookup of the aggregator in the placement.
	InstanceIDType InstanceIDType `yaml:"type"`
}

// NewAggregatorOptions creates a new set of aggregator options.
//
// address is the server address; its port is used when the instance ID type
// is HostIDPortInstanceIDType. client is the cluster client used to build the
// admin client, KV stores and services. serveOpts supplies the reader/writer
// options, falling back to xio.NewOptions() when they are nil.
// runtimeOptsManager, clockOpts and instrumentOpts are set on the returned
// options and passed on to the components constructed here.
//
// The first error from any sub-component constructor is returned as-is along
// with nil options.
func (c *AggregatorConfiguration) NewAggregatorOptions(
	address string,
	client client.Client,
	serveOpts serve.Options,
	runtimeOptsManager aggruntime.OptionsManager,
	clockOpts clock.Options,
	instrumentOpts instrument.Options,
) (aggregator.Options, error) {
	opts := aggregator.NewOptions(clockOpts).
		SetInstrumentOptions(instrumentOpts).
		SetRuntimeOptionsManager(runtimeOptsManager).
		SetVerboseErrors(c.VerboseErrors).
		SetAddToReset(c.AddToReset).
		SetTimedMetricsFlushOffsetEnabled(c.TimedMetricsFlushOffsetEnabled).
		SetFeatureFlagBundlesParsed(c.FeatureFlags.Parse())

	// Fall back to default reader/writer options when the server options
	// do not provide any.
	rwOpts := serveOpts.RWOptions()
	if rwOpts == nil {
		rwOpts = xio.NewOptions()
	}

	// Set the aggregation types options.
	aggTypesOpts, err := c.AggregationTypes.NewOptions(instrumentOpts)
	if err != nil {
		return nil, err
	}
	opts = opts.SetAggregationTypesOptions(aggTypesOpts)

	// Set the prefix for metrics aggregations. Each setter is a method value
	// bound to the current opts, so every call builds on the previous result.
	opts = setMetricPrefix(opts, c.MetricPrefix, opts.SetMetricPrefix)
	opts = setMetricPrefix(opts, c.CounterPrefix, opts.SetCounterPrefix)
	opts = setMetricPrefix(opts, c.TimerPrefix, opts.SetTimerPrefix)
	opts = setMetricPrefix(opts, c.GaugePrefix, opts.SetGaugePrefix)

	// Set stream options.
	scope := instrumentOpts.MetricsScope()
	iOpts := instrumentOpts.SetMetricsScope(scope.SubScope("stream"))
	streamOpts, err := c.Stream.NewStreamOptions(iOpts)
	if err != nil {
		return nil, err
	}
	opts = opts.SetStreamOptions(streamOpts)

	// Set administrative client.
	// TODO(xichen): client retry threshold likely needs to be low for faster retries.
	// NOTE(review): the admin client is given fresh clock.NewOptions() rather
	// than the clockOpts argument — confirm this is intentional.
	iOpts = instrumentOpts.SetMetricsScope(scope.SubScope("client"))
	adminClient, err := c.Client.NewAdminClient(
		client, clock.NewOptions(), iOpts, rwOpts)
	if err != nil {
		return nil, err
	}
	if err = adminClient.Init(); err != nil {
		return nil, err
	}
	opts = opts.SetAdminClient(adminClient)

	// Set instance ID.
	instanceID, err := c.newInstanceID(address)
	if err != nil {
		return nil, err
	}

	// Set placement manager.
	iOpts = instrumentOpts.SetMetricsScope(scope.SubScope("placement-manager"))
	placementManager, err := c.PlacementManager.NewPlacementManager(client, instanceID, iOpts)
	if err != nil {
		return nil, err
	}
	opts = opts.SetPlacementManager(placementManager)

	// Set sharding function.
	hashType := sharding.DefaultHash
	if c.HashType != nil {
		hashType = *c.HashType
	}
	shardFn, err := hashType.ShardFn()
	if err != nil {
		return nil, err
	}
	opts = opts.SetShardFn(shardFn)

	// Set buffer durations for shard cutovers and shard cutoffs.
	// Zero values leave the defaults from aggregator.NewOptions in place.
	if c.BufferDurationBeforeShardCutover != 0 {
		opts = opts.SetBufferDurationBeforeShardCutover(c.BufferDurationBeforeShardCutover)
	}
	if c.BufferDurationAfterShardCutoff != 0 {
		opts = opts.SetBufferDurationAfterShardCutoff(c.BufferDurationAfterShardCutoff)
	}
	if c.BufferDurationForPastTimedMetric != 0 {
		opts = opts.SetBufferForPastTimedMetric(c.BufferDurationForPastTimedMetric).
			SetBufferForPastTimedMetricFn(bufferForPastTimedMetricFn(c.BufferDurationForPastTimedMetric))
	}
	if c.BufferDurationForFutureTimedMetric != 0 {
		opts = opts.SetBufferForFutureTimedMetric(c.BufferDurationForFutureTimedMetric)
	}

	// Set resign timeout.
	if c.ResignTimeout != 0 {
		opts = opts.SetResignTimeout(c.ResignTimeout)
	}

	// Set flush times manager.
	iOpts = instrumentOpts.SetMetricsScope(scope.SubScope("flush-times-manager"))
	flushTimesManager, err := c.FlushTimesManager.NewFlushTimesManager(client, iOpts)
	if err != nil {
		return nil, err
	}
	opts = opts.SetFlushTimesManager(flushTimesManager)

	// Set election manager. The election services use the same placement
	// namespace as the placement manager's KV configuration.
	iOpts = instrumentOpts.SetMetricsScope(scope.SubScope("election-manager"))
	placementNamespace := c.PlacementManager.KVConfig.Namespace
	electionManager, err := c.ElectionManager.NewElectionManager(
		client,
		instanceID,
		placementNamespace,
		placementManager,
		flushTimesManager,
		clockOpts,
		iOpts,
	)
	if err != nil {
		return nil, err
	}
	opts = opts.SetElectionManager(electionManager)

	// Set flush manager.
	iOpts = instrumentOpts.SetMetricsScope(scope.SubScope("flush-manager"))
	flushManagerOpts, err := c.FlushManager.NewFlushManagerOptions(
		placementManager,
		electionManager,
		flushTimesManager,
		iOpts,
		opts.BufferForPastTimedMetric(),
	)
	if err != nil {
		return nil, err
	}
	flushManager := aggregator.NewFlushManager(flushManagerOpts)
	opts = opts.SetFlushManager(flushManager)

	// Set flushing handler.
	iOpts = instrumentOpts.SetMetricsScope(scope.SubScope("flush-handler"))
	flushHandler, err := c.Flush.NewHandler(client, iOpts, rwOpts)
	if err != nil {
		return nil, err
	}
	opts = opts.SetFlushHandler(flushHandler)

	// Set passthrough writer.
	aggShardFn, err := hashType.AggregatedShardFn()
	if err != nil {
		return nil, err
	}
	iOpts = instrumentOpts.SetMetricsScope(scope.SubScope("passthrough-writer"))
	passthroughWriter, err := c.newPassthroughWriter(flushHandler, iOpts, aggShardFn)
	if err != nil {
		return nil, err
	}
	opts = opts.SetPassthroughWriter(passthroughWriter)

	// Set max allowed forwarding delay function, derived from the jitter
	// settings of the flush manager options built above.
	jitterEnabled := flushManagerOpts.JitterEnabled()
	maxJitterFn := flushManagerOpts.MaxJitterFn()
	maxAllowedForwardingDelayFn := c.Forwarding.MaxAllowedForwardingDelayFn(jitterEnabled, maxJitterFn)
	opts = opts.SetMaxAllowedForwardingDelayFn(maxAllowedForwardingDelayFn)

	// Set entry options. Zero values leave the defaults in place.
	if c.EntryTTL != 0 {
		opts = opts.SetEntryTTL(c.EntryTTL)
	}
	if c.EntryCheckInterval != 0 {
		opts = opts.SetEntryCheckInterval(c.EntryCheckInterval)
	}
	if c.EntryCheckBatchPercent != 0.0 {
		opts = opts.SetEntryCheckBatchPercent(c.EntryCheckBatchPercent)
	}
	if c.MaxTimerBatchSizePerWrite != 0 {
		opts = opts.SetMaxTimerBatchSizePerWrite(c.MaxTimerBatchSizePerWrite)
	}

	// Set default storage policies. The slice is copied so the options do
	// not share a backing array with the configuration.
	storagePolicies := make([]policy.StoragePolicy, len(c.DefaultStoragePolicies))
	copy(storagePolicies, c.DefaultStoragePolicies)
	opts = opts.SetDefaultStoragePolicies(storagePolicies)

	// Set cached source sets options.
	if c.MaxNumCachedSourceSets != nil {
		opts = opts.SetMaxNumCachedSourceSets(*c.MaxNumCachedSourceSets)
	}

	// Set whether to discard NaN aggregated values.
	if c.DiscardNaNAggregatedValues != nil {
		opts = opts.SetDiscardNaNAggregatedValues(*c.DiscardNaNAggregatedValues)
	}

	// Set counter elem pool.
	iOpts = instrumentOpts.SetMetricsScope(scope.SubScope("counter-elem-pool"))
	counterElemPoolOpts := c.CounterElemPool.NewObjectPoolOptions(iOpts)
	counterElemPool := aggregator.NewCounterElemPool(counterElemPoolOpts)
	opts = opts.SetCounterElemPool(counterElemPool)
	// use a singleton ElemOptions to avoid allocs per elem.
	// NB: elemOpts is built from opts as it stands here, i.e. after the
	// counter elem pool is set but before the timer and gauge elem pools,
	// the entry pool and WritesIgnoreCutoffCutover are set below.
	elemOpts := aggregator.NewElemOptions(opts)
	counterElemPool.Init(func() *aggregator.CounterElem {
		return aggregator.MustNewCounterElem(aggregator.ElemData{}, elemOpts)
	})

	// Set timer elem pool.
	iOpts = instrumentOpts.SetMetricsScope(scope.SubScope("timer-elem-pool"))
	timerElemPoolOpts := c.TimerElemPool.NewObjectPoolOptions(iOpts)
	timerElemPool := aggregator.NewTimerElemPool(timerElemPoolOpts)
	opts = opts.SetTimerElemPool(timerElemPool)
	timerElemPool.Init(func() *aggregator.TimerElem {
		return aggregator.MustNewTimerElem(aggregator.ElemData{}, elemOpts)
	})

	// Set gauge elem pool.
	iOpts = instrumentOpts.SetMetricsScope(scope.SubScope("gauge-elem-pool"))
	gaugeElemPoolOpts := c.GaugeElemPool.NewObjectPoolOptions(iOpts)
	gaugeElemPool := aggregator.NewGaugeElemPool(gaugeElemPoolOpts)
	opts = opts.SetGaugeElemPool(gaugeElemPool)
	gaugeElemPool.Init(func() *aggregator.GaugeElem {
		return aggregator.MustNewGaugeElem(aggregator.ElemData{}, elemOpts)
	})

	// Set entry pool.
	iOpts = instrumentOpts.SetMetricsScope(scope.SubScope("entry-pool"))
	entryPoolOpts := c.EntryPool.NewObjectPoolOptions(iOpts)
	entryPool := aggregator.NewEntryPool(entryPoolOpts)
	runtimeOpts := runtimeOptsManager.RuntimeOptions()
	opts = opts.SetEntryPool(entryPool)
	// allocate metrics only once to reduce memory utilization
	metrics := aggregator.NewEntryMetrics(iOpts.MetricsScope())
	// The allocator closes over the opts variable, which is reassigned once
	// more below. NOTE(review): whether Init allocates entries eagerly (and
	// therefore which opts value they see) is not visible here — confirm.
	entryPool.Init(func() *aggregator.Entry {
		return aggregator.NewEntryWithMetrics(nil, metrics, runtimeOpts, opts)
	})

	opts = opts.SetWritesIgnoreCutoffCutover(c.WritesIgnoreCutoffCutover)

	return opts, nil
}

// HostIDOrDefault returns the host ID or default.
510 func (c *AggregatorConfiguration) HostIDOrDefault() hostid.Configuration { 511 if c.HostID == nil { 512 return hostid.Configuration{ 513 Resolver: hostid.ConfigResolver, 514 Value: &defaultHostID, 515 } 516 } 517 518 return *c.HostID 519 } 520 521 func (c *AggregatorConfiguration) newInstanceID(address string) (string, error) { 522 var ( 523 hostIDValue string 524 err error 525 ) 526 if c.HostID != nil { 527 hostIDValue, err = c.HostID.Resolve() 528 } else { 529 hostIDValue, err = os.Hostname() 530 } 531 if err != nil { 532 return "", fmt.Errorf("error determining host ID: %v", err) 533 } 534 535 switch c.InstanceID.InstanceIDType { 536 case HostIDInstanceIDType: 537 return hostIDValue, nil 538 case HostIDPortInstanceIDType: 539 _, port, err := net.SplitHostPort(address) 540 if err != nil { 541 return "", fmt.Errorf("error parsing server address %s: %v", address, err) 542 } 543 return net.JoinHostPort(hostIDValue, port), nil 544 default: 545 return "", fmt.Errorf("unknown instance ID type: value=%d, str=%s", 546 c.InstanceID.InstanceIDType, c.InstanceID.InstanceIDType.String()) 547 } 548 } 549 550 func bufferForPastTimedMetricFn(buffer time.Duration) aggregator.BufferForPastTimedMetricFn { 551 return func(resolution time.Duration) time.Duration { 552 return buffer + resolution 553 } 554 } 555 556 // streamConfiguration contains configuration for quantile-related metric streams. 557 type streamConfiguration struct { 558 // Error epsilon for quantile computation. 559 Eps float64 `yaml:"eps"` 560 561 // Initial sample pool capacity for quantile computation. 562 Capacity int `yaml:"capacity"` 563 564 // Insertion and compression frequency. 565 InsertAndCompressEvery int `yaml:"insertAndCompressEvery"` 566 567 // FlushEvery is deprecated. 568 FlushEvery int `yaml:"flushEvery"` 569 570 // StreamPool is deprecated. 571 StreamPool pool.ObjectPoolConfiguration `yaml:"streamPool"` 572 573 // SamplePool is deprecated. 
574 SamplePool *pool.ObjectPoolConfiguration `yaml:"samplePool"` 575 576 // FloatsPool is deprecated. 577 FloatsPool pool.BucketizedPoolConfiguration `yaml:"floatsPool"` 578 } 579 580 func (c *streamConfiguration) NewStreamOptions(_ instrument.Options) (cm.Options, error) { 581 opts := cm.NewOptions(). 582 SetEps(c.Eps). 583 SetCapacity(c.Capacity) 584 585 if c.InsertAndCompressEvery != 0 { 586 opts = opts.SetInsertAndCompressEvery(c.InsertAndCompressEvery) 587 } 588 589 if err := opts.Validate(); err != nil { 590 return nil, err 591 } 592 return opts, nil 593 } 594 595 type placementManagerConfiguration struct { 596 KVConfig kv.OverrideConfiguration `yaml:"kvConfig"` 597 Watcher placement.WatcherConfiguration `yaml:"placementWatcher"` 598 } 599 600 func (c placementManagerConfiguration) NewPlacementManager( 601 client client.Client, 602 instanceID string, 603 instrumentOpts instrument.Options, 604 ) (aggregator.PlacementManager, error) { 605 kvOpts, err := c.KVConfig.NewOverrideOptions() 606 if err != nil { 607 return nil, err 608 } 609 store, err := client.Store(kvOpts) 610 if err != nil { 611 return nil, err 612 } 613 scope := instrumentOpts.MetricsScope() 614 iOpts := instrumentOpts.SetMetricsScope(scope.SubScope("placement-watcher")) 615 placementWatcherOpts := c.Watcher.NewOptions(store, iOpts) 616 placementManagerOpts := aggregator.NewPlacementManagerOptions(). 617 SetInstrumentOptions(instrumentOpts). 618 SetInstanceID(instanceID). 619 SetWatcherOptions(placementWatcherOpts) 620 return aggregator.NewPlacementManager(placementManagerOpts), nil 621 } 622 623 type forwardingConfiguration struct { 624 // MaxSingleDelay is the maximum delay for a single forward step. 625 MaxSingleDelay time.Duration `yaml:"maxSingleDelay"` 626 // MaxConstDelay is the maximum delay for a forward step as a constant + resolution*numForwardedTimes. 
627 MaxConstDelay time.Duration `yaml:"maxConstDelay"` 628 } 629 630 func (c forwardingConfiguration) MaxAllowedForwardingDelayFn( 631 jitterEnabled bool, 632 maxJitterFn aggregator.FlushJitterFn, 633 ) aggregator.MaxAllowedForwardingDelayFn { 634 if v := c.MaxConstDelay; v > 0 { 635 return func(resolution time.Duration, numForwardedTimes int) time.Duration { 636 return v + (resolution * time.Duration(numForwardedTimes)) 637 } 638 } 639 640 return func(resolution time.Duration, numForwardedTimes int) time.Duration { 641 // If jittering is enabled, we use max jitter fn to determine the initial jitter. 642 // Otherwise, flushing may start at any point within a resolution interval so we 643 // assume the full resolution interval may be used for initial jittering. 644 initialJitter := resolution 645 if jitterEnabled { 646 initialJitter = maxJitterFn(resolution) 647 } 648 return initialJitter + c.MaxSingleDelay*time.Duration(numForwardedTimes) 649 } 650 } 651 652 type flushTimesManagerConfiguration struct { 653 // KV Configuration. 654 KVConfig kv.OverrideConfiguration `yaml:"kvConfig"` 655 656 // Flush times key format. 657 FlushTimesKeyFmt string `yaml:"flushTimesKeyFmt" validate:"nonzero"` 658 659 // Retrier for persisting flush times. 660 FlushTimesPersistRetrier retry.Configuration `yaml:"flushTimesPersistRetrier"` 661 } 662 663 func (c flushTimesManagerConfiguration) NewFlushTimesManager( 664 client client.Client, 665 instrumentOpts instrument.Options, 666 ) (aggregator.FlushTimesManager, error) { 667 kvOpts, err := c.KVConfig.NewOverrideOptions() 668 if err != nil { 669 return nil, err 670 } 671 store, err := client.Store(kvOpts) 672 if err != nil { 673 return nil, err 674 } 675 scope := instrumentOpts.MetricsScope() 676 retrier := c.FlushTimesPersistRetrier.NewRetrier(scope.SubScope("flush-times-persist-retrier")) 677 flushTimesManagerOpts := aggregator.NewFlushTimesManagerOptions(). 678 SetInstrumentOptions(instrumentOpts). 
679 SetFlushTimesKeyFmt(c.FlushTimesKeyFmt). 680 SetFlushTimesStore(store). 681 SetFlushTimesPersistRetrier(retrier) 682 return aggregator.NewFlushTimesManager(flushTimesManagerOpts), nil 683 } 684 685 type electionManagerConfiguration struct { 686 Election electionConfiguration `yaml:"election"` 687 ServiceID serviceIDConfiguration `yaml:"serviceID"` 688 LeaderValue string `yaml:"leaderValue"` 689 ElectionKeyFmt string `yaml:"electionKeyFmt" validate:"nonzero"` 690 CampaignRetrier retry.Configuration `yaml:"campaignRetrier"` 691 ChangeRetrier retry.Configuration `yaml:"changeRetrier"` 692 ResignRetrier retry.Configuration `yaml:"resignRetrier"` 693 CampaignStateCheckInterval time.Duration `yaml:"campaignStateCheckInterval"` 694 ShardCutoffCheckOffset time.Duration `yaml:"shardCutoffCheckOffset"` 695 } 696 697 func (c electionManagerConfiguration) NewElectionManager( 698 client client.Client, 699 instanceID string, 700 placementNamespace string, 701 placementManager aggregator.PlacementManager, 702 flushTimesManager aggregator.FlushTimesManager, 703 clockOpts clock.Options, 704 instrumentOpts instrument.Options, 705 ) (aggregator.ElectionManager, error) { 706 electionOpts, err := c.Election.NewElectionOptions() 707 if err != nil { 708 return nil, err 709 } 710 serviceID := c.ServiceID.NewServiceID() 711 namespaceOpts := services.NewNamespaceOptions().SetPlacementNamespace(placementNamespace) 712 serviceOpts := services.NewOverrideOptions().SetNamespaceOptions(namespaceOpts) 713 svcs, err := client.Services(serviceOpts) 714 if err != nil { 715 return nil, err 716 } 717 leaderService, err := svcs.LeaderService(serviceID, electionOpts) 718 if err != nil { 719 return nil, err 720 } 721 campaignOpts, err := services.NewCampaignOptions() 722 if err != nil { 723 return nil, err 724 } 725 leaderValue := instanceID 726 if c.LeaderValue != "" { 727 leaderValue = c.LeaderValue 728 } 729 campaignOpts = campaignOpts.SetLeaderValue(leaderValue) 730 scope := 
instrumentOpts.MetricsScope() 731 campaignRetryOpts := c.CampaignRetrier.NewOptions(scope.SubScope("campaign-retrier")) 732 changeRetryOpts := c.ChangeRetrier.NewOptions(scope.SubScope("change-retrier")) 733 resignRetryOpts := c.ResignRetrier.NewOptions(scope.SubScope("resign-retrier")) 734 opts := aggregator.NewElectionManagerOptions(). 735 SetClockOptions(clockOpts). 736 SetInstrumentOptions(instrumentOpts). 737 SetElectionOptions(electionOpts). 738 SetCampaignOptions(campaignOpts). 739 SetCampaignRetryOptions(campaignRetryOpts). 740 SetChangeRetryOptions(changeRetryOpts). 741 SetResignRetryOptions(resignRetryOpts). 742 SetElectionKeyFmt(c.ElectionKeyFmt). 743 SetLeaderService(leaderService). 744 SetPlacementManager(placementManager). 745 SetFlushTimesManager(flushTimesManager) 746 if c.CampaignStateCheckInterval != 0 { 747 opts = opts.SetCampaignStateCheckInterval(c.CampaignStateCheckInterval) 748 } 749 if c.ShardCutoffCheckOffset != 0 { 750 opts = opts.SetShardCutoffCheckOffset(c.ShardCutoffCheckOffset) 751 } 752 electionManager := aggregator.NewElectionManager(opts) 753 return electionManager, nil 754 } 755 756 type electionConfiguration struct { 757 LeaderTimeout time.Duration `yaml:"leaderTimeout"` 758 ResignTimeout time.Duration `yaml:"resignTimeout"` 759 TTLSeconds int `yaml:"ttlSeconds"` 760 } 761 762 func (c electionConfiguration) NewElectionOptions() (services.ElectionOptions, error) { 763 opts := services.NewElectionOptions() 764 if c.LeaderTimeout != 0 { 765 opts = opts.SetLeaderTimeout(c.LeaderTimeout) 766 } 767 if c.ResignTimeout != 0 { 768 opts = opts.SetResignTimeout(c.ResignTimeout) 769 } 770 if c.TTLSeconds != 0 { 771 opts = opts.SetTTLSecs(c.TTLSeconds) 772 } 773 return opts, nil 774 } 775 776 // TODO: move this to m3cluster. 
777 type serviceIDConfiguration struct { 778 Name string `yaml:"name"` 779 Environment string `yaml:"environment"` 780 Zone string `yaml:"zone"` 781 } 782 783 func (c serviceIDConfiguration) NewServiceID() services.ServiceID { 784 sid := services.NewServiceID() 785 if c.Name != "" { 786 sid = sid.SetName(c.Name) 787 } 788 if c.Environment != "" { 789 sid = sid.SetEnvironment(c.Environment) 790 } 791 if c.Zone != "" { 792 sid = sid.SetZone(c.Zone) 793 } 794 return sid 795 } 796 797 type flushManagerConfiguration struct { 798 // How frequently the flush manager checks for next flush. 799 CheckEvery time.Duration `yaml:"checkEvery"` 800 801 // Whether jittering is enabled. 802 JitterEnabled *bool `yaml:"jitterEnabled"` 803 804 // Buckets for determining max jitter amounts. 805 MaxJitters []jitterBucket `yaml:"maxJitters"` 806 807 // Number of workers per CPU. 808 NumWorkersPerCPU float64 `yaml:"numWorkersPerCPU" validate:"min=0.0,max=1.0"` 809 810 // DeprecatedFlushTimesPersistEvery controlled how often flush times were 811 // persisted, but is now deprecated. 812 DeprecatedFlushTimesPersistEvery time.Duration `yaml:"flushTimesPersistEvery"` 813 814 // Maximum buffer size. 815 MaxBufferSize time.Duration `yaml:"maxBufferSize"` 816 817 // Window size for a forced flush. 818 ForcedFlushWindowSize time.Duration `yaml:"forcedFlushWindowSize"` 819 } 820 821 func (c flushManagerConfiguration) NewFlushManagerOptions( 822 placementManager aggregator.PlacementManager, 823 electionManager aggregator.ElectionManager, 824 flushTimesManager aggregator.FlushTimesManager, 825 instrumentOpts instrument.Options, 826 bufferForPastTimedMetric time.Duration, 827 ) (aggregator.FlushManagerOptions, error) { 828 opts := aggregator.NewFlushManagerOptions(). 829 SetInstrumentOptions(instrumentOpts). 830 SetPlacementManager(placementManager). 831 SetElectionManager(electionManager). 832 SetFlushTimesManager(flushTimesManager). 
833 SetBufferForPastTimedMetric(bufferForPastTimedMetric) 834 if c.CheckEvery != 0 { 835 opts = opts.SetCheckEvery(c.CheckEvery) 836 } 837 if c.JitterEnabled != nil { 838 opts = opts.SetJitterEnabled(*c.JitterEnabled) 839 } 840 if c.MaxJitters != nil { 841 maxJitterFn, err := jitterBuckets(c.MaxJitters).NewMaxJitterFn() 842 if err != nil { 843 return nil, err 844 } 845 opts = opts.SetMaxJitterFn(maxJitterFn) 846 } 847 if c.NumWorkersPerCPU != 0 { 848 runtimeCPU := float64(runtime.GOMAXPROCS(0)) 849 numWorkers := c.NumWorkersPerCPU * runtimeCPU 850 workerPoolSize := int(math.Ceil(numWorkers)) 851 if workerPoolSize < 1 { 852 workerPoolSize = 1 853 } 854 workerPool := sync.NewWorkerPool(workerPoolSize) 855 workerPool.Init() 856 opts = opts.SetWorkerPool(workerPool) 857 } 858 if c.MaxBufferSize != 0 { 859 opts = opts.SetMaxBufferSize(c.MaxBufferSize) 860 } 861 if c.ForcedFlushWindowSize != 0 { 862 opts = opts.SetForcedFlushWindowSize(c.ForcedFlushWindowSize) 863 } 864 return opts, nil 865 } 866 867 // jitterBucket determines the max jitter percent for lists whose flush 868 // intervals are no more than the bucket flush interval. 
869 type jitterBucket struct { 870 FlushInterval time.Duration `yaml:"flushInterval" validate:"nonzero"` 871 MaxJitterPercent float64 `yaml:"maxJitterPercent" validate:"min=0.0,max=1.0"` 872 } 873 874 type jitterBuckets []jitterBucket 875 876 func (buckets jitterBuckets) NewMaxJitterFn() (aggregator.FlushJitterFn, error) { 877 numBuckets := len(buckets) 878 if numBuckets == 0 { 879 return nil, errEmptyJitterBucketList 880 } 881 res := make([]jitterBucket, numBuckets) 882 copy(res, buckets) 883 sort.Sort(jitterBucketsByIntervalAscending(res)) 884 885 return func(interval time.Duration) time.Duration { 886 idx := sort.Search(numBuckets, func(i int) bool { 887 return res[i].FlushInterval >= interval 888 }) 889 if idx == numBuckets { 890 idx-- 891 } 892 return time.Duration(res[idx].MaxJitterPercent * float64(interval)) 893 }, nil 894 } 895 896 type jitterBucketsByIntervalAscending []jitterBucket 897 898 func (b jitterBucketsByIntervalAscending) Len() int { return len(b) } 899 func (b jitterBucketsByIntervalAscending) Swap(i, j int) { b[i], b[j] = b[j], b[i] } 900 901 func (b jitterBucketsByIntervalAscending) Less(i, j int) bool { 902 return b[i].FlushInterval < b[j].FlushInterval 903 } 904 905 type metricPrefixSetter func(b []byte) aggregator.Options 906 907 func setMetricPrefix( 908 opts aggregator.Options, 909 str *string, 910 fn metricPrefixSetter, 911 ) aggregator.Options { 912 if str == nil { 913 return opts 914 } 915 return fn([]byte(*str)) 916 } 917 918 // PassthroughConfiguration contains the knobs for pass-through server. 919 type passthroughConfiguration struct { 920 // Enabled controls whether the passthrough server/writer is enabled. 921 Enabled bool `yaml:"enabled"` 922 923 // NumWriters controls the number of passthrough writers used. 
924 NumWriters int `yaml:"numWriters"` 925 } 926 927 func (c *AggregatorConfiguration) newPassthroughWriter( 928 flushHandler handler.Handler, 929 iOpts instrument.Options, 930 shardFn sharding.AggregatedShardFn, 931 ) (writer.Writer, error) { 932 // fallback gracefully 933 if c.Passthrough == nil || !c.Passthrough.Enabled { 934 iOpts.Logger().Info("passthrough writer disabled, blackholing all passthrough writes") 935 return writer.NewBlackholeWriter(), nil 936 } 937 938 count := defaultNumPassthroughWriters 939 if c.Passthrough.NumWriters != 0 { 940 count = c.Passthrough.NumWriters 941 } 942 943 writers := make([]writer.Writer, 0, count) 944 for i := 0; i < count; i++ { 945 writer, err := flushHandler.NewWriter(iOpts.MetricsScope()) 946 if err != nil { 947 return nil, err 948 } 949 writers = append(writers, writer) 950 } 951 952 return writer.NewShardedWriter(writers, shardFn, iOpts) 953 }