github.com/adityamillind98/nomad@v0.11.8/nomad/config.go (about) 1 package nomad 2 3 import ( 4 "fmt" 5 "io" 6 "net" 7 "os" 8 "runtime" 9 "time" 10 11 log "github.com/hashicorp/go-hclog" 12 13 "github.com/hashicorp/memberlist" 14 "github.com/hashicorp/nomad/helper/pluginutils/loader" 15 "github.com/hashicorp/nomad/helper/uuid" 16 "github.com/hashicorp/nomad/nomad/structs" 17 "github.com/hashicorp/nomad/nomad/structs/config" 18 "github.com/hashicorp/nomad/scheduler" 19 "github.com/hashicorp/raft" 20 "github.com/hashicorp/serf/serf" 21 ) 22 23 const ( 24 DefaultRegion = "global" 25 DefaultDC = "dc1" 26 DefaultSerfPort = 4648 27 ) 28 29 // These are the protocol versions that Nomad can understand 30 const ( 31 ProtocolVersionMin uint8 = 1 32 ProtocolVersionMax = 1 33 ) 34 35 // ProtocolVersionMap is the mapping of Nomad protocol versions 36 // to Serf protocol versions. We mask the Serf protocols using 37 // our own protocol version. 38 var protocolVersionMap map[uint8]uint8 39 40 func init() { 41 protocolVersionMap = map[uint8]uint8{ 42 1: 4, 43 } 44 } 45 46 func DefaultRPCAddr() *net.TCPAddr { 47 return &net.TCPAddr{IP: net.ParseIP("127.0.0.1"), Port: 4647} 48 } 49 50 // Config is used to parameterize the server 51 type Config struct { 52 // Bootstrapped indicates if Server has bootstrapped or not. 53 // Its value must be 0 (not bootstrapped) or 1 (bootstrapped). 54 // All operations on Bootstrapped must be handled via `atomic.*Int32()` calls 55 Bootstrapped int32 56 57 // BootstrapExpect mode is used to automatically bring up a 58 // collection of Nomad servers. This can be used to automatically 59 // bring up a collection of nodes. 60 // 61 // The BootstrapExpect can be of any of the following values: 62 // 1: Server will form a single node cluster and become a leader immediately 63 // N, larger than 1: Server will wait until it's connected to N servers 64 // before attempting leadership and forming the cluster. No Raft Log operation 65 // will succeed until then. 66 // 0: Server will wait to get a Raft configuration from another node and may not 67 // attempt to form a cluster or establish leadership on its own. 68 BootstrapExpect int 69 70 // DataDir is the directory to store our state in 71 DataDir string 72 73 // DevMode is used for development purposes only and limits the 74 // use of persistence or state. 75 DevMode bool 76 77 // EnableDebug is used to enable debugging RPC endpoints 78 // in the absence of ACLs 79 EnableDebug bool 80 81 // LogOutput is the location to write logs to. If this is not set, 82 // logs will go to stderr. 83 LogOutput io.Writer 84 85 // Logger is the logger used by the server. 86 Logger log.InterceptLogger 87 88 // ProtocolVersion is the protocol version to speak. This must be between 89 // ProtocolVersionMin and ProtocolVersionMax. 90 ProtocolVersion uint8 91 92 // RPCAddr is the RPC address used by Nomad. This should be reachable 93 // by the other servers and clients 94 RPCAddr *net.TCPAddr 95 96 // ClientRPCAdvertise is the address that is advertised to client nodes for 97 // the RPC endpoint. This can differ from the RPC address, if for example 98 // the RPCAddr is unspecified "0.0.0.0:4646", but this address must be 99 // reachable 100 ClientRPCAdvertise *net.TCPAddr 101 102 // ServerRPCAdvertise is the address that is advertised to other servers for 103 // the RPC endpoint. This can differ from the RPC address, if for example 104 // the RPCAddr is unspecified "0.0.0.0:4646", but this address must be 105 // reachable 106 ServerRPCAdvertise *net.TCPAddr 107 108 // RaftConfig is the configuration used for Raft in the local DC 109 RaftConfig *raft.Config 110 111 // RaftTimeout is applied to any network traffic for raft. Defaults to 10s. 112 RaftTimeout time.Duration 113 114 // (Enterprise-only) NonVoter is used to prevent this server from being added 115 // as a voting member of the Raft cluster. 116 NonVoter bool 117 118 // (Enterprise-only) RedundancyZone is the redundancy zone to use for this server. 119 RedundancyZone string 120 121 // (Enterprise-only) UpgradeVersion is the custom upgrade version to use when 122 // performing upgrade migrations. 123 UpgradeVersion string 124 125 // SerfConfig is the configuration for the serf cluster 126 SerfConfig *serf.Config 127 128 // Node name is the name we use to advertise. Defaults to hostname. 129 NodeName string 130 131 // NodeID is the uuid of this server. 132 NodeID string 133 134 // Region is the region this Nomad server belongs to. 135 Region string 136 137 // AuthoritativeRegion is the region which is treated as the authoritative source 138 // for ACLs and Policies. This provides a single source of truth to resolve conflicts. 139 AuthoritativeRegion string 140 141 // Datacenter is the datacenter this Nomad server belongs to. 142 Datacenter string 143 144 // Build is a string that is gossiped around, and can be used to help 145 // operators track which versions are actively deployed 146 Build string 147 148 // NumSchedulers is the number of scheduler thread that are run. 149 // This can be as many as one per core, or zero to disable this server 150 // from doing any scheduling work. 151 NumSchedulers int 152 153 // EnabledSchedulers controls the set of sub-schedulers that are 154 // enabled for this server to handle. This will restrict the evaluations 155 // that the workers dequeue for processing. 156 EnabledSchedulers []string 157 158 // ReconcileInterval controls how often we reconcile the strongly 159 // consistent store with the Serf info. This is used to handle nodes 160 // that are force removed, as well as intermittent unavailability during 161 // leader election. 162 ReconcileInterval time.Duration 163 164 // EvalGCInterval is how often we dispatch a job to GC evaluations 165 EvalGCInterval time.Duration 166 167 // EvalGCThreshold is how "old" an evaluation must be to be eligible 168 // for GC. This gives users some time to debug a failed evaluation. 169 EvalGCThreshold time.Duration 170 171 // JobGCInterval is how often we dispatch a job to GC jobs that are 172 // available for garbage collection. 173 JobGCInterval time.Duration 174 175 // JobGCThreshold is how old a job must be before it eligible for GC. This gives 176 // the user time to inspect the job. 177 JobGCThreshold time.Duration 178 179 // NodeGCInterval is how often we dispatch a job to GC failed nodes. 180 NodeGCInterval time.Duration 181 182 // NodeGCThreshold is how "old" a node must be to be eligible 183 // for GC. This gives users some time to view and debug a failed nodes. 184 NodeGCThreshold time.Duration 185 186 // DeploymentGCInterval is how often we dispatch a job to GC terminal 187 // deployments. 188 DeploymentGCInterval time.Duration 189 190 // DeploymentGCThreshold is how "old" a deployment must be to be eligible 191 // for GC. This gives users some time to view terminal deployments. 192 DeploymentGCThreshold time.Duration 193 194 // CSIPluginGCInterval is how often we dispatch a job to GC unused plugins. 195 CSIPluginGCInterval time.Duration 196 197 // CSIPluginGCThreshold is how "old" a plugin must be to be eligible 198 // for GC. This gives users some time to debug plugins. 199 CSIPluginGCThreshold time.Duration 200 201 // CSIVolumeClaimGCInterval is how often we dispatch a job to GC 202 // volume claims. 203 CSIVolumeClaimGCInterval time.Duration 204 205 // CSIVolumeClaimGCThreshold is how "old" a volume must be to be 206 // eligible for GC. This gives users some time to debug volumes. 207 CSIVolumeClaimGCThreshold time.Duration 208 209 // EvalNackTimeout controls how long we allow a sub-scheduler to 210 // work on an evaluation before we consider it failed and Nack it. 211 // This allows that evaluation to be handed to another sub-scheduler 212 // to work on. Defaults to 60 seconds. This should be long enough that 213 // no evaluation hits it unless the sub-scheduler has failed. 214 EvalNackTimeout time.Duration 215 216 // EvalDeliveryLimit is the limit of attempts we make to deliver and 217 // process an evaluation. This is used so that an eval that will never 218 // complete eventually fails out of the system. 219 EvalDeliveryLimit int 220 221 // EvalNackInitialReenqueueDelay is the delay applied before reenqueuing a 222 // Nacked evaluation for the first time. This value should be small as the 223 // initial Nack can be due to a down machine and the eval should be retried 224 // quickly for liveliness. 225 EvalNackInitialReenqueueDelay time.Duration 226 227 // EvalNackSubsequentReenqueueDelay is the delay applied before reenqueuing 228 // an evaluation that has been Nacked more than once. This delay is 229 // compounding after the first Nack. This value should be significantly 230 // longer than the initial delay as the purpose it severs is to apply 231 // back-pressure as evaluations are being Nacked either due to scheduler 232 // failures or because they are hitting their Nack timeout, both of which 233 // are signs of high server resource usage. 234 EvalNackSubsequentReenqueueDelay time.Duration 235 236 // EvalFailedFollowupBaselineDelay is the minimum time waited before 237 // retrying a failed evaluation. 238 EvalFailedFollowupBaselineDelay time.Duration 239 240 // EvalFailedFollowupDelayRange defines the range of additional time from 241 // the baseline in which to wait before retrying a failed evaluation. The 242 // additional delay is selected from this range randomly. 243 EvalFailedFollowupDelayRange time.Duration 244 245 // MinHeartbeatTTL is the minimum time between heartbeats. 246 // This is used as a floor to prevent excessive updates. 247 MinHeartbeatTTL time.Duration 248 249 // MaxHeartbeatsPerSecond is the maximum target rate of heartbeats 250 // being processed per second. This allows the TTL to be increased 251 // to meet the target rate. 252 MaxHeartbeatsPerSecond float64 253 254 // HeartbeatGrace is the additional time given as a grace period 255 // beyond the TTL to account for network and processing delays 256 // as well as clock skew. 257 HeartbeatGrace time.Duration 258 259 // FailoverHeartbeatTTL is the TTL applied to heartbeats after 260 // a new leader is elected, since we no longer know the status 261 // of all the heartbeats. 262 FailoverHeartbeatTTL time.Duration 263 264 // ConsulConfig is this Agent's Consul configuration 265 ConsulConfig *config.ConsulConfig 266 267 // VaultConfig is this Agent's Vault configuration 268 VaultConfig *config.VaultConfig 269 270 // RPCHoldTimeout is how long an RPC can be "held" before it is errored. 271 // This is used to paper over a loss of leadership by instead holding RPCs, 272 // so that the caller experiences a slow response rather than an error. 273 // This period is meant to be long enough for a leader election to take 274 // place, and a small jitter is applied to avoid a thundering herd. 275 RPCHoldTimeout time.Duration 276 277 // TLSConfig holds various TLS related configurations 278 TLSConfig *config.TLSConfig 279 280 // ACLEnabled controls if ACL enforcement and management is enabled. 281 ACLEnabled bool 282 283 // ReplicationBackoff is how much we backoff when replication errors. 284 // This is a tunable knob for testing primarily. 285 ReplicationBackoff time.Duration 286 287 // ReplicationToken is the ACL Token Secret ID used to fetch from 288 // the Authoritative Region. 289 ReplicationToken string 290 291 // SentinelGCInterval is the interval that we GC unused policies. 292 SentinelGCInterval time.Duration 293 294 // SentinelConfig is this Agent's Sentinel configuration 295 SentinelConfig *config.SentinelConfig 296 297 // StatsCollectionInterval is the interval at which the Nomad server 298 // publishes metrics which are periodic in nature like updating gauges 299 StatsCollectionInterval time.Duration 300 301 // DisableTaggedMetrics determines whether metrics will be displayed via a 302 // key/value/tag format, or simply a key/value format 303 DisableTaggedMetrics bool 304 305 // DisableDispatchedJobSummaryMetrics allows for ignore dispatched jobs when 306 // publishing Job summary metrics 307 DisableDispatchedJobSummaryMetrics bool 308 309 // BackwardsCompatibleMetrics determines whether to show methods of 310 // displaying metrics for older versions, or to only show the new format 311 BackwardsCompatibleMetrics bool 312 313 // AutopilotConfig is used to apply the initial autopilot config when 314 // bootstrapping. 315 AutopilotConfig *structs.AutopilotConfig 316 317 // ServerHealthInterval is the frequency with which the health of the 318 // servers in the cluster will be updated. 319 ServerHealthInterval time.Duration 320 321 // AutopilotInterval is the frequency with which the leader will perform 322 // autopilot tasks, such as promoting eligible non-voters and removing 323 // dead servers. 324 AutopilotInterval time.Duration 325 326 // DefaultSchedulerConfig configures the initial scheduler config to be persisted in Raft. 327 // Once the cluster is bootstrapped, and Raft persists the config (from here or through API), 328 // This value is ignored. 329 DefaultSchedulerConfig structs.SchedulerConfiguration `hcl:"default_scheduler_config"` 330 331 // PluginLoader is used to load plugins. 332 PluginLoader loader.PluginCatalog 333 334 // PluginSingletonLoader is a plugin loader that will returns singleton 335 // instances of the plugins. 336 PluginSingletonLoader loader.PluginCatalog 337 338 // RPCHandshakeTimeout is the deadline by which RPC handshakes must 339 // complete. The RPC handshake includes the first byte read as well as 340 // the TLS handshake and subsequent byte read if TLS is enabled. 341 // 342 // The deadline is reset after the first byte is read so when TLS is 343 // enabled RPC connections may take (timeout * 2) to complete. 344 // 345 // 0 means no timeout. 346 RPCHandshakeTimeout time.Duration 347 348 // RPCMaxConnsPerClient is the maximum number of concurrent RPC 349 // connections from a single IP address. nil/0 means no limit. 350 RPCMaxConnsPerClient int 351 } 352 353 // CheckVersion is used to check if the ProtocolVersion is valid 354 func (c *Config) CheckVersion() error { 355 if c.ProtocolVersion < ProtocolVersionMin { 356 return fmt.Errorf("Protocol version '%d' too low. Must be in range: [%d, %d]", 357 c.ProtocolVersion, ProtocolVersionMin, ProtocolVersionMax) 358 } else if c.ProtocolVersion > ProtocolVersionMax { 359 return fmt.Errorf("Protocol version '%d' too high. Must be in range: [%d, %d]", 360 c.ProtocolVersion, ProtocolVersionMin, ProtocolVersionMax) 361 } 362 return nil 363 } 364 365 // DefaultConfig returns the default configuration. Only used as the basis for 366 // merging agent or test parameters. 367 func DefaultConfig() *Config { 368 hostname, err := os.Hostname() 369 if err != nil { 370 panic(err) 371 } 372 373 c := &Config{ 374 Region: DefaultRegion, 375 AuthoritativeRegion: DefaultRegion, 376 Datacenter: DefaultDC, 377 NodeName: hostname, 378 NodeID: uuid.Generate(), 379 ProtocolVersion: ProtocolVersionMax, 380 RaftConfig: raft.DefaultConfig(), 381 RaftTimeout: 10 * time.Second, 382 LogOutput: os.Stderr, 383 RPCAddr: DefaultRPCAddr(), 384 SerfConfig: serf.DefaultConfig(), 385 NumSchedulers: 1, 386 ReconcileInterval: 60 * time.Second, 387 EvalGCInterval: 5 * time.Minute, 388 EvalGCThreshold: 1 * time.Hour, 389 JobGCInterval: 5 * time.Minute, 390 JobGCThreshold: 4 * time.Hour, 391 NodeGCInterval: 5 * time.Minute, 392 NodeGCThreshold: 24 * time.Hour, 393 DeploymentGCInterval: 5 * time.Minute, 394 DeploymentGCThreshold: 1 * time.Hour, 395 CSIPluginGCInterval: 5 * time.Minute, 396 CSIPluginGCThreshold: 1 * time.Hour, 397 CSIVolumeClaimGCInterval: 5 * time.Minute, 398 CSIVolumeClaimGCThreshold: 1 * time.Hour, 399 EvalNackTimeout: 60 * time.Second, 400 EvalDeliveryLimit: 3, 401 EvalNackInitialReenqueueDelay: 1 * time.Second, 402 EvalNackSubsequentReenqueueDelay: 20 * time.Second, 403 EvalFailedFollowupBaselineDelay: 1 * time.Minute, 404 EvalFailedFollowupDelayRange: 5 * time.Minute, 405 MinHeartbeatTTL: 10 * time.Second, 406 MaxHeartbeatsPerSecond: 50.0, 407 HeartbeatGrace: 10 * time.Second, 408 FailoverHeartbeatTTL: 300 * time.Second, 409 ConsulConfig: config.DefaultConsulConfig(), 410 VaultConfig: config.DefaultVaultConfig(), 411 RPCHoldTimeout: 5 * time.Second, 412 StatsCollectionInterval: 1 * time.Minute, 413 TLSConfig: &config.TLSConfig{}, 414 ReplicationBackoff: 30 * time.Second, 415 SentinelGCInterval: 30 * time.Second, 416 AutopilotConfig: &structs.AutopilotConfig{ 417 CleanupDeadServers: true, 418 LastContactThreshold: 200 * time.Millisecond, 419 MaxTrailingLogs: 250, 420 ServerStabilizationTime: 10 * time.Second, 421 }, 422 ServerHealthInterval: 2 * time.Second, 423 AutopilotInterval: 10 * time.Second, 424 DefaultSchedulerConfig: structs.SchedulerConfiguration{ 425 SchedulerAlgorithm: structs.SchedulerAlgorithmBinpack, 426 PreemptionConfig: structs.PreemptionConfig{ 427 SystemSchedulerEnabled: true, 428 BatchSchedulerEnabled: false, 429 ServiceSchedulerEnabled: false, 430 }, 431 }, 432 } 433 434 // Enable all known schedulers by default 435 c.EnabledSchedulers = make([]string, 0, len(scheduler.BuiltinSchedulers)) 436 for name := range scheduler.BuiltinSchedulers { 437 c.EnabledSchedulers = append(c.EnabledSchedulers, name) 438 } 439 c.EnabledSchedulers = append(c.EnabledSchedulers, structs.JobTypeCore) 440 441 // Default the number of schedulers to match the cores 442 c.NumSchedulers = runtime.NumCPU() 443 444 // Increase our reap interval to 3 days instead of 24h. 445 c.SerfConfig.ReconnectTimeout = 3 * 24 * time.Hour 446 447 // Serf should use the WAN timing, since we are using it 448 // to communicate between DC's 449 c.SerfConfig.MemberlistConfig = memberlist.DefaultWANConfig() 450 c.SerfConfig.MemberlistConfig.BindPort = DefaultSerfPort 451 452 // Disable shutdown on removal 453 c.RaftConfig.ShutdownOnRemove = false 454 455 // Default to Raft v2, update to v3 to enable new Raft and autopilot features. 456 c.RaftConfig.ProtocolVersion = 2 457 458 return c 459 }