github.com/manicqin/nomad@v0.9.5/nomad/config.go (about) 1 package nomad 2 3 import ( 4 "fmt" 5 "io" 6 "net" 7 "os" 8 "runtime" 9 "time" 10 11 log "github.com/hashicorp/go-hclog" 12 13 "github.com/hashicorp/memberlist" 14 "github.com/hashicorp/nomad/helper/pluginutils/loader" 15 "github.com/hashicorp/nomad/helper/uuid" 16 "github.com/hashicorp/nomad/nomad/structs" 17 "github.com/hashicorp/nomad/nomad/structs/config" 18 "github.com/hashicorp/nomad/scheduler" 19 "github.com/hashicorp/raft" 20 "github.com/hashicorp/serf/serf" 21 ) 22 23 const ( 24 DefaultRegion = "global" 25 DefaultDC = "dc1" 26 DefaultSerfPort = 4648 27 ) 28 29 // These are the protocol versions that Nomad can understand 30 const ( 31 ProtocolVersionMin uint8 = 1 32 ProtocolVersionMax = 1 33 ) 34 35 // ProtocolVersionMap is the mapping of Nomad protocol versions 36 // to Serf protocol versions. We mask the Serf protocols using 37 // our own protocol version. 38 var protocolVersionMap map[uint8]uint8 39 40 func init() { 41 protocolVersionMap = map[uint8]uint8{ 42 1: 4, 43 } 44 } 45 46 var ( 47 DefaultRPCAddr = &net.TCPAddr{IP: net.ParseIP("127.0.0.1"), Port: 4647} 48 ) 49 50 // Config is used to parameterize the server 51 type Config struct { 52 // Bootstrap mode is used to bring up the first Nomad server. It is 53 // required so that it can elect a leader without any other nodes 54 // being present 55 Bootstrap bool 56 57 // BootstrapExpect mode is used to automatically bring up a 58 // collection of Nomad servers. This can be used to automatically 59 // bring up a collection of nodes. All operations on BootstrapExpect 60 // must be handled via `atomic.*Int32()` calls. 61 BootstrapExpect int32 62 63 // DataDir is the directory to store our state in 64 DataDir string 65 66 // DevMode is used for development purposes only and limits the 67 // use of persistence or state. 68 DevMode bool 69 70 // EnableDebug is used to enable debugging RPC endpoints 71 // in the absence of ACLs 72 EnableDebug bool 73 74 // DevDisableBootstrap is used to disable bootstrap mode while 75 // in DevMode. This is largely used for testing. 76 DevDisableBootstrap bool 77 78 // LogOutput is the location to write logs to. If this is not set, 79 // logs will go to stderr. 80 LogOutput io.Writer 81 82 // Logger is the logger used by the server. 83 Logger log.InterceptLogger 84 85 // ProtocolVersion is the protocol version to speak. This must be between 86 // ProtocolVersionMin and ProtocolVersionMax. 87 ProtocolVersion uint8 88 89 // RPCAddr is the RPC address used by Nomad. This should be reachable 90 // by the other servers and clients 91 RPCAddr *net.TCPAddr 92 93 // ClientRPCAdvertise is the address that is advertised to client nodes for 94 // the RPC endpoint. This can differ from the RPC address, if for example 95 // the RPCAddr is unspecified "0.0.0.0:4646", but this address must be 96 // reachable 97 ClientRPCAdvertise *net.TCPAddr 98 99 // ServerRPCAdvertise is the address that is advertised to other servers for 100 // the RPC endpoint. This can differ from the RPC address, if for example 101 // the RPCAddr is unspecified "0.0.0.0:4646", but this address must be 102 // reachable 103 ServerRPCAdvertise *net.TCPAddr 104 105 // RaftConfig is the configuration used for Raft in the local DC 106 RaftConfig *raft.Config 107 108 // RaftTimeout is applied to any network traffic for raft. Defaults to 10s. 109 RaftTimeout time.Duration 110 111 // (Enterprise-only) NonVoter is used to prevent this server from being added 112 // as a voting member of the Raft cluster. 113 NonVoter bool 114 115 // (Enterprise-only) RedundancyZone is the redundancy zone to use for this server. 116 RedundancyZone string 117 118 // (Enterprise-only) UpgradeVersion is the custom upgrade version to use when 119 // performing upgrade migrations. 120 UpgradeVersion string 121 122 // SerfConfig is the configuration for the serf cluster 123 SerfConfig *serf.Config 124 125 // Node name is the name we use to advertise. Defaults to hostname. 126 NodeName string 127 128 // NodeID is the uuid of this server. 129 NodeID string 130 131 // Region is the region this Nomad server belongs to. 132 Region string 133 134 // AuthoritativeRegion is the region which is treated as the authoritative source 135 // for ACLs and Policies. This provides a single source of truth to resolve conflicts. 136 AuthoritativeRegion string 137 138 // Datacenter is the datacenter this Nomad server belongs to. 139 Datacenter string 140 141 // Build is a string that is gossiped around, and can be used to help 142 // operators track which versions are actively deployed 143 Build string 144 145 // NumSchedulers is the number of scheduler thread that are run. 146 // This can be as many as one per core, or zero to disable this server 147 // from doing any scheduling work. 148 NumSchedulers int 149 150 // EnabledSchedulers controls the set of sub-schedulers that are 151 // enabled for this server to handle. This will restrict the evaluations 152 // that the workers dequeue for processing. 153 EnabledSchedulers []string 154 155 // ReconcileInterval controls how often we reconcile the strongly 156 // consistent store with the Serf info. This is used to handle nodes 157 // that are force removed, as well as intermittent unavailability during 158 // leader election. 159 ReconcileInterval time.Duration 160 161 // EvalGCInterval is how often we dispatch a job to GC evaluations 162 EvalGCInterval time.Duration 163 164 // EvalGCThreshold is how "old" an evaluation must be to be eligible 165 // for GC. This gives users some time to debug a failed evaluation. 166 EvalGCThreshold time.Duration 167 168 // JobGCInterval is how often we dispatch a job to GC jobs that are 169 // available for garbage collection. 170 JobGCInterval time.Duration 171 172 // JobGCThreshold is how old a job must be before it eligible for GC. This gives 173 // the user time to inspect the job. 174 JobGCThreshold time.Duration 175 176 // NodeGCInterval is how often we dispatch a job to GC failed nodes. 177 NodeGCInterval time.Duration 178 179 // NodeGCThreshold is how "old" a node must be to be eligible 180 // for GC. This gives users some time to view and debug a failed nodes. 181 NodeGCThreshold time.Duration 182 183 // DeploymentGCInterval is how often we dispatch a job to GC terminal 184 // deployments. 185 DeploymentGCInterval time.Duration 186 187 // DeploymentGCThreshold is how "old" a deployment must be to be eligible 188 // for GC. This gives users some time to view terminal deployments. 189 DeploymentGCThreshold time.Duration 190 191 // EvalNackTimeout controls how long we allow a sub-scheduler to 192 // work on an evaluation before we consider it failed and Nack it. 193 // This allows that evaluation to be handed to another sub-scheduler 194 // to work on. Defaults to 60 seconds. This should be long enough that 195 // no evaluation hits it unless the sub-scheduler has failed. 196 EvalNackTimeout time.Duration 197 198 // EvalDeliveryLimit is the limit of attempts we make to deliver and 199 // process an evaluation. This is used so that an eval that will never 200 // complete eventually fails out of the system. 201 EvalDeliveryLimit int 202 203 // EvalNackInitialReenqueueDelay is the delay applied before reenqueuing a 204 // Nacked evaluation for the first time. This value should be small as the 205 // initial Nack can be due to a down machine and the eval should be retried 206 // quickly for liveliness. 207 EvalNackInitialReenqueueDelay time.Duration 208 209 // EvalNackSubsequentReenqueueDelay is the delay applied before reenqueuing 210 // an evaluation that has been Nacked more than once. This delay is 211 // compounding after the first Nack. This value should be significantly 212 // longer than the initial delay as the purpose it severs is to apply 213 // back-pressure as evaluations are being Nacked either due to scheduler 214 // failures or because they are hitting their Nack timeout, both of which 215 // are signs of high server resource usage. 216 EvalNackSubsequentReenqueueDelay time.Duration 217 218 // EvalFailedFollowupBaselineDelay is the minimum time waited before 219 // retrying a failed evaluation. 220 EvalFailedFollowupBaselineDelay time.Duration 221 222 // EvalFailedFollowupDelayRange defines the range of additional time from 223 // the baseline in which to wait before retrying a failed evaluation. The 224 // additional delay is selected from this range randomly. 225 EvalFailedFollowupDelayRange time.Duration 226 227 // MinHeartbeatTTL is the minimum time between heartbeats. 228 // This is used as a floor to prevent excessive updates. 229 MinHeartbeatTTL time.Duration 230 231 // MaxHeartbeatsPerSecond is the maximum target rate of heartbeats 232 // being processed per second. This allows the TTL to be increased 233 // to meet the target rate. 234 MaxHeartbeatsPerSecond float64 235 236 // HeartbeatGrace is the additional time given as a grace period 237 // beyond the TTL to account for network and processing delays 238 // as well as clock skew. 239 HeartbeatGrace time.Duration 240 241 // FailoverHeartbeatTTL is the TTL applied to heartbeats after 242 // a new leader is elected, since we no longer know the status 243 // of all the heartbeats. 244 FailoverHeartbeatTTL time.Duration 245 246 // ConsulConfig is this Agent's Consul configuration 247 ConsulConfig *config.ConsulConfig 248 249 // VaultConfig is this Agent's Vault configuration 250 VaultConfig *config.VaultConfig 251 252 // RPCHoldTimeout is how long an RPC can be "held" before it is errored. 253 // This is used to paper over a loss of leadership by instead holding RPCs, 254 // so that the caller experiences a slow response rather than an error. 255 // This period is meant to be long enough for a leader election to take 256 // place, and a small jitter is applied to avoid a thundering herd. 257 RPCHoldTimeout time.Duration 258 259 // TLSConfig holds various TLS related configurations 260 TLSConfig *config.TLSConfig 261 262 // ACLEnabled controls if ACL enforcement and management is enabled. 263 ACLEnabled bool 264 265 // ACLEnforceNode controls if ACL enforced on node endpoints 266 ACLEnforceNode bool 267 268 // ReplicationBackoff is how much we backoff when replication errors. 269 // This is a tunable knob for testing primarily. 270 ReplicationBackoff time.Duration 271 272 // ReplicationToken is the ACL Token Secret ID used to fetch from 273 // the Authoritative Region. 274 ReplicationToken string 275 276 // SentinelGCInterval is the interval that we GC unused policies. 277 SentinelGCInterval time.Duration 278 279 // SentinelConfig is this Agent's Sentinel configuration 280 SentinelConfig *config.SentinelConfig 281 282 // StatsCollectionInterval is the interval at which the Nomad server 283 // publishes metrics which are periodic in nature like updating gauges 284 StatsCollectionInterval time.Duration 285 286 // DisableTaggedMetrics determines whether metrics will be displayed via a 287 // key/value/tag format, or simply a key/value format 288 DisableTaggedMetrics bool 289 290 // DisableDispatchedJobSummaryMetrics allows for ignore dispatched jobs when 291 // publishing Job summary metrics 292 DisableDispatchedJobSummaryMetrics bool 293 294 // BackwardsCompatibleMetrics determines whether to show methods of 295 // displaying metrics for older versions, or to only show the new format 296 BackwardsCompatibleMetrics bool 297 298 // AutopilotConfig is used to apply the initial autopilot config when 299 // bootstrapping. 300 AutopilotConfig *structs.AutopilotConfig 301 302 // ServerHealthInterval is the frequency with which the health of the 303 // servers in the cluster will be updated. 304 ServerHealthInterval time.Duration 305 306 // AutopilotInterval is the frequency with which the leader will perform 307 // autopilot tasks, such as promoting eligible non-voters and removing 308 // dead servers. 309 AutopilotInterval time.Duration 310 311 // PluginLoader is used to load plugins. 312 PluginLoader loader.PluginCatalog 313 314 // PluginSingletonLoader is a plugin loader that will returns singleton 315 // instances of the plugins. 316 PluginSingletonLoader loader.PluginCatalog 317 } 318 319 // CheckVersion is used to check if the ProtocolVersion is valid 320 func (c *Config) CheckVersion() error { 321 if c.ProtocolVersion < ProtocolVersionMin { 322 return fmt.Errorf("Protocol version '%d' too low. Must be in range: [%d, %d]", 323 c.ProtocolVersion, ProtocolVersionMin, ProtocolVersionMax) 324 } else if c.ProtocolVersion > ProtocolVersionMax { 325 return fmt.Errorf("Protocol version '%d' too high. Must be in range: [%d, %d]", 326 c.ProtocolVersion, ProtocolVersionMin, ProtocolVersionMax) 327 } 328 return nil 329 } 330 331 // DefaultConfig returns the default configuration 332 func DefaultConfig() *Config { 333 hostname, err := os.Hostname() 334 if err != nil { 335 panic(err) 336 } 337 338 c := &Config{ 339 Region: DefaultRegion, 340 AuthoritativeRegion: DefaultRegion, 341 Datacenter: DefaultDC, 342 NodeName: hostname, 343 NodeID: uuid.Generate(), 344 ProtocolVersion: ProtocolVersionMax, 345 RaftConfig: raft.DefaultConfig(), 346 RaftTimeout: 10 * time.Second, 347 LogOutput: os.Stderr, 348 RPCAddr: DefaultRPCAddr, 349 SerfConfig: serf.DefaultConfig(), 350 NumSchedulers: 1, 351 ReconcileInterval: 60 * time.Second, 352 EvalGCInterval: 5 * time.Minute, 353 EvalGCThreshold: 1 * time.Hour, 354 JobGCInterval: 5 * time.Minute, 355 JobGCThreshold: 4 * time.Hour, 356 NodeGCInterval: 5 * time.Minute, 357 NodeGCThreshold: 24 * time.Hour, 358 DeploymentGCInterval: 5 * time.Minute, 359 DeploymentGCThreshold: 1 * time.Hour, 360 EvalNackTimeout: 60 * time.Second, 361 EvalDeliveryLimit: 3, 362 EvalNackInitialReenqueueDelay: 1 * time.Second, 363 EvalNackSubsequentReenqueueDelay: 20 * time.Second, 364 EvalFailedFollowupBaselineDelay: 1 * time.Minute, 365 EvalFailedFollowupDelayRange: 5 * time.Minute, 366 MinHeartbeatTTL: 10 * time.Second, 367 MaxHeartbeatsPerSecond: 50.0, 368 HeartbeatGrace: 10 * time.Second, 369 FailoverHeartbeatTTL: 300 * time.Second, 370 ConsulConfig: config.DefaultConsulConfig(), 371 VaultConfig: config.DefaultVaultConfig(), 372 RPCHoldTimeout: 5 * time.Second, 373 StatsCollectionInterval: 1 * time.Minute, 374 TLSConfig: &config.TLSConfig{}, 375 ReplicationBackoff: 30 * time.Second, 376 SentinelGCInterval: 30 * time.Second, 377 AutopilotConfig: &structs.AutopilotConfig{ 378 CleanupDeadServers: true, 379 LastContactThreshold: 200 * time.Millisecond, 380 MaxTrailingLogs: 250, 381 ServerStabilizationTime: 10 * time.Second, 382 }, 383 ServerHealthInterval: 2 * time.Second, 384 AutopilotInterval: 10 * time.Second, 385 } 386 387 // Enable all known schedulers by default 388 c.EnabledSchedulers = make([]string, 0, len(scheduler.BuiltinSchedulers)) 389 for name := range scheduler.BuiltinSchedulers { 390 c.EnabledSchedulers = append(c.EnabledSchedulers, name) 391 } 392 c.EnabledSchedulers = append(c.EnabledSchedulers, structs.JobTypeCore) 393 394 // Default the number of schedulers to match the cores 395 c.NumSchedulers = runtime.NumCPU() 396 397 // Increase our reap interval to 3 days instead of 24h. 398 c.SerfConfig.ReconnectTimeout = 3 * 24 * time.Hour 399 400 // Serf should use the WAN timing, since we are using it 401 // to communicate between DC's 402 c.SerfConfig.MemberlistConfig = memberlist.DefaultWANConfig() 403 c.SerfConfig.MemberlistConfig.BindPort = DefaultSerfPort 404 405 // Disable shutdown on removal 406 c.RaftConfig.ShutdownOnRemove = false 407 408 // Default to Raft v2, update to v3 to enable new Raft and autopilot features. 409 c.RaftConfig.ProtocolVersion = 2 410 411 return c 412 }