github.com/jrxfive/nomad@v0.6.1-0.20170802162750-1fef470e89bf/nomad/config.go (about) 1 package nomad 2 3 import ( 4 "fmt" 5 "io" 6 "net" 7 "os" 8 "runtime" 9 "time" 10 11 "github.com/hashicorp/memberlist" 12 "github.com/hashicorp/nomad/helper/tlsutil" 13 "github.com/hashicorp/nomad/nomad/structs" 14 "github.com/hashicorp/nomad/nomad/structs/config" 15 "github.com/hashicorp/nomad/scheduler" 16 "github.com/hashicorp/raft" 17 "github.com/hashicorp/serf/serf" 18 ) 19 20 const ( 21 DefaultRegion = "global" 22 DefaultDC = "dc1" 23 DefaultSerfPort = 4648 24 ) 25 26 // These are the protocol versions that Nomad can understand 27 const ( 28 ProtocolVersionMin uint8 = 1 29 ProtocolVersionMax = 1 30 ) 31 32 // ProtocolVersionMap is the mapping of Nomad protocol versions 33 // to Serf protocol versions. We mask the Serf protocols using 34 // our own protocol version. 35 var protocolVersionMap map[uint8]uint8 36 37 func init() { 38 protocolVersionMap = map[uint8]uint8{ 39 1: 4, 40 } 41 } 42 43 var ( 44 DefaultRPCAddr = &net.TCPAddr{IP: net.ParseIP("127.0.0.1"), Port: 4647} 45 ) 46 47 // Config is used to parameterize the server 48 type Config struct { 49 // Bootstrap mode is used to bring up the first Nomad server. It is 50 // required so that it can elect a leader without any other nodes 51 // being present 52 Bootstrap bool 53 54 // BootstrapExpect mode is used to automatically bring up a 55 // collection of Nomad servers. This can be used to automatically 56 // bring up a collection of nodes. All operations on BootstrapExpect 57 // must be handled via `atomic.*Int32()` calls. 58 BootstrapExpect int32 59 60 // DataDir is the directory to store our state in 61 DataDir string 62 63 // DevMode is used for development purposes only and limits the 64 // use of persistence or state. 65 DevMode bool 66 67 // DevDisableBootstrap is used to disable bootstrap mode while 68 // in DevMode. This is largely used for testing. 69 DevDisableBootstrap bool 70 71 // LogOutput is the location to write logs to. If this is not set, 72 // logs will go to stderr. 73 LogOutput io.Writer 74 75 // ProtocolVersion is the protocol version to speak. This must be between 76 // ProtocolVersionMin and ProtocolVersionMax. 77 ProtocolVersion uint8 78 79 // RPCAddr is the RPC address used by Nomad. This should be reachable 80 // by the other servers and clients 81 RPCAddr *net.TCPAddr 82 83 // RPCAdvertise is the address that is advertised to other nodes for 84 // the RPC endpoint. This can differ from the RPC address, if for example 85 // the RPCAddr is unspecified "0.0.0.0:4646", but this address must be 86 // reachable 87 RPCAdvertise *net.TCPAddr 88 89 // RaftConfig is the configuration used for Raft in the local DC 90 RaftConfig *raft.Config 91 92 // RaftTimeout is applied to any network traffic for raft. Defaults to 10s. 93 RaftTimeout time.Duration 94 95 // SerfConfig is the configuration for the serf cluster 96 SerfConfig *serf.Config 97 98 // Node name is the name we use to advertise. Defaults to hostname. 99 NodeName string 100 101 // Region is the region this Nomad server belongs to. 102 Region string 103 104 // Datacenter is the datacenter this Nomad server belongs to. 105 Datacenter string 106 107 // Build is a string that is gossiped around, and can be used to help 108 // operators track which versions are actively deployed 109 Build string 110 111 // NumSchedulers is the number of scheduler thread that are run. 112 // This can be as many as one per core, or zero to disable this server 113 // from doing any scheduling work. 114 NumSchedulers int 115 116 // EnabledSchedulers controls the set of sub-schedulers that are 117 // enabled for this server to handle. This will restrict the evaluations 118 // that the workers dequeue for processing. 119 EnabledSchedulers []string 120 121 // ReconcileInterval controls how often we reconcile the strongly 122 // consistent store with the Serf info. This is used to handle nodes 123 // that are force removed, as well as intermittent unavailability during 124 // leader election. 125 ReconcileInterval time.Duration 126 127 // EvalGCInterval is how often we dispatch a job to GC evaluations 128 EvalGCInterval time.Duration 129 130 // EvalGCThreshold is how "old" an evaluation must be to be eligible 131 // for GC. This gives users some time to debug a failed evaluation. 132 EvalGCThreshold time.Duration 133 134 // JobGCInterval is how often we dispatch a job to GC jobs that are 135 // available for garbage collection. 136 JobGCInterval time.Duration 137 138 // JobGCThreshold is how old a job must be before it eligible for GC. This gives 139 // the user time to inspect the job. 140 JobGCThreshold time.Duration 141 142 // NodeGCInterval is how often we dispatch a job to GC failed nodes. 143 NodeGCInterval time.Duration 144 145 // NodeGCThreshold is how "old" a node must be to be eligible 146 // for GC. This gives users some time to view and debug a failed nodes. 147 NodeGCThreshold time.Duration 148 149 // DeploymentGCInterval is how often we dispatch a job to GC terminal 150 // deployments. 151 DeploymentGCInterval time.Duration 152 153 // DeploymentGCThreshold is how "old" a deployment must be to be eligible 154 // for GC. This gives users some time to view terminal deployments. 155 DeploymentGCThreshold time.Duration 156 157 // EvalNackTimeout controls how long we allow a sub-scheduler to 158 // work on an evaluation before we consider it failed and Nack it. 159 // This allows that evaluation to be handed to another sub-scheduler 160 // to work on. Defaults to 60 seconds. This should be long enough that 161 // no evaluation hits it unless the sub-scheduler has failed. 162 EvalNackTimeout time.Duration 163 164 // EvalDeliveryLimit is the limit of attempts we make to deliver and 165 // process an evaluation. This is used so that an eval that will never 166 // complete eventually fails out of the system. 167 EvalDeliveryLimit int 168 169 // EvalNackInitialReenqueueDelay is the delay applied before reenqueuing a 170 // Nacked evaluation for the first time. This value should be small as the 171 // initial Nack can be due to a down machine and the eval should be retried 172 // quickly for liveliness. 173 EvalNackInitialReenqueueDelay time.Duration 174 175 // EvalNackSubsequentReenqueueDelay is the delay applied before reenqueuing 176 // an evaluation that has been Nacked more than once. This delay is 177 // compounding after the first Nack. This value should be significantly 178 // longer than the initial delay as the purpose it severs is to apply 179 // back-pressure as evaluatiions are being Nacked either due to scheduler 180 // failures or because they are hitting their Nack timeout, both of which 181 // are signs of high server resource usage. 182 EvalNackSubsequentReenqueueDelay time.Duration 183 184 // EvalFailedFollowupBaselineDelay is the minimum time waited before 185 // retrying a failed evaluation. 186 EvalFailedFollowupBaselineDelay time.Duration 187 188 // EvalFailedFollowupDelayRange defines the range of additional time from 189 // the baseline in which to wait before retrying a failed evaluation. The 190 // additional delay is selected from this range randomly. 191 EvalFailedFollowupDelayRange time.Duration 192 193 // MinHeartbeatTTL is the minimum time between heartbeats. 194 // This is used as a floor to prevent excessive updates. 195 MinHeartbeatTTL time.Duration 196 197 // MaxHeartbeatsPerSecond is the maximum target rate of heartbeats 198 // being processed per second. This allows the TTL to be increased 199 // to meet the target rate. 200 MaxHeartbeatsPerSecond float64 201 202 // HeartbeatGrace is the additional time given as a grace period 203 // beyond the TTL to account for network and processing delays 204 // as well as clock skew. 205 HeartbeatGrace time.Duration 206 207 // FailoverHeartbeatTTL is the TTL applied to heartbeats after 208 // a new leader is elected, since we no longer know the status 209 // of all the heartbeats. 210 FailoverHeartbeatTTL time.Duration 211 212 // ConsulConfig is this Agent's Consul configuration 213 ConsulConfig *config.ConsulConfig 214 215 // VaultConfig is this Agent's Vault configuration 216 VaultConfig *config.VaultConfig 217 218 // RPCHoldTimeout is how long an RPC can be "held" before it is errored. 219 // This is used to paper over a loss of leadership by instead holding RPCs, 220 // so that the caller experiences a slow response rather than an error. 221 // This period is meant to be long enough for a leader election to take 222 // place, and a small jitter is applied to avoid a thundering herd. 223 RPCHoldTimeout time.Duration 224 225 // TLSConfig holds various TLS related configurations 226 TLSConfig *config.TLSConfig 227 } 228 229 // CheckVersion is used to check if the ProtocolVersion is valid 230 func (c *Config) CheckVersion() error { 231 if c.ProtocolVersion < ProtocolVersionMin { 232 return fmt.Errorf("Protocol version '%d' too low. Must be in range: [%d, %d]", 233 c.ProtocolVersion, ProtocolVersionMin, ProtocolVersionMax) 234 } else if c.ProtocolVersion > ProtocolVersionMax { 235 return fmt.Errorf("Protocol version '%d' too high. Must be in range: [%d, %d]", 236 c.ProtocolVersion, ProtocolVersionMin, ProtocolVersionMax) 237 } 238 return nil 239 } 240 241 // DefaultConfig returns the default configuration 242 func DefaultConfig() *Config { 243 hostname, err := os.Hostname() 244 if err != nil { 245 panic(err) 246 } 247 248 c := &Config{ 249 Region: DefaultRegion, 250 Datacenter: DefaultDC, 251 NodeName: hostname, 252 ProtocolVersion: ProtocolVersionMax, 253 RaftConfig: raft.DefaultConfig(), 254 RaftTimeout: 10 * time.Second, 255 LogOutput: os.Stderr, 256 RPCAddr: DefaultRPCAddr, 257 SerfConfig: serf.DefaultConfig(), 258 NumSchedulers: 1, 259 ReconcileInterval: 60 * time.Second, 260 EvalGCInterval: 5 * time.Minute, 261 EvalGCThreshold: 1 * time.Hour, 262 JobGCInterval: 5 * time.Minute, 263 JobGCThreshold: 4 * time.Hour, 264 NodeGCInterval: 5 * time.Minute, 265 NodeGCThreshold: 24 * time.Hour, 266 DeploymentGCInterval: 5 * time.Minute, 267 DeploymentGCThreshold: 1 * time.Hour, 268 EvalNackTimeout: 60 * time.Second, 269 EvalDeliveryLimit: 3, 270 EvalNackInitialReenqueueDelay: 1 * time.Second, 271 EvalNackSubsequentReenqueueDelay: 20 * time.Second, 272 EvalFailedFollowupBaselineDelay: 1 * time.Minute, 273 EvalFailedFollowupDelayRange: 5 * time.Minute, 274 MinHeartbeatTTL: 10 * time.Second, 275 MaxHeartbeatsPerSecond: 50.0, 276 HeartbeatGrace: 10 * time.Second, 277 FailoverHeartbeatTTL: 300 * time.Second, 278 ConsulConfig: config.DefaultConsulConfig(), 279 VaultConfig: config.DefaultVaultConfig(), 280 RPCHoldTimeout: 5 * time.Second, 281 TLSConfig: &config.TLSConfig{}, 282 } 283 284 // Enable all known schedulers by default 285 c.EnabledSchedulers = make([]string, 0, len(scheduler.BuiltinSchedulers)) 286 for name := range scheduler.BuiltinSchedulers { 287 c.EnabledSchedulers = append(c.EnabledSchedulers, name) 288 } 289 c.EnabledSchedulers = append(c.EnabledSchedulers, structs.JobTypeCore) 290 291 // Default the number of schedulers to match the coores 292 c.NumSchedulers = runtime.NumCPU() 293 294 // Increase our reap interval to 3 days instead of 24h. 295 c.SerfConfig.ReconnectTimeout = 3 * 24 * time.Hour 296 297 // Serf should use the WAN timing, since we are using it 298 // to communicate between DC's 299 c.SerfConfig.MemberlistConfig = memberlist.DefaultWANConfig() 300 c.SerfConfig.MemberlistConfig.BindPort = DefaultSerfPort 301 302 // Disable shutdown on removal 303 c.RaftConfig.ShutdownOnRemove = false 304 305 // Enable interoperability with unversioned Raft library, and don't 306 // start using new ID-based features yet. 307 c.RaftConfig.ProtocolVersion = 1 308 309 return c 310 } 311 312 // tlsConfig returns a TLSUtil Config based on the server configuration 313 func (c *Config) tlsConfig() *tlsutil.Config { 314 tlsConf := &tlsutil.Config{ 315 VerifyIncoming: true, 316 VerifyOutgoing: true, 317 VerifyServerHostname: c.TLSConfig.VerifyServerHostname, 318 CAFile: c.TLSConfig.CAFile, 319 CertFile: c.TLSConfig.CertFile, 320 KeyFile: c.TLSConfig.KeyFile, 321 } 322 return tlsConf 323 }