github.com/djenriquez/nomad-1@v0.8.1/nomad/config.go

github.com/djenriquez/nomad-1@v0.8.1/nomad/config.go (about)

     1  package nomad
     2  
     3  import (
     4  	"fmt"
     5  	"io"
     6  	"net"
     7  	"os"
     8  	"runtime"
     9  	"time"
    10  
    11  	"github.com/hashicorp/memberlist"
    12  	"github.com/hashicorp/nomad/helper/tlsutil"
    13  	"github.com/hashicorp/nomad/helper/uuid"
    14  	"github.com/hashicorp/nomad/nomad/structs"
    15  	"github.com/hashicorp/nomad/nomad/structs/config"
    16  	"github.com/hashicorp/nomad/scheduler"
    17  	"github.com/hashicorp/raft"
    18  	"github.com/hashicorp/serf/serf"
    19  )
    20  
    21  const (
    22  	DefaultRegion   = "global"
    23  	DefaultDC       = "dc1"
    24  	DefaultSerfPort = 4648
    25  )
    26  
    27  // These are the protocol versions that Nomad can understand
    28  const (
    29  	ProtocolVersionMin uint8 = 1
    30  	ProtocolVersionMax       = 1
    31  )
    32  
    33  // ProtocolVersionMap is the mapping of Nomad protocol versions
    34  // to Serf protocol versions. We mask the Serf protocols using
    35  // our own protocol version.
    36  var protocolVersionMap map[uint8]uint8
    37  
    38  func init() {
    39  	protocolVersionMap = map[uint8]uint8{
    40  		1: 4,
    41  	}
    42  }
    43  
    44  var (
    45  	DefaultRPCAddr = &net.TCPAddr{IP: net.ParseIP("127.0.0.1"), Port: 4647}
    46  )
    47  
// Config holds the settings used to parameterize a Nomad server.
// DefaultConfig returns an instance populated with default values.
type Config struct {
	// Bootstrap is used to bring up the first Nomad server. It is
	// required so that it can elect a leader without any other nodes
	// being present.
	Bootstrap bool

	// BootstrapExpect is used to automatically bring up a collection of
	// Nomad servers. All operations on BootstrapExpect must be handled
	// via `atomic.*Int32()` calls.
	BootstrapExpect int32

	// DataDir is the directory to store our state in.
	DataDir string

	// DevMode is used for development purposes only and limits the
	// use of persistence or state.
	DevMode bool

	// DevDisableBootstrap is used to disable bootstrap mode while
	// in DevMode. This is largely used for testing.
	DevDisableBootstrap bool

	// LogOutput is the location to write logs to. If this is not set,
	// logs will go to stderr.
	LogOutput io.Writer

	// ProtocolVersion is the protocol version to speak. This must be between
	// ProtocolVersionMin and ProtocolVersionMax.
	ProtocolVersion uint8

	// RPCAddr is the RPC address used by Nomad. This should be reachable
	// by the other servers and clients.
	RPCAddr *net.TCPAddr

	// ClientRPCAdvertise is the address that is advertised to client nodes for
	// the RPC endpoint. This can differ from the RPC address, if for example
	// the RPCAddr is unspecified "0.0.0.0:4647", but this address must be
	// reachable.
	ClientRPCAdvertise *net.TCPAddr

	// ServerRPCAdvertise is the address that is advertised to other servers for
	// the RPC endpoint. This can differ from the RPC address, if for example
	// the RPCAddr is unspecified "0.0.0.0:4647", but this address must be
	// reachable.
	ServerRPCAdvertise *net.TCPAddr

	// RaftConfig is the configuration used for Raft in the local DC.
	RaftConfig *raft.Config

	// RaftTimeout is applied to any network traffic for raft. Defaults to 10s.
	RaftTimeout time.Duration

	// (Enterprise-only) NonVoter is used to prevent this server from being added
	// as a voting member of the Raft cluster.
	NonVoter bool

	// (Enterprise-only) RedundancyZone is the redundancy zone to use for this server.
	RedundancyZone string

	// (Enterprise-only) UpgradeVersion is the custom upgrade version to use when
	// performing upgrade migrations.
	UpgradeVersion string

	// SerfConfig is the configuration for the serf cluster.
	SerfConfig *serf.Config

	// NodeName is the name we use to advertise. Defaults to hostname.
	NodeName string

	// NodeID is the uuid of this server.
	NodeID string

	// Region is the region this Nomad server belongs to.
	Region string

	// AuthoritativeRegion is the region which is treated as the authoritative source
	// for ACLs and Policies. This provides a single source of truth to resolve conflicts.
	AuthoritativeRegion string

	// Datacenter is the datacenter this Nomad server belongs to.
	Datacenter string

	// Build is a string that is gossiped around, and can be used to help
	// operators track which versions are actively deployed.
	Build string

	// NumSchedulers is the number of scheduler threads that are run.
	// This can be as many as one per core, or zero to disable this server
	// from doing any scheduling work.
	NumSchedulers int

	// EnabledSchedulers controls the set of sub-schedulers that are
	// enabled for this server to handle. This will restrict the evaluations
	// that the workers dequeue for processing.
	EnabledSchedulers []string

	// ReconcileInterval controls how often we reconcile the strongly
	// consistent store with the Serf info. This is used to handle nodes
	// that are force removed, as well as intermittent unavailability during
	// leader election.
	ReconcileInterval time.Duration

	// EvalGCInterval is how often we dispatch a job to GC evaluations.
	EvalGCInterval time.Duration

	// EvalGCThreshold is how "old" an evaluation must be to be eligible
	// for GC. This gives users some time to debug a failed evaluation.
	EvalGCThreshold time.Duration

	// JobGCInterval is how often we dispatch a job to GC jobs that are
	// available for garbage collection.
	JobGCInterval time.Duration

	// JobGCThreshold is how old a job must be before it is eligible for GC.
	// This gives the user time to inspect the job.
	JobGCThreshold time.Duration

	// NodeGCInterval is how often we dispatch a job to GC failed nodes.
	NodeGCInterval time.Duration

	// NodeGCThreshold is how "old" a node must be to be eligible
	// for GC. This gives users some time to view and debug failed nodes.
	NodeGCThreshold time.Duration

	// DeploymentGCInterval is how often we dispatch a job to GC terminal
	// deployments.
	DeploymentGCInterval time.Duration

	// DeploymentGCThreshold is how "old" a deployment must be to be eligible
	// for GC. This gives users some time to view terminal deployments.
	DeploymentGCThreshold time.Duration

	// EvalNackTimeout controls how long we allow a sub-scheduler to
	// work on an evaluation before we consider it failed and Nack it.
	// This allows that evaluation to be handed to another sub-scheduler
	// to work on. Defaults to 60 seconds. This should be long enough that
	// no evaluation hits it unless the sub-scheduler has failed.
	EvalNackTimeout time.Duration

	// EvalDeliveryLimit is the limit of attempts we make to deliver and
	// process an evaluation. This is used so that an eval that will never
	// complete eventually fails out of the system.
	EvalDeliveryLimit int

	// EvalNackInitialReenqueueDelay is the delay applied before reenqueuing a
	// Nacked evaluation for the first time. This value should be small as the
	// initial Nack can be due to a down machine and the eval should be retried
	// quickly for liveness.
	EvalNackInitialReenqueueDelay time.Duration

	// EvalNackSubsequentReenqueueDelay is the delay applied before reenqueuing
	// an evaluation that has been Nacked more than once. This delay is
	// compounding after the first Nack. This value should be significantly
	// longer than the initial delay as the purpose it serves is to apply
	// back-pressure as evaluations are being Nacked either due to scheduler
	// failures or because they are hitting their Nack timeout, both of which
	// are signs of high server resource usage.
	EvalNackSubsequentReenqueueDelay time.Duration

	// EvalFailedFollowupBaselineDelay is the minimum time waited before
	// retrying a failed evaluation.
	EvalFailedFollowupBaselineDelay time.Duration

	// EvalFailedFollowupDelayRange defines the range of additional time from
	// the baseline in which to wait before retrying a failed evaluation. The
	// additional delay is selected from this range randomly.
	EvalFailedFollowupDelayRange time.Duration

	// MinHeartbeatTTL is the minimum time between heartbeats.
	// This is used as a floor to prevent excessive updates.
	MinHeartbeatTTL time.Duration

	// MaxHeartbeatsPerSecond is the maximum target rate of heartbeats
	// being processed per second. This allows the TTL to be increased
	// to meet the target rate.
	MaxHeartbeatsPerSecond float64

	// HeartbeatGrace is the additional time given as a grace period
	// beyond the TTL to account for network and processing delays
	// as well as clock skew.
	HeartbeatGrace time.Duration

	// FailoverHeartbeatTTL is the TTL applied to heartbeats after
	// a new leader is elected, since we no longer know the status
	// of all the heartbeats.
	FailoverHeartbeatTTL time.Duration

	// ConsulConfig is this Agent's Consul configuration.
	ConsulConfig *config.ConsulConfig

	// VaultConfig is this Agent's Vault configuration.
	VaultConfig *config.VaultConfig

	// RPCHoldTimeout is how long an RPC can be "held" before it is errored.
	// This is used to paper over a loss of leadership by instead holding RPCs,
	// so that the caller experiences a slow response rather than an error.
	// This period is meant to be long enough for a leader election to take
	// place, and a small jitter is applied to avoid a thundering herd.
	RPCHoldTimeout time.Duration

	// TLSConfig holds various TLS related configurations.
	TLSConfig *config.TLSConfig

	// ACLEnabled controls if ACL enforcement and management is enabled.
	ACLEnabled bool

	// ReplicationBackoff is how much we backoff when replication errors.
	// This is a tunable knob for testing primarily.
	ReplicationBackoff time.Duration

	// ReplicationToken is the ACL Token Secret ID used to fetch from
	// the Authoritative Region.
	ReplicationToken string

	// SentinelGCInterval is the interval that we GC unused policies.
	SentinelGCInterval time.Duration

	// SentinelConfig is this Agent's Sentinel configuration.
	SentinelConfig *config.SentinelConfig

	// StatsCollectionInterval is the interval at which the Nomad server
	// publishes metrics which are periodic in nature, like updating gauges.
	StatsCollectionInterval time.Duration

	// DisableTaggedMetrics determines whether metrics will be displayed via a
	// key/value/tag format, or simply a key/value format.
	DisableTaggedMetrics bool

	// BackwardsCompatibleMetrics determines whether to show methods of
	// displaying metrics for older versions, or to only show the new format.
	BackwardsCompatibleMetrics bool

	// AutopilotConfig is used to apply the initial autopilot config when
	// bootstrapping.
	AutopilotConfig *structs.AutopilotConfig

	// ServerHealthInterval is the frequency with which the health of the
	// servers in the cluster will be updated.
	ServerHealthInterval time.Duration

	// AutopilotInterval is the frequency with which the leader will perform
	// autopilot tasks, such as promoting eligible non-voters and removing
	// dead servers.
	AutopilotInterval time.Duration
}
   295  
   296  // CheckVersion is used to check if the ProtocolVersion is valid
   297  func (c *Config) CheckVersion() error {
   298  	if c.ProtocolVersion < ProtocolVersionMin {
   299  		return fmt.Errorf("Protocol version '%d' too low. Must be in range: [%d, %d]",
   300  			c.ProtocolVersion, ProtocolVersionMin, ProtocolVersionMax)
   301  	} else if c.ProtocolVersion > ProtocolVersionMax {
   302  		return fmt.Errorf("Protocol version '%d' too high. Must be in range: [%d, %d]",
   303  			c.ProtocolVersion, ProtocolVersionMin, ProtocolVersionMax)
   304  	}
   305  	return nil
   306  }
   307  
   308  // DefaultConfig returns the default configuration
   309  func DefaultConfig() *Config {
   310  	hostname, err := os.Hostname()
   311  	if err != nil {
   312  		panic(err)
   313  	}
   314  
   315  	c := &Config{
   316  		Region:                           DefaultRegion,
   317  		AuthoritativeRegion:              DefaultRegion,
   318  		Datacenter:                       DefaultDC,
   319  		NodeName:                         hostname,
   320  		NodeID:                           uuid.Generate(),
   321  		ProtocolVersion:                  ProtocolVersionMax,
   322  		RaftConfig:                       raft.DefaultConfig(),
   323  		RaftTimeout:                      10 * time.Second,
   324  		LogOutput:                        os.Stderr,
   325  		RPCAddr:                          DefaultRPCAddr,
   326  		SerfConfig:                       serf.DefaultConfig(),
   327  		NumSchedulers:                    1,
   328  		ReconcileInterval:                60 * time.Second,
   329  		EvalGCInterval:                   5 * time.Minute,
   330  		EvalGCThreshold:                  1 * time.Hour,
   331  		JobGCInterval:                    5 * time.Minute,
   332  		JobGCThreshold:                   4 * time.Hour,
   333  		NodeGCInterval:                   5 * time.Minute,
   334  		NodeGCThreshold:                  24 * time.Hour,
   335  		DeploymentGCInterval:             5 * time.Minute,
   336  		DeploymentGCThreshold:            1 * time.Hour,
   337  		EvalNackTimeout:                  60 * time.Second,
   338  		EvalDeliveryLimit:                3,
   339  		EvalNackInitialReenqueueDelay:    1 * time.Second,
   340  		EvalNackSubsequentReenqueueDelay: 20 * time.Second,
   341  		EvalFailedFollowupBaselineDelay:  1 * time.Minute,
   342  		EvalFailedFollowupDelayRange:     5 * time.Minute,
   343  		MinHeartbeatTTL:                  10 * time.Second,
   344  		MaxHeartbeatsPerSecond:           50.0,
   345  		HeartbeatGrace:                   10 * time.Second,
   346  		FailoverHeartbeatTTL:             300 * time.Second,
   347  		ConsulConfig:                     config.DefaultConsulConfig(),
   348  		VaultConfig:                      config.DefaultVaultConfig(),
   349  		RPCHoldTimeout:                   5 * time.Second,
   350  		StatsCollectionInterval:          1 * time.Minute,
   351  		TLSConfig:                        &config.TLSConfig{},
   352  		ReplicationBackoff:               30 * time.Second,
   353  		SentinelGCInterval:               30 * time.Second,
   354  		AutopilotConfig: &structs.AutopilotConfig{
   355  			CleanupDeadServers:      true,
   356  			LastContactThreshold:    200 * time.Millisecond,
   357  			MaxTrailingLogs:         250,
   358  			ServerStabilizationTime: 10 * time.Second,
   359  		},
   360  		ServerHealthInterval: 2 * time.Second,
   361  		AutopilotInterval:    10 * time.Second,
   362  	}
   363  
   364  	// Enable all known schedulers by default
   365  	c.EnabledSchedulers = make([]string, 0, len(scheduler.BuiltinSchedulers))
   366  	for name := range scheduler.BuiltinSchedulers {
   367  		c.EnabledSchedulers = append(c.EnabledSchedulers, name)
   368  	}
   369  	c.EnabledSchedulers = append(c.EnabledSchedulers, structs.JobTypeCore)
   370  
   371  	// Default the number of schedulers to match the cores
   372  	c.NumSchedulers = runtime.NumCPU()
   373  
   374  	// Increase our reap interval to 3 days instead of 24h.
   375  	c.SerfConfig.ReconnectTimeout = 3 * 24 * time.Hour
   376  
   377  	// Serf should use the WAN timing, since we are using it
   378  	// to communicate between DC's
   379  	c.SerfConfig.MemberlistConfig = memberlist.DefaultWANConfig()
   380  	c.SerfConfig.MemberlistConfig.BindPort = DefaultSerfPort
   381  
   382  	// Disable shutdown on removal
   383  	c.RaftConfig.ShutdownOnRemove = false
   384  
   385  	// Enable interoperability with new raft APIs, requires all servers
   386  	// to be on raft v1 or higher.
   387  	c.RaftConfig.ProtocolVersion = 2
   388  
   389  	return c
   390  }
   391  
   392  // tlsConfig returns a TLSUtil Config based on the server configuration
   393  func (c *Config) tlsConfig() *tlsutil.Config {
   394  	return &tlsutil.Config{
   395  		VerifyIncoming:       true,
   396  		VerifyOutgoing:       true,
   397  		VerifyServerHostname: c.TLSConfig.VerifyServerHostname,
   398  		CAFile:               c.TLSConfig.CAFile,
   399  		CertFile:             c.TLSConfig.CertFile,
   400  		KeyFile:              c.TLSConfig.KeyFile,
   401  		KeyLoader:            c.TLSConfig.GetKeyLoader(),
   402  	}
   403  }