github.com/kardianos/nomad@v0.1.3-0.20151022182107-b13df73ee850/nomad/config.go (about)

     1  package nomad
     2  
     3  import (
     4  	"fmt"
     5  	"io"
     6  	"net"
     7  	"os"
     8  	"runtime"
     9  	"time"
    10  
    11  	"github.com/hashicorp/memberlist"
    12  	"github.com/hashicorp/nomad/nomad/structs"
    13  	"github.com/hashicorp/nomad/scheduler"
    14  	"github.com/hashicorp/raft"
    15  	"github.com/hashicorp/serf/serf"
    16  )
    17  
    18  const (
    19  	DefaultRegion   = "global"
    20  	DefaultDC       = "dc1"
    21  	DefaultSerfPort = 4648
    22  )
    23  
    24  // These are the protocol versions that Nomad can understand
    25  const (
    26  	ProtocolVersionMin uint8 = 1
    27  	ProtocolVersionMax       = 1
    28  )
    29  
    30  // ProtocolVersionMap is the mapping of Nomad protocol versions
    31  // to Serf protocol versions. We mask the Serf protocols using
    32  // our own protocol version.
    33  var protocolVersionMap map[uint8]uint8
    34  
    35  func init() {
    36  	protocolVersionMap = map[uint8]uint8{
    37  		1: 4,
    38  	}
    39  }
    40  
    41  var (
    42  	DefaultRPCAddr = &net.TCPAddr{IP: net.ParseIP("127.0.0.1"), Port: 4647}
    43  )
    44  
    45  // Config is used to parameterize the server
    46  type Config struct {
    47  	// Bootstrap mode is used to bring up the first Nomad server.
    48  	// It is required so that it can elect a leader without any
    49  	// other nodes being present
    50  	Bootstrap bool
    51  
    52  	// BootstrapExpect mode is used to automatically bring up a collection of
    53  	// Nomad servers. This can be used to automatically bring up a collection
    54  	// of nodes.
    55  	BootstrapExpect int
    56  
    57  	// DataDir is the directory to store our state in
    58  	DataDir string
    59  
    60  	// DevMode is used for development purposes only and limits the
    61  	// use of persistence or state.
    62  	DevMode bool
    63  
    64  	// DevDisableBootstrap is used to disable bootstrap mode while
    65  	// in DevMode. This is largely used for testing.
    66  	DevDisableBootstrap bool
    67  
    68  	// LogOutput is the location to write logs to. If this is not set,
    69  	// logs will go to stderr.
    70  	LogOutput io.Writer
    71  
    72  	// ProtocolVersion is the protocol version to speak. This must be between
    73  	// ProtocolVersionMin and ProtocolVersionMax.
    74  	ProtocolVersion uint8
    75  
    76  	// RPCAddr is the RPC address used by Nomad. This should be reachable
    77  	// by the other servers and clients
    78  	RPCAddr *net.TCPAddr
    79  
    80  	// RPCAdvertise is the address that is advertised to other nodes for
    81  	// the RPC endpoint. This can differ from the RPC address, if for example
    82  	// the RPCAddr is unspecified "0.0.0.0:4647", but this address must be
    83  	// reachable
    84  	RPCAdvertise *net.TCPAddr
    85  
    86  	// RaftConfig is the configuration used for Raft in the local DC
    87  	RaftConfig *raft.Config
    88  
    89  	// RaftTimeout is applied to any network traffic for raft. Defaults to 10s.
    90  	RaftTimeout time.Duration
    91  
    92  	// RequireTLS ensures that all RPC traffic is protected with TLS
    93  	RequireTLS bool
    94  
    95  	// SerfConfig is the configuration for the serf cluster
    96  	SerfConfig *serf.Config
    97  
    98  	// NodeName is the name we use to advertise. Defaults to hostname.
    99  	NodeName string
   100  
   101  	// Region is the region this Nomad server belongs to.
   102  	Region string
   103  
   104  	// Datacenter is the datacenter this Nomad server belongs to.
   105  	Datacenter string
   106  
   107  	// Build is a string that is gossiped around, and can be used to help
   108  	// operators track which versions are actively deployed
   109  	Build string
   110  
   111  	// NumSchedulers is the number of scheduler threads that are run.
   112  	// This can be as many as one per core, or zero to disable this server
   113  	// from doing any scheduling work.
   114  	NumSchedulers int
   115  
   116  	// EnabledSchedulers controls the set of sub-schedulers that are
   117  	// enabled for this server to handle. This will restrict the evaluations
   118  	// that the workers dequeue for processing.
   119  	EnabledSchedulers []string
   120  
   121  	// ReconcileInterval controls how often we reconcile the strongly
   122  	// consistent store with the Serf info. This is used to handle nodes
   123  	// that are force removed, as well as intermittent unavailability during
   124  	// leader election.
   125  	ReconcileInterval time.Duration
   126  
   127  	// EvalGCInterval is how often we dispatch a job to GC evaluations
   128  	EvalGCInterval time.Duration
   129  
   130  	// EvalGCThreshold is how "old" an evaluation must be to be eligible
   131  	// for GC. This gives users some time to debug a failed evaluation.
   132  	EvalGCThreshold time.Duration
   133  
   134  	// NodeGCInterval is how often we dispatch a job to GC failed nodes.
   135  	NodeGCInterval time.Duration
   136  
   137  	// NodeGCThreshold is how "old" a node must be to be eligible
   138  	// for GC. This gives users some time to view and debug failed nodes.
   139  	NodeGCThreshold time.Duration
   140  
   141  	// EvalNackTimeout controls how long we allow a sub-scheduler to
   142  	// work on an evaluation before we consider it failed and Nack it.
   143  	// This allows that evaluation to be handed to another sub-scheduler
   144  	// to work on. Defaults to 60 seconds. This should be long enough that
   145  	// no evaluation hits it unless the sub-scheduler has failed.
   146  	EvalNackTimeout time.Duration
   147  
   148  	// EvalDeliveryLimit is the limit of attempts we make to deliver and
   149  	// process an evaluation. This is used so that an eval that will never
   150  	// complete eventually fails out of the system.
   151  	EvalDeliveryLimit int
   152  
   153  	// MinHeartbeatTTL is the minimum time between heartbeats.
   154  	// This is used as a floor to prevent excessive updates.
   155  	MinHeartbeatTTL time.Duration
   156  
   157  	// MaxHeartbeatsPerSecond is the maximum target rate of heartbeats
   158  	// being processed per second. This allows the TTL to be increased
   159  	// to meet the target rate.
   160  	MaxHeartbeatsPerSecond float64
   161  
   162  	// HeartbeatGrace is the additional time given as a grace period
   163  	// beyond the TTL to account for network and processing delays
   164  	// as well as clock skew.
   165  	HeartbeatGrace time.Duration
   166  
   167  	// FailoverHeartbeatTTL is the TTL applied to heartbeats after
   168  	// a new leader is elected, since we no longer know the status
   169  	// of all the heartbeats.
   170  	FailoverHeartbeatTTL time.Duration
   171  }
   172  
   173  // CheckVersion is used to check if the ProtocolVersion is valid
   174  func (c *Config) CheckVersion() error {
   175  	if c.ProtocolVersion < ProtocolVersionMin {
   176  		return fmt.Errorf("Protocol version '%d' too low. Must be in range: [%d, %d]",
   177  			c.ProtocolVersion, ProtocolVersionMin, ProtocolVersionMax)
   178  	} else if c.ProtocolVersion > ProtocolVersionMax {
   179  		return fmt.Errorf("Protocol version '%d' too high. Must be in range: [%d, %d]",
   180  			c.ProtocolVersion, ProtocolVersionMin, ProtocolVersionMax)
   181  	}
   182  	return nil
   183  }
   184  
   185  // DefaultConfig returns the default configuration
   186  func DefaultConfig() *Config {
   187  	hostname, err := os.Hostname()
   188  	if err != nil {
   189  		panic(err)
   190  	}
   191  
   192  	c := &Config{
   193  		Region:                 DefaultRegion,
   194  		Datacenter:             DefaultDC,
   195  		NodeName:               hostname,
   196  		ProtocolVersion:        ProtocolVersionMax,
   197  		RaftConfig:             raft.DefaultConfig(),
   198  		RaftTimeout:            10 * time.Second,
   199  		RPCAddr:                DefaultRPCAddr,
   200  		SerfConfig:             serf.DefaultConfig(),
   201  		NumSchedulers:          1,
   202  		ReconcileInterval:      60 * time.Second,
   203  		EvalGCInterval:         5 * time.Minute,
   204  		EvalGCThreshold:        1 * time.Hour,
   205  		NodeGCInterval:         5 * time.Minute,
   206  		NodeGCThreshold:        24 * time.Hour,
   207  		EvalNackTimeout:        60 * time.Second,
   208  		EvalDeliveryLimit:      3,
   209  		MinHeartbeatTTL:        10 * time.Second,
   210  		MaxHeartbeatsPerSecond: 50.0,
   211  		HeartbeatGrace:         10 * time.Second,
   212  		FailoverHeartbeatTTL:   300 * time.Second,
   213  	}
   214  
   215  	// Enable all known schedulers by default
   216  	c.EnabledSchedulers = make([]string, 0, len(scheduler.BuiltinSchedulers))
   217  	for name := range scheduler.BuiltinSchedulers {
   218  		c.EnabledSchedulers = append(c.EnabledSchedulers, name)
   219  	}
   220  	c.EnabledSchedulers = append(c.EnabledSchedulers, structs.JobTypeCore)
   221  
   222  	// Default the number of schedulers to match the coores
   223  	c.NumSchedulers = runtime.NumCPU()
   224  
   225  	// Increase our reap interval to 3 days instead of 24h.
   226  	c.SerfConfig.ReconnectTimeout = 3 * 24 * time.Hour
   227  
   228  	// Serf should use the WAN timing, since we are using it
   229  	// to communicate between DC's
   230  	c.SerfConfig.MemberlistConfig = memberlist.DefaultWANConfig()
   231  	c.SerfConfig.MemberlistConfig.BindPort = DefaultSerfPort
   232  
   233  	// Disable shutdown on removal
   234  	c.RaftConfig.ShutdownOnRemove = false
   235  	return c
   236  }