github.com/adityamillind98/nomad@v0.11.8/nomad/config.go (about)

     1  package nomad
     2  
     3  import (
     4  	"fmt"
     5  	"io"
     6  	"net"
     7  	"os"
     8  	"runtime"
     9  	"time"
    10  
    11  	log "github.com/hashicorp/go-hclog"
    12  
    13  	"github.com/hashicorp/memberlist"
    14  	"github.com/hashicorp/nomad/helper/pluginutils/loader"
    15  	"github.com/hashicorp/nomad/helper/uuid"
    16  	"github.com/hashicorp/nomad/nomad/structs"
    17  	"github.com/hashicorp/nomad/nomad/structs/config"
    18  	"github.com/hashicorp/nomad/scheduler"
    19  	"github.com/hashicorp/raft"
    20  	"github.com/hashicorp/serf/serf"
    21  )
    22  
// Default values that DefaultConfig applies to a freshly built Config.
const (
	// DefaultRegion is the default for both Config.Region and
	// Config.AuthoritativeRegion.
	DefaultRegion   = "global"
	// DefaultDC is the default for Config.Datacenter.
	DefaultDC       = "dc1"
	// DefaultSerfPort is the default bind port of the Serf memberlist.
	DefaultSerfPort = 4648
)
    28  
// These are the protocol versions that Nomad can understand.
// Config.CheckVersion rejects any ProtocolVersion outside the range
// [ProtocolVersionMin, ProtocolVersionMax].
const (
	// ProtocolVersionMin is the lowest supported protocol version.
	ProtocolVersionMin uint8 = 1
	// ProtocolVersionMax is the highest supported protocol version and the
	// value DefaultConfig assigns to Config.ProtocolVersion. It has an
	// explicit value but no explicit type, so it is an untyped constant.
	ProtocolVersionMax       = 1
)
    34  
    35  // ProtocolVersionMap is the mapping of Nomad protocol versions
    36  // to Serf protocol versions. We mask the Serf protocols using
    37  // our own protocol version.
    38  var protocolVersionMap map[uint8]uint8
    39  
    40  func init() {
    41  	protocolVersionMap = map[uint8]uint8{
    42  		1: 4,
    43  	}
    44  }
    45  
    46  func DefaultRPCAddr() *net.TCPAddr {
    47  	return &net.TCPAddr{IP: net.ParseIP("127.0.0.1"), Port: 4647}
    48  }
    49  
// Config is used to parameterize the server. DefaultConfig returns an
// instance populated with default values.
type Config struct {
	// Bootstrapped indicates if Server has bootstrapped or not.
	// Its value must be 0 (not bootstrapped) or 1 (bootstrapped).
	// All operations on Bootstrapped must be handled via `atomic.*Int32()` calls.
	Bootstrapped int32

	// BootstrapExpect mode is used to automatically bring up a
	// collection of Nomad servers.
	//
	// The BootstrapExpect can be of any of the following values:
	//  1: Server will form a single node cluster and become a leader immediately
	//  N, larger than 1: Server will wait until it's connected to N servers
	//      before attempting leadership and forming the cluster.  No Raft Log operation
	//      will succeed until then.
	//  0: Server will wait to get a Raft configuration from another node and may not
	//      attempt to form a cluster or establish leadership on its own.
	BootstrapExpect int

	// DataDir is the directory to store our state in.
	DataDir string

	// DevMode is used for development purposes only and limits the
	// use of persistence or state.
	DevMode bool

	// EnableDebug is used to enable debugging RPC endpoints
	// in the absence of ACLs.
	EnableDebug bool

	// LogOutput is the location to write logs to. If this is not set,
	// logs will go to stderr.
	LogOutput io.Writer

	// Logger is the logger used by the server.
	Logger log.InterceptLogger

	// ProtocolVersion is the protocol version to speak. This must be between
	// ProtocolVersionMin and ProtocolVersionMax.
	ProtocolVersion uint8

	// RPCAddr is the RPC address used by Nomad. This should be reachable
	// by the other servers and clients.
	RPCAddr *net.TCPAddr

	// ClientRPCAdvertise is the address that is advertised to client nodes for
	// the RPC endpoint. This can differ from the RPC address, if for example
	// the RPCAddr is unspecified "0.0.0.0:4647", but this address must be
	// reachable.
	ClientRPCAdvertise *net.TCPAddr

	// ServerRPCAdvertise is the address that is advertised to other servers for
	// the RPC endpoint. This can differ from the RPC address, if for example
	// the RPCAddr is unspecified "0.0.0.0:4647", but this address must be
	// reachable.
	ServerRPCAdvertise *net.TCPAddr

	// RaftConfig is the configuration used for Raft in the local DC.
	RaftConfig *raft.Config

	// RaftTimeout is applied to any network traffic for raft. Defaults to 10s.
	RaftTimeout time.Duration

	// (Enterprise-only) NonVoter is used to prevent this server from being added
	// as a voting member of the Raft cluster.
	NonVoter bool

	// (Enterprise-only) RedundancyZone is the redundancy zone to use for this server.
	RedundancyZone string

	// (Enterprise-only) UpgradeVersion is the custom upgrade version to use when
	// performing upgrade migrations.
	UpgradeVersion string

	// SerfConfig is the configuration for the serf cluster.
	SerfConfig *serf.Config

	// NodeName is the name we use to advertise. Defaults to hostname.
	NodeName string

	// NodeID is the uuid of this server.
	NodeID string

	// Region is the region this Nomad server belongs to.
	Region string

	// AuthoritativeRegion is the region which is treated as the authoritative source
	// for ACLs and Policies. This provides a single source of truth to resolve conflicts.
	AuthoritativeRegion string

	// Datacenter is the datacenter this Nomad server belongs to.
	Datacenter string

	// Build is a string that is gossiped around, and can be used to help
	// operators track which versions are actively deployed.
	Build string

	// NumSchedulers is the number of scheduler threads that are run.
	// This can be as many as one per core, or zero to disable this server
	// from doing any scheduling work.
	NumSchedulers int

	// EnabledSchedulers controls the set of sub-schedulers that are
	// enabled for this server to handle. This will restrict the evaluations
	// that the workers dequeue for processing.
	EnabledSchedulers []string

	// ReconcileInterval controls how often we reconcile the strongly
	// consistent store with the Serf info. This is used to handle nodes
	// that are force removed, as well as intermittent unavailability during
	// leader election.
	ReconcileInterval time.Duration

	// EvalGCInterval is how often we dispatch a job to GC evaluations.
	EvalGCInterval time.Duration

	// EvalGCThreshold is how "old" an evaluation must be to be eligible
	// for GC. This gives users some time to debug a failed evaluation.
	EvalGCThreshold time.Duration

	// JobGCInterval is how often we dispatch a job to GC jobs that are
	// available for garbage collection.
	JobGCInterval time.Duration

	// JobGCThreshold is how old a job must be before it is eligible for GC. This gives
	// the user time to inspect the job.
	JobGCThreshold time.Duration

	// NodeGCInterval is how often we dispatch a job to GC failed nodes.
	NodeGCInterval time.Duration

	// NodeGCThreshold is how "old" a node must be to be eligible
	// for GC. This gives users some time to view and debug failed nodes.
	NodeGCThreshold time.Duration

	// DeploymentGCInterval is how often we dispatch a job to GC terminal
	// deployments.
	DeploymentGCInterval time.Duration

	// DeploymentGCThreshold is how "old" a deployment must be to be eligible
	// for GC. This gives users some time to view terminal deployments.
	DeploymentGCThreshold time.Duration

	// CSIPluginGCInterval is how often we dispatch a job to GC unused plugins.
	CSIPluginGCInterval time.Duration

	// CSIPluginGCThreshold is how "old" a plugin must be to be eligible
	// for GC. This gives users some time to debug plugins.
	CSIPluginGCThreshold time.Duration

	// CSIVolumeClaimGCInterval is how often we dispatch a job to GC
	// volume claims.
	CSIVolumeClaimGCInterval time.Duration

	// CSIVolumeClaimGCThreshold is how "old" a volume must be to be
	// eligible for GC. This gives users some time to debug volumes.
	CSIVolumeClaimGCThreshold time.Duration

	// EvalNackTimeout controls how long we allow a sub-scheduler to
	// work on an evaluation before we consider it failed and Nack it.
	// This allows that evaluation to be handed to another sub-scheduler
	// to work on. Defaults to 60 seconds. This should be long enough that
	// no evaluation hits it unless the sub-scheduler has failed.
	EvalNackTimeout time.Duration

	// EvalDeliveryLimit is the limit of attempts we make to deliver and
	// process an evaluation. This is used so that an eval that will never
	// complete eventually fails out of the system.
	EvalDeliveryLimit int

	// EvalNackInitialReenqueueDelay is the delay applied before reenqueuing a
	// Nacked evaluation for the first time. This value should be small as the
	// initial Nack can be due to a down machine and the eval should be retried
	// quickly for liveliness.
	EvalNackInitialReenqueueDelay time.Duration

	// EvalNackSubsequentReenqueueDelay is the delay applied before reenqueuing
	// an evaluation that has been Nacked more than once. This delay is
	// compounding after the first Nack. This value should be significantly
	// longer than the initial delay as the purpose it serves is to apply
	// back-pressure as evaluations are being Nacked either due to scheduler
	// failures or because they are hitting their Nack timeout, both of which
	// are signs of high server resource usage.
	EvalNackSubsequentReenqueueDelay time.Duration

	// EvalFailedFollowupBaselineDelay is the minimum time waited before
	// retrying a failed evaluation.
	EvalFailedFollowupBaselineDelay time.Duration

	// EvalFailedFollowupDelayRange defines the range of additional time from
	// the baseline in which to wait before retrying a failed evaluation. The
	// additional delay is selected from this range randomly.
	EvalFailedFollowupDelayRange time.Duration

	// MinHeartbeatTTL is the minimum time between heartbeats.
	// This is used as a floor to prevent excessive updates.
	MinHeartbeatTTL time.Duration

	// MaxHeartbeatsPerSecond is the maximum target rate of heartbeats
	// being processed per second. This allows the TTL to be increased
	// to meet the target rate.
	MaxHeartbeatsPerSecond float64

	// HeartbeatGrace is the additional time given as a grace period
	// beyond the TTL to account for network and processing delays
	// as well as clock skew.
	HeartbeatGrace time.Duration

	// FailoverHeartbeatTTL is the TTL applied to heartbeats after
	// a new leader is elected, since we no longer know the status
	// of all the heartbeats.
	FailoverHeartbeatTTL time.Duration

	// ConsulConfig is this Agent's Consul configuration.
	ConsulConfig *config.ConsulConfig

	// VaultConfig is this Agent's Vault configuration.
	VaultConfig *config.VaultConfig

	// RPCHoldTimeout is how long an RPC can be "held" before it is errored.
	// This is used to paper over a loss of leadership by instead holding RPCs,
	// so that the caller experiences a slow response rather than an error.
	// This period is meant to be long enough for a leader election to take
	// place, and a small jitter is applied to avoid a thundering herd.
	RPCHoldTimeout time.Duration

	// TLSConfig holds various TLS related configurations.
	TLSConfig *config.TLSConfig

	// ACLEnabled controls if ACL enforcement and management is enabled.
	ACLEnabled bool

	// ReplicationBackoff is how much we backoff when replication errors.
	// This is a tunable knob for testing primarily.
	ReplicationBackoff time.Duration

	// ReplicationToken is the ACL Token Secret ID used to fetch from
	// the Authoritative Region.
	ReplicationToken string

	// SentinelGCInterval is the interval that we GC unused policies.
	SentinelGCInterval time.Duration

	// SentinelConfig is this Agent's Sentinel configuration.
	SentinelConfig *config.SentinelConfig

	// StatsCollectionInterval is the interval at which the Nomad server
	// publishes metrics which are periodic in nature like updating gauges.
	StatsCollectionInterval time.Duration

	// DisableTaggedMetrics determines whether metrics will be displayed via a
	// key/value/tag format, or simply a key/value format.
	DisableTaggedMetrics bool

	// DisableDispatchedJobSummaryMetrics allows ignoring dispatched jobs when
	// publishing Job summary metrics.
	DisableDispatchedJobSummaryMetrics bool

	// BackwardsCompatibleMetrics determines whether to show methods of
	// displaying metrics for older versions, or to only show the new format.
	BackwardsCompatibleMetrics bool

	// AutopilotConfig is used to apply the initial autopilot config when
	// bootstrapping.
	AutopilotConfig *structs.AutopilotConfig

	// ServerHealthInterval is the frequency with which the health of the
	// servers in the cluster will be updated.
	ServerHealthInterval time.Duration

	// AutopilotInterval is the frequency with which the leader will perform
	// autopilot tasks, such as promoting eligible non-voters and removing
	// dead servers.
	AutopilotInterval time.Duration

	// DefaultSchedulerConfig configures the initial scheduler config to be persisted in Raft.
	// Once the cluster is bootstrapped, and Raft persists the config (from here or through API),
	// this value is ignored.
	DefaultSchedulerConfig structs.SchedulerConfiguration `hcl:"default_scheduler_config"`

	// PluginLoader is used to load plugins.
	PluginLoader loader.PluginCatalog

	// PluginSingletonLoader is a plugin loader that returns singleton
	// instances of the plugins.
	PluginSingletonLoader loader.PluginCatalog

	// RPCHandshakeTimeout is the deadline by which RPC handshakes must
	// complete. The RPC handshake includes the first byte read as well as
	// the TLS handshake and subsequent byte read if TLS is enabled.
	//
	// The deadline is reset after the first byte is read so when TLS is
	// enabled RPC connections may take (timeout * 2) to complete.
	//
	// 0 means no timeout.
	RPCHandshakeTimeout time.Duration

	// RPCMaxConnsPerClient is the maximum number of concurrent RPC
	// connections from a single IP address. 0 means no limit.
	RPCMaxConnsPerClient int
}
   352  
   353  // CheckVersion is used to check if the ProtocolVersion is valid
   354  func (c *Config) CheckVersion() error {
   355  	if c.ProtocolVersion < ProtocolVersionMin {
   356  		return fmt.Errorf("Protocol version '%d' too low. Must be in range: [%d, %d]",
   357  			c.ProtocolVersion, ProtocolVersionMin, ProtocolVersionMax)
   358  	} else if c.ProtocolVersion > ProtocolVersionMax {
   359  		return fmt.Errorf("Protocol version '%d' too high. Must be in range: [%d, %d]",
   360  			c.ProtocolVersion, ProtocolVersionMin, ProtocolVersionMax)
   361  	}
   362  	return nil
   363  }
   364  
   365  // DefaultConfig returns the default configuration. Only used as the basis for
   366  // merging agent or test parameters.
   367  func DefaultConfig() *Config {
   368  	hostname, err := os.Hostname()
   369  	if err != nil {
   370  		panic(err)
   371  	}
   372  
   373  	c := &Config{
   374  		Region:                           DefaultRegion,
   375  		AuthoritativeRegion:              DefaultRegion,
   376  		Datacenter:                       DefaultDC,
   377  		NodeName:                         hostname,
   378  		NodeID:                           uuid.Generate(),
   379  		ProtocolVersion:                  ProtocolVersionMax,
   380  		RaftConfig:                       raft.DefaultConfig(),
   381  		RaftTimeout:                      10 * time.Second,
   382  		LogOutput:                        os.Stderr,
   383  		RPCAddr:                          DefaultRPCAddr(),
   384  		SerfConfig:                       serf.DefaultConfig(),
   385  		NumSchedulers:                    1,
   386  		ReconcileInterval:                60 * time.Second,
   387  		EvalGCInterval:                   5 * time.Minute,
   388  		EvalGCThreshold:                  1 * time.Hour,
   389  		JobGCInterval:                    5 * time.Minute,
   390  		JobGCThreshold:                   4 * time.Hour,
   391  		NodeGCInterval:                   5 * time.Minute,
   392  		NodeGCThreshold:                  24 * time.Hour,
   393  		DeploymentGCInterval:             5 * time.Minute,
   394  		DeploymentGCThreshold:            1 * time.Hour,
   395  		CSIPluginGCInterval:              5 * time.Minute,
   396  		CSIPluginGCThreshold:             1 * time.Hour,
   397  		CSIVolumeClaimGCInterval:         5 * time.Minute,
   398  		CSIVolumeClaimGCThreshold:        1 * time.Hour,
   399  		EvalNackTimeout:                  60 * time.Second,
   400  		EvalDeliveryLimit:                3,
   401  		EvalNackInitialReenqueueDelay:    1 * time.Second,
   402  		EvalNackSubsequentReenqueueDelay: 20 * time.Second,
   403  		EvalFailedFollowupBaselineDelay:  1 * time.Minute,
   404  		EvalFailedFollowupDelayRange:     5 * time.Minute,
   405  		MinHeartbeatTTL:                  10 * time.Second,
   406  		MaxHeartbeatsPerSecond:           50.0,
   407  		HeartbeatGrace:                   10 * time.Second,
   408  		FailoverHeartbeatTTL:             300 * time.Second,
   409  		ConsulConfig:                     config.DefaultConsulConfig(),
   410  		VaultConfig:                      config.DefaultVaultConfig(),
   411  		RPCHoldTimeout:                   5 * time.Second,
   412  		StatsCollectionInterval:          1 * time.Minute,
   413  		TLSConfig:                        &config.TLSConfig{},
   414  		ReplicationBackoff:               30 * time.Second,
   415  		SentinelGCInterval:               30 * time.Second,
   416  		AutopilotConfig: &structs.AutopilotConfig{
   417  			CleanupDeadServers:      true,
   418  			LastContactThreshold:    200 * time.Millisecond,
   419  			MaxTrailingLogs:         250,
   420  			ServerStabilizationTime: 10 * time.Second,
   421  		},
   422  		ServerHealthInterval: 2 * time.Second,
   423  		AutopilotInterval:    10 * time.Second,
   424  		DefaultSchedulerConfig: structs.SchedulerConfiguration{
   425  			SchedulerAlgorithm: structs.SchedulerAlgorithmBinpack,
   426  			PreemptionConfig: structs.PreemptionConfig{
   427  				SystemSchedulerEnabled:  true,
   428  				BatchSchedulerEnabled:   false,
   429  				ServiceSchedulerEnabled: false,
   430  			},
   431  		},
   432  	}
   433  
   434  	// Enable all known schedulers by default
   435  	c.EnabledSchedulers = make([]string, 0, len(scheduler.BuiltinSchedulers))
   436  	for name := range scheduler.BuiltinSchedulers {
   437  		c.EnabledSchedulers = append(c.EnabledSchedulers, name)
   438  	}
   439  	c.EnabledSchedulers = append(c.EnabledSchedulers, structs.JobTypeCore)
   440  
   441  	// Default the number of schedulers to match the cores
   442  	c.NumSchedulers = runtime.NumCPU()
   443  
   444  	// Increase our reap interval to 3 days instead of 24h.
   445  	c.SerfConfig.ReconnectTimeout = 3 * 24 * time.Hour
   446  
   447  	// Serf should use the WAN timing, since we are using it
   448  	// to communicate between DC's
   449  	c.SerfConfig.MemberlistConfig = memberlist.DefaultWANConfig()
   450  	c.SerfConfig.MemberlistConfig.BindPort = DefaultSerfPort
   451  
   452  	// Disable shutdown on removal
   453  	c.RaftConfig.ShutdownOnRemove = false
   454  
   455  	// Default to Raft v2, update to v3 to enable new Raft and autopilot features.
   456  	c.RaftConfig.ProtocolVersion = 2
   457  
   458  	return c
   459  }