github.com/hernad/nomad@v1.6.112/nomad/structs/operator.go

github.com/hernad/nomad@v1.6.112/nomad/structs/operator.go (about)

     1  // Copyright (c) HashiCorp, Inc.
     2  // SPDX-License-Identifier: MPL-2.0
     3  
     4  package structs
     5  
     6  import (
     7  	"fmt"
     8  	"time"
     9  
    10  	"github.com/hashicorp/raft"
    11  )
    12  
// RaftServer describes a single server entry in the Raft configuration. It is
// the element type of RaftConfigurationResponse.Servers.
type RaftServer struct {
	// ID is the unique ID for the server. These are currently the same
	// as the address, but they will be changed to a real GUID in a future
	// release of Nomad.
	ID raft.ServerID

	// Node is the node name of the server, as known by Nomad. It is set to
	// "(unknown)" when Nomad does not know the name.
	Node string

	// Address is the IP:port of the server, used for Raft communications.
	Address raft.ServerAddress

	// Leader is true if this server is the current cluster leader.
	Leader bool

	// Voter is true if this server has a vote in the cluster. This might
	// be false if the server is staging and still coming online, or if
	// it's a non-voting server, which will be added in a future release of
	// Nomad.
	Voter bool

	// RaftProtocol is the version of the Raft protocol spoken by this
	// server, carried as a string.
	RaftProtocol string
}
    39  
// RaftConfigurationResponse is returned when querying for the current Raft
// configuration.
type RaftConfigurationResponse struct {
	// Servers is the list of servers in the Raft configuration.
	Servers []*RaftServer

	// Index is the Raft index of this configuration.
	Index uint64
}
    49  
// RaftPeerByAddressRequest is used by the Operator endpoint to apply a Raft
// operation on a specific Raft peer, identified by its address in the form
// "IP:port".
type RaftPeerByAddressRequest struct {
	// Address is the peer to operate on (for example, the peer to remove),
	// in the form "IP:port".
	Address raft.ServerAddress

	// WriteRequest holds the Region for this request.
	WriteRequest
}
    59  
// RaftPeerByIDRequest is used by the Operator endpoint to apply a Raft
// operation on a specific Raft peer, identified by its server ID.
type RaftPeerByIDRequest struct {
	// ID is the ID of the peer to operate on (for example, the peer to
	// remove).
	ID raft.ServerID

	// WriteRequest holds the Region for this request.
	WriteRequest
}
    69  
// AutopilotSetConfigRequest is used by the Operator endpoint to update the
// current Autopilot configuration of the cluster.
type AutopilotSetConfigRequest struct {
	// Datacenter is the target this request is intended for. It is the value
	// reported by RequestDatacenter.
	Datacenter string

	// Config is the new Autopilot configuration to use.
	Config AutopilotConfig

	// CAS controls whether to use check-and-set semantics for this request.
	CAS bool

	// WriteRequest holds the ACL token to go along with this request.
	WriteRequest
}
    85  
    86  // RequestDatacenter returns the datacenter for a given request.
    87  func (op *AutopilotSetConfigRequest) RequestDatacenter() string {
    88  	return op.Datacenter
    89  }
    90  
// AutopilotConfig is the internal config for the Autopilot mechanism. All of
// its fields are plain values, so copying the struct copies the whole
// configuration.
type AutopilotConfig struct {
	// CleanupDeadServers controls whether to remove dead servers when a new
	// server is added to the Raft peers.
	CleanupDeadServers bool

	// ServerStabilizationTime is the minimum amount of time a server must be
	// in a stable, healthy state before it can be added to the cluster. Only
	// applicable with Raft protocol version 3 or higher.
	ServerStabilizationTime time.Duration

	// LastContactThreshold is the limit on the amount of time a server can go
	// without leader contact before being considered unhealthy.
	LastContactThreshold time.Duration

	// MaxTrailingLogs is the number of entries in the Raft log that a server
	// can be behind before being considered unhealthy.
	MaxTrailingLogs uint64

	// MinQuorum sets the minimum number of servers required in a cluster
	// before autopilot can prune dead servers.
	MinQuorum uint

	// (Enterprise-only) EnableRedundancyZones specifies whether to enable redundancy zones.
	EnableRedundancyZones bool

	// (Enterprise-only) DisableUpgradeMigration will disable Autopilot's upgrade migration
	// strategy of waiting until enough newer-versioned servers have been added to the
	// cluster before promoting them to voters.
	DisableUpgradeMigration bool

	// (Enterprise-only) EnableCustomUpgrades specifies whether to enable using custom
	// upgrade versions when performing migrations.
	EnableCustomUpgrades bool

	// CreateIndex/ModifyIndex store the create/modify indexes of this configuration.
	CreateIndex uint64
	ModifyIndex uint64
}
   130  
   131  func (a *AutopilotConfig) Copy() *AutopilotConfig {
   132  	if a == nil {
   133  		return nil
   134  	}
   135  
   136  	na := *a
   137  	return &na
   138  }
   139  
// SchedulerAlgorithm is a string enum of the valid values for the
// SchedulerAlgorithm field of a SchedulerConfiguration block. It is what makes
// the scheduling algorithm user-selectable.
type SchedulerAlgorithm string
   144  
const (
	// SchedulerAlgorithmBinpack indicates that the scheduler should pack
	// allocations onto as few nodes as possible. It is the algorithm used
	// when none is configured.
	SchedulerAlgorithmBinpack SchedulerAlgorithm = "binpack"

	// SchedulerAlgorithmSpread indicates that the scheduler should spread
	// allocations as evenly as possible over the available hardware.
	SchedulerAlgorithmSpread SchedulerAlgorithm = "spread"
)
   154  
// SchedulerConfiguration is the config for controlling scheduler behavior.
type SchedulerConfiguration struct {
	// SchedulerAlgorithm lets you select between available scheduling
	// algorithms. An empty value is treated as SchedulerAlgorithmBinpack by
	// EffectiveSchedulerAlgorithm and Canonicalize.
	SchedulerAlgorithm SchedulerAlgorithm `hcl:"scheduler_algorithm"`

	// PreemptionConfig specifies whether to enable eviction of lower
	// priority jobs to place higher priority jobs.
	PreemptionConfig PreemptionConfig `hcl:"preemption_config"`

	// MemoryOversubscriptionEnabled specifies whether memory oversubscription
	// is enabled.
	MemoryOversubscriptionEnabled bool `hcl:"memory_oversubscription_enabled"`

	// RejectJobRegistration disables new job registrations except with a
	// management ACL token.
	RejectJobRegistration bool `hcl:"reject_job_registration"`

	// PauseEvalBroker is a boolean to control whether the evaluation broker
	// should be paused on the cluster leader. Only a single broker runs per
	// region, and it must be persisted to state so the parameter is consistent
	// during leadership transitions.
	PauseEvalBroker bool `hcl:"pause_eval_broker"`

	// CreateIndex/ModifyIndex store the create/modify indexes of this configuration.
	CreateIndex uint64
	ModifyIndex uint64
}
   181  
   182  func (s *SchedulerConfiguration) Copy() *SchedulerConfiguration {
   183  	if s == nil {
   184  		return s
   185  	}
   186  
   187  	ns := *s
   188  	return &ns
   189  }
   190  
   191  func (s *SchedulerConfiguration) EffectiveSchedulerAlgorithm() SchedulerAlgorithm {
   192  	if s == nil || s.SchedulerAlgorithm == "" {
   193  		return SchedulerAlgorithmBinpack
   194  	}
   195  
   196  	return s.SchedulerAlgorithm
   197  }
   198  
   199  // WithNodePool returns a new SchedulerConfiguration with the node pool
   200  // scheduler configuration applied.
   201  func (s *SchedulerConfiguration) WithNodePool(pool *NodePool) *SchedulerConfiguration {
   202  	schedConfig := s.Copy()
   203  
   204  	if pool == nil || pool.SchedulerConfiguration == nil {
   205  		return schedConfig
   206  	}
   207  
   208  	poolConfig := pool.SchedulerConfiguration
   209  	if poolConfig.SchedulerAlgorithm != "" {
   210  		schedConfig.SchedulerAlgorithm = poolConfig.SchedulerAlgorithm
   211  	}
   212  	if poolConfig.MemoryOversubscriptionEnabled != nil {
   213  		schedConfig.MemoryOversubscriptionEnabled = *poolConfig.MemoryOversubscriptionEnabled
   214  	}
   215  
   216  	return schedConfig
   217  }
   218  
   219  func (s *SchedulerConfiguration) Canonicalize() {
   220  	if s != nil && s.SchedulerAlgorithm == "" {
   221  		s.SchedulerAlgorithm = SchedulerAlgorithmBinpack
   222  	}
   223  }
   224  
   225  func (s *SchedulerConfiguration) Validate() error {
   226  	if s == nil {
   227  		return nil
   228  	}
   229  
   230  	switch s.SchedulerAlgorithm {
   231  	case "", SchedulerAlgorithmBinpack, SchedulerAlgorithmSpread:
   232  	default:
   233  		return fmt.Errorf("invalid scheduler algorithm: %v", s.SchedulerAlgorithm)
   234  	}
   235  
   236  	return nil
   237  }
   238  
// SchedulerConfigurationResponse is the response object that wraps
// SchedulerConfiguration.
type SchedulerConfigurationResponse struct {
	// SchedulerConfig contains the scheduler config options.
	SchedulerConfig *SchedulerConfiguration

	QueryMeta
}
   246  
// SchedulerSetConfigurationResponse is the response object used
// when updating scheduler configuration.
type SchedulerSetConfigurationResponse struct {
	// Updated reports whether the config was actually updated.
	// It is only set when the request uses CAS.
	Updated bool

	WriteMeta
}
   256  
// PreemptionConfig specifies whether preemption is enabled, with one
// independent switch per scheduler type.
type PreemptionConfig struct {
	// SystemSchedulerEnabled specifies if preemption is enabled for system jobs.
	SystemSchedulerEnabled bool `hcl:"system_scheduler_enabled"`

	// SysBatchSchedulerEnabled specifies if preemption is enabled for sysbatch jobs.
	SysBatchSchedulerEnabled bool `hcl:"sysbatch_scheduler_enabled"`

	// BatchSchedulerEnabled specifies if preemption is enabled for batch jobs.
	BatchSchedulerEnabled bool `hcl:"batch_scheduler_enabled"`

	// ServiceSchedulerEnabled specifies if preemption is enabled for service jobs.
	ServiceSchedulerEnabled bool `hcl:"service_scheduler_enabled"`
}
   271  
// SchedulerSetConfigRequest is used by the Operator endpoint to update the
// current Scheduler configuration of the cluster.
type SchedulerSetConfigRequest struct {
	// Config is the new Scheduler configuration to use.
	Config SchedulerConfiguration

	// CAS controls whether to use check-and-set semantics for this request.
	CAS bool

	// WriteRequest holds the ACL token to go along with this request.
	WriteRequest
}
   284  
// SnapshotSaveRequest is used by the Operator endpoint to get a Raft snapshot.
// It carries nothing beyond the embedded QueryOptions.
type SnapshotSaveRequest struct {
	QueryOptions
}
   289  
// SnapshotSaveResponse is the header for the streaming snapshot endpoint,
// and is followed by the snapshot file content.
type SnapshotSaveResponse struct {

	// SnapshotChecksum returns the checksum of snapshot file in the format
	// `<algo>=<base64>` (e.g. `sha-256=...`)
	SnapshotChecksum string

	// ErrorCode is an HTTP error code if an error is found, e.g. 403 for
	// permission errors. It is omitted from the encoding when zero.
	ErrorCode int `codec:",omitempty"`

	// ErrorMsg is the error message if an error is found, e.g. "Permission
	// Denied". It is omitted from the encoding when empty.
	ErrorMsg string `codec:",omitempty"`

	QueryMeta
}
   306  
// SnapshotRestoreRequest carries only the embedded WriteRequest.
// NOTE(review): presumably the Operator endpoint request for restoring a Raft
// snapshot, as the counterpart of SnapshotSaveRequest — confirm against the
// endpoint implementation.
type SnapshotRestoreRequest struct {
	WriteRequest
}
   310  
// SnapshotRestoreResponse carries an optional error code and message plus the
// embedded QueryMeta.
// NOTE(review): presumably the response to a SnapshotRestoreRequest — confirm
// against the endpoint implementation.
type SnapshotRestoreResponse struct {
	// ErrorCode and ErrorMsg have the same names and codec tags as the error
	// fields of SnapshotSaveResponse; presumably an HTTP error code and its
	// message — confirm. Both are omitted from the encoding when empty.
	ErrorCode int    `codec:",omitempty"`
	ErrorMsg  string `codec:",omitempty"`

	QueryMeta
}