github.com/emate/nomad@v0.8.2-wo-binpacking/api/operator_autopilot.go (about)

     1  package api
     2  
     3  import (
     4  	"encoding/json"
     5  	"strconv"
     6  	"time"
     7  )
     8  
     9  // AutopilotConfiguration is used for querying/setting the Autopilot configuration.
    10  // Autopilot helps manage operator tasks related to Nomad servers like removing
    11  // failed servers from the Raft quorum.
    12  type AutopilotConfiguration struct {
    13  	// CleanupDeadServers controls whether to remove dead servers from the Raft
    14  	// peer list when a new server joins
    15  	CleanupDeadServers bool
    16  
    17  	// LastContactThreshold is the limit on the amount of time a server can go
    18  	// without leader contact before being considered unhealthy.
    19  	LastContactThreshold time.Duration
    20  
    21  	// MaxTrailingLogs is the amount of entries in the Raft Log that a server can
    22  	// be behind before being considered unhealthy.
    23  	MaxTrailingLogs uint64
    24  
    25  	// ServerStabilizationTime is the minimum amount of time a server must be
    26  	// in a stable, healthy state before it can be added to the cluster. Only
    27  	// applicable with Raft protocol version 3 or higher.
    28  	ServerStabilizationTime time.Duration
    29  
    30  	// (Enterprise-only) EnableRedundancyZones specifies whether to enable redundancy zones.
    31  	EnableRedundancyZones bool
    32  
    33  	// (Enterprise-only) DisableUpgradeMigration will disable Autopilot's upgrade migration
    34  	// strategy of waiting until enough newer-versioned servers have been added to the
    35  	// cluster before promoting them to voters.
    36  	DisableUpgradeMigration bool
    37  
    38  	// (Enterprise-only) EnableCustomUpgrades specifies whether to enable using custom
    39  	// upgrade versions when performing migrations.
    40  	EnableCustomUpgrades bool
    41  
    42  	// CreateIndex holds the index corresponding the creation of this configuration.
    43  	// This is a read-only field.
    44  	CreateIndex uint64
    45  
    46  	// ModifyIndex will be set to the index of the last update when retrieving the
    47  	// Autopilot configuration. Resubmitting a configuration with
    48  	// AutopilotCASConfiguration will perform a check-and-set operation which ensures
    49  	// there hasn't been a subsequent update since the configuration was retrieved.
    50  	ModifyIndex uint64
    51  }
    52  
    53  func (u *AutopilotConfiguration) MarshalJSON() ([]byte, error) {
    54  	type Alias AutopilotConfiguration
    55  	return json.Marshal(&struct {
    56  		LastContactThreshold    string
    57  		ServerStabilizationTime string
    58  		*Alias
    59  	}{
    60  		LastContactThreshold:    u.LastContactThreshold.String(),
    61  		ServerStabilizationTime: u.ServerStabilizationTime.String(),
    62  		Alias: (*Alias)(u),
    63  	})
    64  }
    65  
    66  func (u *AutopilotConfiguration) UnmarshalJSON(data []byte) error {
    67  	type Alias AutopilotConfiguration
    68  	aux := &struct {
    69  		LastContactThreshold    string
    70  		ServerStabilizationTime string
    71  		*Alias
    72  	}{
    73  		Alias: (*Alias)(u),
    74  	}
    75  	if err := json.Unmarshal(data, &aux); err != nil {
    76  		return err
    77  	}
    78  	var err error
    79  	if aux.LastContactThreshold != "" {
    80  		if u.LastContactThreshold, err = time.ParseDuration(aux.LastContactThreshold); err != nil {
    81  			return err
    82  		}
    83  	}
    84  	if aux.ServerStabilizationTime != "" {
    85  		if u.ServerStabilizationTime, err = time.ParseDuration(aux.ServerStabilizationTime); err != nil {
    86  			return err
    87  		}
    88  	}
    89  	return nil
    90  }
    91  
    92  // ServerHealth is the health (from the leader's point of view) of a server.
    93  type ServerHealth struct {
    94  	// ID is the raft ID of the server.
    95  	ID string
    96  
    97  	// Name is the node name of the server.
    98  	Name string
    99  
   100  	// Address is the address of the server.
   101  	Address string
   102  
   103  	// The status of the SerfHealth check for the server.
   104  	SerfStatus string
   105  
   106  	// Version is the Nomad version of the server.
   107  	Version string
   108  
   109  	// Leader is whether this server is currently the leader.
   110  	Leader bool
   111  
   112  	// LastContact is the time since this node's last contact with the leader.
   113  	LastContact time.Duration
   114  
   115  	// LastTerm is the highest leader term this server has a record of in its Raft log.
   116  	LastTerm uint64
   117  
   118  	// LastIndex is the last log index this server has a record of in its Raft log.
   119  	LastIndex uint64
   120  
   121  	// Healthy is whether or not the server is healthy according to the current
   122  	// Autopilot config.
   123  	Healthy bool
   124  
   125  	// Voter is whether this is a voting server.
   126  	Voter bool
   127  
   128  	// StableSince is the last time this server's Healthy value changed.
   129  	StableSince time.Time
   130  }
   131  
   132  func (u *ServerHealth) MarshalJSON() ([]byte, error) {
   133  	type Alias ServerHealth
   134  	return json.Marshal(&struct {
   135  		LastContact string
   136  		*Alias
   137  	}{
   138  		LastContact: u.LastContact.String(),
   139  		Alias:       (*Alias)(u),
   140  	})
   141  }
   142  
   143  func (u *ServerHealth) UnmarshalJSON(data []byte) error {
   144  	type Alias ServerHealth
   145  	aux := &struct {
   146  		LastContact string
   147  		*Alias
   148  	}{
   149  		Alias: (*Alias)(u),
   150  	}
   151  	if err := json.Unmarshal(data, &aux); err != nil {
   152  		return err
   153  	}
   154  	var err error
   155  	if aux.LastContact != "" {
   156  		if u.LastContact, err = time.ParseDuration(aux.LastContact); err != nil {
   157  			return err
   158  		}
   159  	}
   160  	return nil
   161  }
   162  
// OperatorHealthReply is a representation of the overall health of the cluster.
// It is the value AutopilotServerHealth decodes from the
// /v1/operator/autopilot/health endpoint.
type OperatorHealthReply struct {
	// Healthy is true if all the servers in the cluster are healthy.
	Healthy bool

	// FailureTolerance is the number of healthy servers that could be lost without
	// an outage occurring.
	FailureTolerance int

	// Servers holds the health of each server, one ServerHealth entry per
	// server.
	Servers []ServerHealth
}
   175  
   176  // AutopilotGetConfiguration is used to query the current Autopilot configuration.
   177  func (op *Operator) AutopilotGetConfiguration(q *QueryOptions) (*AutopilotConfiguration, *QueryMeta, error) {
   178  	var resp AutopilotConfiguration
   179  	qm, err := op.c.query("/v1/operator/autopilot/configuration", &resp, q)
   180  	if err != nil {
   181  		return nil, nil, err
   182  	}
   183  	return &resp, qm, nil
   184  }
   185  
   186  // AutopilotSetConfiguration is used to set the current Autopilot configuration.
   187  func (op *Operator) AutopilotSetConfiguration(conf *AutopilotConfiguration, q *WriteOptions) (*WriteMeta, error) {
   188  	var out bool
   189  	wm, err := op.c.write("/v1/operator/autopilot/configuration", conf, &out, q)
   190  	if err != nil {
   191  		return nil, err
   192  	}
   193  	return wm, nil
   194  }
   195  
   196  // AutopilotCASConfiguration is used to perform a Check-And-Set update on the
   197  // Autopilot configuration. The ModifyIndex value will be respected. Returns
   198  // true on success or false on failures.
   199  func (op *Operator) AutopilotCASConfiguration(conf *AutopilotConfiguration, q *WriteOptions) (bool, *WriteMeta, error) {
   200  	var out bool
   201  	wm, err := op.c.write("/v1/operator/autopilot/configuration?cas="+strconv.FormatUint(conf.ModifyIndex, 10), conf, &out, q)
   202  	if err != nil {
   203  		return false, nil, err
   204  	}
   205  
   206  	return out, wm, nil
   207  }
   208  
   209  // AutopilotServerHealth is used to query Autopilot's top-level view of the health
   210  // of each Nomad server.
   211  func (op *Operator) AutopilotServerHealth(q *QueryOptions) (*OperatorHealthReply, *QueryMeta, error) {
   212  	var out OperatorHealthReply
   213  	qm, err := op.c.query("/v1/operator/autopilot/health", &out, q)
   214  	if err != nil {
   215  		return nil, nil, err
   216  	}
   217  	return &out, qm, nil
   218  }