github.com/hashicorp/nomad/api@v0.0.0-20240306165712-3193ac204f65/operator_autopilot.go (about) 1 // Copyright (c) HashiCorp, Inc. 2 // SPDX-License-Identifier: MPL-2.0 3 4 package api 5 6 import ( 7 "encoding/json" 8 "strconv" 9 "time" 10 ) 11 12 // AutopilotConfiguration is used for querying/setting the Autopilot configuration. 13 // Autopilot helps manage operator tasks related to Nomad servers like removing 14 // failed servers from the Raft quorum. 15 type AutopilotConfiguration struct { 16 // CleanupDeadServers controls whether to remove dead servers from the Raft 17 // peer list when a new server joins 18 CleanupDeadServers bool 19 20 // LastContactThreshold is the limit on the amount of time a server can go 21 // without leader contact before being considered unhealthy. 22 LastContactThreshold time.Duration 23 24 // MaxTrailingLogs is the amount of entries in the Raft Log that a server can 25 // be behind before being considered unhealthy. 26 MaxTrailingLogs uint64 27 28 // MinQuorum sets the minimum number of servers allowed in a cluster before 29 // autopilot can prune dead servers. 30 MinQuorum uint 31 32 // ServerStabilizationTime is the minimum amount of time a server must be 33 // in a stable, healthy state before it can be added to the cluster. Only 34 // applicable with Raft protocol version 3 or higher. 35 ServerStabilizationTime time.Duration 36 37 // (Enterprise-only) EnableRedundancyZones specifies whether to enable redundancy zones. 38 EnableRedundancyZones bool 39 40 // (Enterprise-only) DisableUpgradeMigration will disable Autopilot's upgrade migration 41 // strategy of waiting until enough newer-versioned servers have been added to the 42 // cluster before promoting them to voters. 43 DisableUpgradeMigration bool 44 45 // (Enterprise-only) EnableCustomUpgrades specifies whether to enable using custom 46 // upgrade versions when performing migrations. 47 EnableCustomUpgrades bool 48 49 // CreateIndex holds the index corresponding the creation of this configuration. 50 // This is a read-only field. 51 CreateIndex uint64 52 53 // ModifyIndex will be set to the index of the last update when retrieving the 54 // Autopilot configuration. Resubmitting a configuration with 55 // AutopilotCASConfiguration will perform a check-and-set operation which ensures 56 // there hasn't been a subsequent update since the configuration was retrieved. 57 ModifyIndex uint64 58 } 59 60 func (u *AutopilotConfiguration) MarshalJSON() ([]byte, error) { 61 type Alias AutopilotConfiguration 62 return json.Marshal(&struct { 63 LastContactThreshold string 64 ServerStabilizationTime string 65 *Alias 66 }{ 67 LastContactThreshold: u.LastContactThreshold.String(), 68 ServerStabilizationTime: u.ServerStabilizationTime.String(), 69 Alias: (*Alias)(u), 70 }) 71 } 72 73 func (u *AutopilotConfiguration) UnmarshalJSON(data []byte) error { 74 type Alias AutopilotConfiguration 75 aux := &struct { 76 LastContactThreshold string 77 ServerStabilizationTime string 78 *Alias 79 }{ 80 Alias: (*Alias)(u), 81 } 82 if err := json.Unmarshal(data, &aux); err != nil { 83 return err 84 } 85 var err error 86 if aux.LastContactThreshold != "" { 87 if u.LastContactThreshold, err = time.ParseDuration(aux.LastContactThreshold); err != nil { 88 return err 89 } 90 } 91 if aux.ServerStabilizationTime != "" { 92 if u.ServerStabilizationTime, err = time.ParseDuration(aux.ServerStabilizationTime); err != nil { 93 return err 94 } 95 } 96 return nil 97 } 98 99 // ServerHealth is the health (from the leader's point of view) of a server. 100 type ServerHealth struct { 101 // ID is the raft ID of the server. 102 ID string 103 104 // Name is the node name of the server. 105 Name string 106 107 // Address is the address of the server. 108 Address string 109 110 // The status of the SerfHealth check for the server. 111 SerfStatus string 112 113 // Version is the Nomad version of the server. 114 Version string 115 116 // Leader is whether this server is currently the leader. 117 Leader bool 118 119 // LastContact is the time since this node's last contact with the leader. 120 LastContact time.Duration 121 122 // LastTerm is the highest leader term this server has a record of in its Raft log. 123 LastTerm uint64 124 125 // LastIndex is the last log index this server has a record of in its Raft log. 126 LastIndex uint64 127 128 // Healthy is whether or not the server is healthy according to the current 129 // Autopilot config. 130 Healthy bool 131 132 // Voter is whether this is a voting server. 133 Voter bool 134 135 // StableSince is the last time this server's Healthy value changed. 136 StableSince time.Time 137 } 138 139 func (u *ServerHealth) MarshalJSON() ([]byte, error) { 140 type Alias ServerHealth 141 return json.Marshal(&struct { 142 LastContact string 143 *Alias 144 }{ 145 LastContact: u.LastContact.String(), 146 Alias: (*Alias)(u), 147 }) 148 } 149 150 func (u *ServerHealth) UnmarshalJSON(data []byte) error { 151 type Alias ServerHealth 152 aux := &struct { 153 LastContact string 154 *Alias 155 }{ 156 Alias: (*Alias)(u), 157 } 158 if err := json.Unmarshal(data, &aux); err != nil { 159 return err 160 } 161 var err error 162 if aux.LastContact != "" { 163 if u.LastContact, err = time.ParseDuration(aux.LastContact); err != nil { 164 return err 165 } 166 } 167 return nil 168 } 169 170 // OperatorHealthReply is a representation of the overall health of the cluster 171 type OperatorHealthReply struct { 172 // Healthy is true if all the servers in the cluster are healthy. 173 Healthy bool 174 175 // FailureTolerance is the number of healthy servers that could be lost without 176 // an outage occurring. 177 FailureTolerance int 178 179 // Servers holds the health of each server. 180 Servers []ServerHealth 181 } 182 183 // AutopilotGetConfiguration is used to query the current Autopilot configuration. 184 func (op *Operator) AutopilotGetConfiguration(q *QueryOptions) (*AutopilotConfiguration, *QueryMeta, error) { 185 var resp AutopilotConfiguration 186 qm, err := op.c.query("/v1/operator/autopilot/configuration", &resp, q) 187 if err != nil { 188 return nil, nil, err 189 } 190 return &resp, qm, nil 191 } 192 193 // AutopilotSetConfiguration is used to set the current Autopilot configuration. 194 func (op *Operator) AutopilotSetConfiguration(conf *AutopilotConfiguration, q *WriteOptions) (*WriteMeta, error) { 195 var out bool 196 wm, err := op.c.put("/v1/operator/autopilot/configuration", conf, &out, q) 197 if err != nil { 198 return nil, err 199 } 200 return wm, nil 201 } 202 203 // AutopilotCASConfiguration is used to perform a Check-And-Set update on the 204 // Autopilot configuration. The ModifyIndex value will be respected. Returns 205 // true on success or false on failures. 206 func (op *Operator) AutopilotCASConfiguration(conf *AutopilotConfiguration, q *WriteOptions) (bool, *WriteMeta, error) { 207 var out bool 208 wm, err := op.c.put("/v1/operator/autopilot/configuration?cas="+strconv.FormatUint(conf.ModifyIndex, 10), conf, &out, q) 209 if err != nil { 210 return false, nil, err 211 } 212 213 return out, wm, nil 214 } 215 216 // AutopilotServerHealth is used to query Autopilot's top-level view of the health 217 // of each Nomad server. 218 func (op *Operator) AutopilotServerHealth(q *QueryOptions) (*OperatorHealthReply, *QueryMeta, error) { 219 var out OperatorHealthReply 220 qm, err := op.c.query("/v1/operator/autopilot/health", &out, q) 221 if err != nil { 222 return nil, nil, err 223 } 224 return &out, qm, nil 225 }