github.com/hashicorp/nomad/api@v0.0.0-20240306165712-3193ac204f65/operator.go (about) 1 // Copyright (c) HashiCorp, Inc. 2 // SPDX-License-Identifier: MPL-2.0 3 4 package api 5 6 import ( 7 "encoding/json" 8 "errors" 9 "io" 10 "net/http" 11 "strconv" 12 "strings" 13 "time" 14 ) 15 16 // Operator can be used to perform low-level operator tasks for Nomad. 17 type Operator struct { 18 c *Client 19 } 20 21 // Operator returns a handle to the operator endpoints. 22 func (c *Client) Operator() *Operator { 23 return &Operator{c} 24 } 25 26 // RaftServer has information about a server in the Raft configuration. 27 type RaftServer struct { 28 // ID is the unique ID for the server. These are currently the same 29 // as the address, but they will be changed to a real GUID in a future 30 // release of Nomad. 31 ID string 32 33 // Node is the node name of the server, as known by Nomad, or this 34 // will be set to "(unknown)" otherwise. 35 Node string 36 37 // Address is the IP:port of the server, used for Raft communications. 38 Address string 39 40 // Leader is true if this server is the current cluster leader. 41 Leader bool 42 43 // Voter is true if this server has a vote in the cluster. This might 44 // be false if the server is staging and still coming online, or if 45 // it's a non-voting server, which will be added in a future release of 46 // Nomad. 47 Voter bool 48 49 // RaftProtocol is the version of the Raft protocol spoken by this server. 50 RaftProtocol string 51 } 52 53 // RaftConfiguration is returned when querying for the current Raft configuration. 54 type RaftConfiguration struct { 55 // Servers has the list of servers in the Raft configuration. 56 Servers []*RaftServer 57 58 // Index has the Raft index of this configuration. 59 Index uint64 60 } 61 62 // RaftGetConfiguration is used to query the current Raft peer set. 63 func (op *Operator) RaftGetConfiguration(q *QueryOptions) (*RaftConfiguration, error) { 64 r, err := op.c.newRequest("GET", "/v1/operator/raft/configuration") 65 if err != nil { 66 return nil, err 67 } 68 r.setQueryOptions(q) 69 _, resp, err := requireOK(op.c.doRequest(r)) //nolint:bodyclose 70 if err != nil { 71 return nil, err 72 } 73 defer resp.Body.Close() 74 75 var out RaftConfiguration 76 if err := decodeBody(resp, &out); err != nil { 77 return nil, err 78 } 79 return &out, nil 80 } 81 82 // RaftRemovePeerByAddress is used to kick a stale peer (one that it in the Raft 83 // quorum but no longer known to Serf or the catalog) by address in the form of 84 // "IP:port". 85 func (op *Operator) RaftRemovePeerByAddress(address string, q *WriteOptions) error { 86 r, err := op.c.newRequest("DELETE", "/v1/operator/raft/peer") 87 if err != nil { 88 return err 89 } 90 r.setWriteOptions(q) 91 92 r.params.Set("address", address) 93 94 _, resp, err := requireOK(op.c.doRequest(r)) //nolint:bodyclose 95 if err != nil { 96 return err 97 } 98 99 resp.Body.Close() 100 return nil 101 } 102 103 // RaftRemovePeerByID is used to kick a stale peer (one that is in the Raft 104 // quorum but no longer known to Serf or the catalog) by ID. 105 func (op *Operator) RaftRemovePeerByID(id string, q *WriteOptions) error { 106 r, err := op.c.newRequest("DELETE", "/v1/operator/raft/peer") 107 if err != nil { 108 return err 109 } 110 r.setWriteOptions(q) 111 112 r.params.Set("id", id) 113 114 _, resp, err := requireOK(op.c.doRequest(r)) //nolint:bodyclose 115 if err != nil { 116 return err 117 } 118 119 resp.Body.Close() 120 return nil 121 } 122 123 // RaftTransferLeadershipByAddress is used to transfer leadership to a 124 // different peer using its address in the form of "IP:port". 125 func (op *Operator) RaftTransferLeadershipByAddress(address string, q *WriteOptions) error { 126 r, err := op.c.newRequest("PUT", "/v1/operator/raft/transfer-leadership") 127 if err != nil { 128 return err 129 } 130 r.setWriteOptions(q) 131 132 r.params.Set("address", address) 133 134 _, resp, err := requireOK(op.c.doRequest(r)) //nolint:bodyclose 135 if err != nil { 136 return err 137 } 138 139 resp.Body.Close() 140 return nil 141 } 142 143 // RaftTransferLeadershipByID is used to transfer leadership to a 144 // different peer using its Raft ID. 145 func (op *Operator) RaftTransferLeadershipByID(id string, q *WriteOptions) error { 146 r, err := op.c.newRequest("PUT", "/v1/operator/raft/transfer-leadership") 147 if err != nil { 148 return err 149 } 150 r.setWriteOptions(q) 151 152 r.params.Set("id", id) 153 154 _, resp, err := requireOK(op.c.doRequest(r)) //nolint:bodyclose 155 if err != nil { 156 return err 157 } 158 159 resp.Body.Close() 160 return nil 161 } 162 163 // SchedulerConfiguration is the config for controlling scheduler behavior 164 type SchedulerConfiguration struct { 165 // SchedulerAlgorithm lets you select between available scheduling algorithms. 166 SchedulerAlgorithm SchedulerAlgorithm 167 168 // PreemptionConfig specifies whether to enable eviction of lower 169 // priority jobs to place higher priority jobs. 170 PreemptionConfig PreemptionConfig 171 172 // MemoryOversubscriptionEnabled specifies whether memory oversubscription is enabled 173 MemoryOversubscriptionEnabled bool 174 175 // RejectJobRegistration disables new job registrations except with a 176 // management ACL token 177 RejectJobRegistration bool 178 179 // PauseEvalBroker stops the leader evaluation broker process from running 180 // until the configuration is updated and written to the Nomad servers. 181 PauseEvalBroker bool 182 183 // CreateIndex/ModifyIndex store the create/modify indexes of this configuration. 184 CreateIndex uint64 185 ModifyIndex uint64 186 } 187 188 // SchedulerConfigurationResponse is the response object that wraps SchedulerConfiguration 189 type SchedulerConfigurationResponse struct { 190 // SchedulerConfig contains scheduler config options 191 SchedulerConfig *SchedulerConfiguration 192 193 QueryMeta 194 } 195 196 // SchedulerSetConfigurationResponse is the response object used 197 // when updating scheduler configuration 198 type SchedulerSetConfigurationResponse struct { 199 // Updated returns whether the config was actually updated 200 // Only set when the request uses CAS 201 Updated bool 202 203 WriteMeta 204 } 205 206 // SchedulerAlgorithm is an enum string that encapsulates the valid options for a 207 // SchedulerConfiguration block's SchedulerAlgorithm. These modes will allow the 208 // scheduler to be user-selectable. 209 type SchedulerAlgorithm string 210 211 const ( 212 SchedulerAlgorithmBinpack SchedulerAlgorithm = "binpack" 213 SchedulerAlgorithmSpread SchedulerAlgorithm = "spread" 214 ) 215 216 // PreemptionConfig specifies whether preemption is enabled based on scheduler type 217 type PreemptionConfig struct { 218 SystemSchedulerEnabled bool 219 SysBatchSchedulerEnabled bool 220 BatchSchedulerEnabled bool 221 ServiceSchedulerEnabled bool 222 } 223 224 // SchedulerGetConfiguration is used to query the current Scheduler configuration. 225 func (op *Operator) SchedulerGetConfiguration(q *QueryOptions) (*SchedulerConfigurationResponse, *QueryMeta, error) { 226 var resp SchedulerConfigurationResponse 227 qm, err := op.c.query("/v1/operator/scheduler/configuration", &resp, q) 228 if err != nil { 229 return nil, nil, err 230 } 231 return &resp, qm, nil 232 } 233 234 // SchedulerSetConfiguration is used to set the current Scheduler configuration. 235 func (op *Operator) SchedulerSetConfiguration(conf *SchedulerConfiguration, q *WriteOptions) (*SchedulerSetConfigurationResponse, *WriteMeta, error) { 236 var out SchedulerSetConfigurationResponse 237 wm, err := op.c.put("/v1/operator/scheduler/configuration", conf, &out, q) 238 if err != nil { 239 return nil, nil, err 240 } 241 return &out, wm, nil 242 } 243 244 // SchedulerCASConfiguration is used to perform a Check-And-Set update on the 245 // Scheduler configuration. The ModifyIndex value will be respected. Returns 246 // true on success or false on failures. 247 func (op *Operator) SchedulerCASConfiguration(conf *SchedulerConfiguration, q *WriteOptions) (*SchedulerSetConfigurationResponse, *WriteMeta, error) { 248 var out SchedulerSetConfigurationResponse 249 wm, err := op.c.put("/v1/operator/scheduler/configuration?cas="+strconv.FormatUint(conf.ModifyIndex, 10), conf, &out, q) 250 if err != nil { 251 return nil, nil, err 252 } 253 254 return &out, wm, nil 255 } 256 257 // Snapshot is used to capture a snapshot state of a running cluster. 258 // The returned reader that must be consumed fully 259 func (op *Operator) Snapshot(q *QueryOptions) (io.ReadCloser, error) { 260 r, err := op.c.newRequest("GET", "/v1/operator/snapshot") 261 if err != nil { 262 return nil, err 263 } 264 r.setQueryOptions(q) 265 _, resp, err := requireOK(op.c.doRequest(r)) //nolint:bodyclose 266 if err != nil { 267 return nil, err 268 } 269 270 digest := resp.Header.Get("Digest") 271 272 cr, err := newChecksumValidatingReader(resp.Body, digest) 273 if err != nil { 274 io.Copy(io.Discard, resp.Body) 275 resp.Body.Close() 276 return nil, err 277 } 278 279 return cr, nil 280 } 281 282 // SnapshotRestore is used to restore a running nomad cluster to an original 283 // state. 284 func (op *Operator) SnapshotRestore(in io.Reader, q *WriteOptions) (*WriteMeta, error) { 285 wm, err := op.c.put("/v1/operator/snapshot", in, nil, q) 286 if err != nil { 287 return nil, err 288 } 289 290 return wm, nil 291 } 292 293 type License struct { 294 // The unique identifier of the license 295 LicenseID string 296 297 // The customer ID associated with the license 298 CustomerID string 299 300 // If set, an identifier that should be used to lock the license to a 301 // particular site, cluster, etc. 302 InstallationID string 303 304 // The time at which the license was issued 305 IssueTime time.Time 306 307 // The time at which the license starts being valid 308 StartTime time.Time 309 310 // The time after which the license expires 311 ExpirationTime time.Time 312 313 // The time at which the license ceases to function and can 314 // no longer be used in any capacity 315 TerminationTime time.Time 316 317 // The product the license is valid for 318 Product string 319 320 // License Specific Flags 321 Flags map[string]interface{} 322 323 // Modules is a list of the licensed enterprise modules 324 Modules []string 325 326 // List of features enabled by the license 327 Features []string 328 } 329 330 type LicenseReply struct { 331 License *License 332 ConfigOutdated bool 333 QueryMeta 334 } 335 336 type ApplyLicenseOptions struct { 337 Force bool 338 } 339 340 func (op *Operator) LicensePut(license string, q *WriteOptions) (*WriteMeta, error) { 341 return op.ApplyLicense(license, nil, q) 342 } 343 344 func (op *Operator) ApplyLicense(license string, opts *ApplyLicenseOptions, q *WriteOptions) (*WriteMeta, error) { 345 r, err := op.c.newRequest("PUT", "/v1/operator/license") 346 if err != nil { 347 return nil, err 348 } 349 350 if opts != nil && opts.Force { 351 r.params.Add("force", "true") 352 } 353 354 r.setWriteOptions(q) 355 r.body = strings.NewReader(license) 356 357 rtt, resp, err := requireOK(op.c.doRequest(r)) //nolint:bodyclose 358 if err != nil { 359 return nil, err 360 } 361 defer resp.Body.Close() 362 363 wm := &WriteMeta{RequestTime: rtt} 364 parseWriteMeta(resp, wm) 365 366 return wm, nil 367 } 368 369 func (op *Operator) LicenseGet(q *QueryOptions) (*LicenseReply, *QueryMeta, error) { 370 req, err := op.c.newRequest("GET", "/v1/operator/license") 371 if err != nil { 372 return nil, nil, err 373 } 374 req.setQueryOptions(q) 375 376 var reply LicenseReply 377 rtt, resp, err := op.c.doRequest(req) //nolint:bodyclose 378 if err != nil { 379 return nil, nil, err 380 } 381 defer resp.Body.Close() 382 383 if resp.StatusCode == http.StatusNoContent { 384 return nil, nil, errors.New("Nomad Enterprise only endpoint") 385 } 386 387 if resp.StatusCode != http.StatusOK { 388 return nil, nil, newUnexpectedResponseError( 389 fromHTTPResponse(resp), 390 withExpectedStatuses([]int{http.StatusOK, http.StatusNoContent}), 391 ) 392 } 393 394 err = json.NewDecoder(resp.Body).Decode(&reply) 395 if err != nil { 396 return nil, nil, err 397 } 398 399 qm := &QueryMeta{} 400 parseQueryMeta(resp, qm) 401 qm.RequestTime = rtt 402 403 return &reply, qm, nil 404 } 405 406 type LeadershipTransferResponse struct { 407 From RaftServer 408 To RaftServer 409 Noop bool 410 Err error 411 412 WriteMeta 413 } 414 415 // VaultWorkloadIdentityUpgradeCheck is the result of verifying if the cluster 416 // is ready to switch to workload identities for Vault. 417 type VaultWorkloadIdentityUpgradeCheck struct { 418 // JobsWithoutVaultIdentity is the list of jobs that have a `vault` block 419 // but do not have an `identity` for Vault. 420 JobsWithoutVaultIdentity []*JobListStub 421 422 // OutdatedNodes is the list of nodes running a version of Nomad that does 423 // not support workload identities for Vault. 424 OutdatedNodes []*NodeListStub 425 426 // VaultTokens is the list of Vault ACL token accessors that Nomad created 427 // and will no longer manage after the cluster is migrated to workload 428 // identities. 429 VaultTokens []*VaultAccessor 430 } 431 432 // Ready returns true if the cluster is ready to migrate to workload identities 433 // with Vault. 434 func (v *VaultWorkloadIdentityUpgradeCheck) Ready() bool { 435 return v != nil && 436 len(v.VaultTokens) == 0 && 437 len(v.OutdatedNodes) == 0 && 438 len(v.JobsWithoutVaultIdentity) == 0 439 } 440 441 // VaultAccessor is a Vault ACL token created by Nomad for a task to access 442 // Vault using the legacy authentication flow. 443 type VaultAccessor struct { 444 // AllocID is the ID of the allocation that requested this token. 445 AllocID string 446 447 // Task is the name of the task that requested this token. 448 Task string 449 450 // NodeID is the ID of the node running the allocation that requested this 451 // token. 452 NodeID string 453 454 // Accessor is the Vault ACL token accessor ID. 455 Accessor string 456 457 // CreationTTL is the TTL set when the token was created. 458 CreationTTL int 459 460 // CreateIndex is the Raft index when the token was created. 461 CreateIndex uint64 462 } 463 464 // UpgradeCheckVaultWorkloadIdentity retrieves the cluster status for migrating 465 // to workload identities with Vault. 466 func (op *Operator) UpgradeCheckVaultWorkloadIdentity(q *QueryOptions) (*VaultWorkloadIdentityUpgradeCheck, *QueryMeta, error) { 467 var resp VaultWorkloadIdentityUpgradeCheck 468 qm, err := op.c.query("/v1/operator/upgrade-check/vault-workload-identity", &resp, q) 469 if err != nil { 470 return nil, nil, err 471 } 472 return &resp, qm, nil 473 }