github.com/hashicorp/nomad/api@v0.0.0-20240306165712-3193ac204f65/operator.go (about)

     1  // Copyright (c) HashiCorp, Inc.
     2  // SPDX-License-Identifier: MPL-2.0
     3  
     4  package api
     5  
     6  import (
     7  	"encoding/json"
     8  	"errors"
     9  	"io"
    10  	"net/http"
    11  	"strconv"
    12  	"strings"
    13  	"time"
    14  )
    15  
    16  // Operator can be used to perform low-level operator tasks for Nomad.
    17  type Operator struct {
    18  	c *Client
    19  }
    20  
    21  // Operator returns a handle to the operator endpoints.
    22  func (c *Client) Operator() *Operator {
    23  	return &Operator{c}
    24  }
    25  
    26  // RaftServer has information about a server in the Raft configuration.
    27  type RaftServer struct {
    28  	// ID is the unique ID for the server. These are currently the same
    29  	// as the address, but they will be changed to a real GUID in a future
    30  	// release of Nomad.
    31  	ID string
    32  
    33  	// Node is the node name of the server, as known by Nomad, or this
    34  	// will be set to "(unknown)" otherwise.
    35  	Node string
    36  
    37  	// Address is the IP:port of the server, used for Raft communications.
    38  	Address string
    39  
    40  	// Leader is true if this server is the current cluster leader.
    41  	Leader bool
    42  
    43  	// Voter is true if this server has a vote in the cluster. This might
    44  	// be false if the server is staging and still coming online, or if
    45  	// it's a non-voting server, which will be added in a future release of
    46  	// Nomad.
    47  	Voter bool
    48  
    49  	// RaftProtocol is the version of the Raft protocol spoken by this server.
    50  	RaftProtocol string
    51  }
    52  
    53  // RaftConfiguration is returned when querying for the current Raft configuration.
    54  type RaftConfiguration struct {
    55  	// Servers has the list of servers in the Raft configuration.
    56  	Servers []*RaftServer
    57  
    58  	// Index has the Raft index of this configuration.
    59  	Index uint64
    60  }
    61  
    62  // RaftGetConfiguration is used to query the current Raft peer set.
    63  func (op *Operator) RaftGetConfiguration(q *QueryOptions) (*RaftConfiguration, error) {
    64  	r, err := op.c.newRequest("GET", "/v1/operator/raft/configuration")
    65  	if err != nil {
    66  		return nil, err
    67  	}
    68  	r.setQueryOptions(q)
    69  	_, resp, err := requireOK(op.c.doRequest(r)) //nolint:bodyclose
    70  	if err != nil {
    71  		return nil, err
    72  	}
    73  	defer resp.Body.Close()
    74  
    75  	var out RaftConfiguration
    76  	if err := decodeBody(resp, &out); err != nil {
    77  		return nil, err
    78  	}
    79  	return &out, nil
    80  }
    81  
    82  // RaftRemovePeerByAddress is used to kick a stale peer (one that it in the Raft
    83  // quorum but no longer known to Serf or the catalog) by address in the form of
    84  // "IP:port".
    85  func (op *Operator) RaftRemovePeerByAddress(address string, q *WriteOptions) error {
    86  	r, err := op.c.newRequest("DELETE", "/v1/operator/raft/peer")
    87  	if err != nil {
    88  		return err
    89  	}
    90  	r.setWriteOptions(q)
    91  
    92  	r.params.Set("address", address)
    93  
    94  	_, resp, err := requireOK(op.c.doRequest(r)) //nolint:bodyclose
    95  	if err != nil {
    96  		return err
    97  	}
    98  
    99  	resp.Body.Close()
   100  	return nil
   101  }
   102  
   103  // RaftRemovePeerByID is used to kick a stale peer (one that is in the Raft
   104  // quorum but no longer known to Serf or the catalog) by ID.
   105  func (op *Operator) RaftRemovePeerByID(id string, q *WriteOptions) error {
   106  	r, err := op.c.newRequest("DELETE", "/v1/operator/raft/peer")
   107  	if err != nil {
   108  		return err
   109  	}
   110  	r.setWriteOptions(q)
   111  
   112  	r.params.Set("id", id)
   113  
   114  	_, resp, err := requireOK(op.c.doRequest(r)) //nolint:bodyclose
   115  	if err != nil {
   116  		return err
   117  	}
   118  
   119  	resp.Body.Close()
   120  	return nil
   121  }
   122  
   123  // RaftTransferLeadershipByAddress is used to transfer leadership to a
   124  // different peer using its address in the form of "IP:port".
   125  func (op *Operator) RaftTransferLeadershipByAddress(address string, q *WriteOptions) error {
   126  	r, err := op.c.newRequest("PUT", "/v1/operator/raft/transfer-leadership")
   127  	if err != nil {
   128  		return err
   129  	}
   130  	r.setWriteOptions(q)
   131  
   132  	r.params.Set("address", address)
   133  
   134  	_, resp, err := requireOK(op.c.doRequest(r)) //nolint:bodyclose
   135  	if err != nil {
   136  		return err
   137  	}
   138  
   139  	resp.Body.Close()
   140  	return nil
   141  }
   142  
   143  // RaftTransferLeadershipByID is used to transfer leadership to a
   144  // different peer using its Raft ID.
   145  func (op *Operator) RaftTransferLeadershipByID(id string, q *WriteOptions) error {
   146  	r, err := op.c.newRequest("PUT", "/v1/operator/raft/transfer-leadership")
   147  	if err != nil {
   148  		return err
   149  	}
   150  	r.setWriteOptions(q)
   151  
   152  	r.params.Set("id", id)
   153  
   154  	_, resp, err := requireOK(op.c.doRequest(r)) //nolint:bodyclose
   155  	if err != nil {
   156  		return err
   157  	}
   158  
   159  	resp.Body.Close()
   160  	return nil
   161  }
   162  
   163  // SchedulerConfiguration is the config for controlling scheduler behavior
   164  type SchedulerConfiguration struct {
   165  	// SchedulerAlgorithm lets you select between available scheduling algorithms.
   166  	SchedulerAlgorithm SchedulerAlgorithm
   167  
   168  	// PreemptionConfig specifies whether to enable eviction of lower
   169  	// priority jobs to place higher priority jobs.
   170  	PreemptionConfig PreemptionConfig
   171  
   172  	// MemoryOversubscriptionEnabled specifies whether memory oversubscription is enabled
   173  	MemoryOversubscriptionEnabled bool
   174  
   175  	// RejectJobRegistration disables new job registrations except with a
   176  	// management ACL token
   177  	RejectJobRegistration bool
   178  
   179  	// PauseEvalBroker stops the leader evaluation broker process from running
   180  	// until the configuration is updated and written to the Nomad servers.
   181  	PauseEvalBroker bool
   182  
   183  	// CreateIndex/ModifyIndex store the create/modify indexes of this configuration.
   184  	CreateIndex uint64
   185  	ModifyIndex uint64
   186  }
   187  
   188  // SchedulerConfigurationResponse is the response object that wraps SchedulerConfiguration
   189  type SchedulerConfigurationResponse struct {
   190  	// SchedulerConfig contains scheduler config options
   191  	SchedulerConfig *SchedulerConfiguration
   192  
   193  	QueryMeta
   194  }
   195  
   196  // SchedulerSetConfigurationResponse is the response object used
   197  // when updating scheduler configuration
   198  type SchedulerSetConfigurationResponse struct {
   199  	// Updated returns whether the config was actually updated
   200  	// Only set when the request uses CAS
   201  	Updated bool
   202  
   203  	WriteMeta
   204  }
   205  
   206  // SchedulerAlgorithm is an enum string that encapsulates the valid options for a
   207  // SchedulerConfiguration block's SchedulerAlgorithm. These modes will allow the
   208  // scheduler to be user-selectable.
   209  type SchedulerAlgorithm string
   210  
   211  const (
   212  	SchedulerAlgorithmBinpack SchedulerAlgorithm = "binpack"
   213  	SchedulerAlgorithmSpread  SchedulerAlgorithm = "spread"
   214  )
   215  
   216  // PreemptionConfig specifies whether preemption is enabled based on scheduler type
   217  type PreemptionConfig struct {
   218  	SystemSchedulerEnabled   bool
   219  	SysBatchSchedulerEnabled bool
   220  	BatchSchedulerEnabled    bool
   221  	ServiceSchedulerEnabled  bool
   222  }
   223  
   224  // SchedulerGetConfiguration is used to query the current Scheduler configuration.
   225  func (op *Operator) SchedulerGetConfiguration(q *QueryOptions) (*SchedulerConfigurationResponse, *QueryMeta, error) {
   226  	var resp SchedulerConfigurationResponse
   227  	qm, err := op.c.query("/v1/operator/scheduler/configuration", &resp, q)
   228  	if err != nil {
   229  		return nil, nil, err
   230  	}
   231  	return &resp, qm, nil
   232  }
   233  
   234  // SchedulerSetConfiguration is used to set the current Scheduler configuration.
   235  func (op *Operator) SchedulerSetConfiguration(conf *SchedulerConfiguration, q *WriteOptions) (*SchedulerSetConfigurationResponse, *WriteMeta, error) {
   236  	var out SchedulerSetConfigurationResponse
   237  	wm, err := op.c.put("/v1/operator/scheduler/configuration", conf, &out, q)
   238  	if err != nil {
   239  		return nil, nil, err
   240  	}
   241  	return &out, wm, nil
   242  }
   243  
   244  // SchedulerCASConfiguration is used to perform a Check-And-Set update on the
   245  // Scheduler configuration. The ModifyIndex value will be respected. Returns
   246  // true on success or false on failures.
   247  func (op *Operator) SchedulerCASConfiguration(conf *SchedulerConfiguration, q *WriteOptions) (*SchedulerSetConfigurationResponse, *WriteMeta, error) {
   248  	var out SchedulerSetConfigurationResponse
   249  	wm, err := op.c.put("/v1/operator/scheduler/configuration?cas="+strconv.FormatUint(conf.ModifyIndex, 10), conf, &out, q)
   250  	if err != nil {
   251  		return nil, nil, err
   252  	}
   253  
   254  	return &out, wm, nil
   255  }
   256  
   257  // Snapshot is used to capture a snapshot state of a running cluster.
   258  // The returned reader that must be consumed fully
   259  func (op *Operator) Snapshot(q *QueryOptions) (io.ReadCloser, error) {
   260  	r, err := op.c.newRequest("GET", "/v1/operator/snapshot")
   261  	if err != nil {
   262  		return nil, err
   263  	}
   264  	r.setQueryOptions(q)
   265  	_, resp, err := requireOK(op.c.doRequest(r)) //nolint:bodyclose
   266  	if err != nil {
   267  		return nil, err
   268  	}
   269  
   270  	digest := resp.Header.Get("Digest")
   271  
   272  	cr, err := newChecksumValidatingReader(resp.Body, digest)
   273  	if err != nil {
   274  		io.Copy(io.Discard, resp.Body)
   275  		resp.Body.Close()
   276  		return nil, err
   277  	}
   278  
   279  	return cr, nil
   280  }
   281  
   282  // SnapshotRestore is used to restore a running nomad cluster to an original
   283  // state.
   284  func (op *Operator) SnapshotRestore(in io.Reader, q *WriteOptions) (*WriteMeta, error) {
   285  	wm, err := op.c.put("/v1/operator/snapshot", in, nil, q)
   286  	if err != nil {
   287  		return nil, err
   288  	}
   289  
   290  	return wm, nil
   291  }
   292  
   293  type License struct {
   294  	// The unique identifier of the license
   295  	LicenseID string
   296  
   297  	// The customer ID associated with the license
   298  	CustomerID string
   299  
   300  	// If set, an identifier that should be used to lock the license to a
   301  	// particular site, cluster, etc.
   302  	InstallationID string
   303  
   304  	// The time at which the license was issued
   305  	IssueTime time.Time
   306  
   307  	// The time at which the license starts being valid
   308  	StartTime time.Time
   309  
   310  	// The time after which the license expires
   311  	ExpirationTime time.Time
   312  
   313  	// The time at which the license ceases to function and can
   314  	// no longer be used in any capacity
   315  	TerminationTime time.Time
   316  
   317  	// The product the license is valid for
   318  	Product string
   319  
   320  	// License Specific Flags
   321  	Flags map[string]interface{}
   322  
   323  	// Modules is a list of the licensed enterprise modules
   324  	Modules []string
   325  
   326  	// List of features enabled by the license
   327  	Features []string
   328  }
   329  
   330  type LicenseReply struct {
   331  	License        *License
   332  	ConfigOutdated bool
   333  	QueryMeta
   334  }
   335  
   336  type ApplyLicenseOptions struct {
   337  	Force bool
   338  }
   339  
   340  func (op *Operator) LicensePut(license string, q *WriteOptions) (*WriteMeta, error) {
   341  	return op.ApplyLicense(license, nil, q)
   342  }
   343  
   344  func (op *Operator) ApplyLicense(license string, opts *ApplyLicenseOptions, q *WriteOptions) (*WriteMeta, error) {
   345  	r, err := op.c.newRequest("PUT", "/v1/operator/license")
   346  	if err != nil {
   347  		return nil, err
   348  	}
   349  
   350  	if opts != nil && opts.Force {
   351  		r.params.Add("force", "true")
   352  	}
   353  
   354  	r.setWriteOptions(q)
   355  	r.body = strings.NewReader(license)
   356  
   357  	rtt, resp, err := requireOK(op.c.doRequest(r)) //nolint:bodyclose
   358  	if err != nil {
   359  		return nil, err
   360  	}
   361  	defer resp.Body.Close()
   362  
   363  	wm := &WriteMeta{RequestTime: rtt}
   364  	parseWriteMeta(resp, wm)
   365  
   366  	return wm, nil
   367  }
   368  
   369  func (op *Operator) LicenseGet(q *QueryOptions) (*LicenseReply, *QueryMeta, error) {
   370  	req, err := op.c.newRequest("GET", "/v1/operator/license")
   371  	if err != nil {
   372  		return nil, nil, err
   373  	}
   374  	req.setQueryOptions(q)
   375  
   376  	var reply LicenseReply
   377  	rtt, resp, err := op.c.doRequest(req) //nolint:bodyclose
   378  	if err != nil {
   379  		return nil, nil, err
   380  	}
   381  	defer resp.Body.Close()
   382  
   383  	if resp.StatusCode == http.StatusNoContent {
   384  		return nil, nil, errors.New("Nomad Enterprise only endpoint")
   385  	}
   386  
   387  	if resp.StatusCode != http.StatusOK {
   388  		return nil, nil, newUnexpectedResponseError(
   389  			fromHTTPResponse(resp),
   390  			withExpectedStatuses([]int{http.StatusOK, http.StatusNoContent}),
   391  		)
   392  	}
   393  
   394  	err = json.NewDecoder(resp.Body).Decode(&reply)
   395  	if err != nil {
   396  		return nil, nil, err
   397  	}
   398  
   399  	qm := &QueryMeta{}
   400  	parseQueryMeta(resp, qm)
   401  	qm.RequestTime = rtt
   402  
   403  	return &reply, qm, nil
   404  }
   405  
   406  type LeadershipTransferResponse struct {
   407  	From RaftServer
   408  	To   RaftServer
   409  	Noop bool
   410  	Err  error
   411  
   412  	WriteMeta
   413  }
   414  
   415  // VaultWorkloadIdentityUpgradeCheck is the result of verifying if the cluster
   416  // is ready to switch to workload identities for Vault.
   417  type VaultWorkloadIdentityUpgradeCheck struct {
   418  	// JobsWithoutVaultIdentity is the list of jobs that have a `vault` block
   419  	// but do not have an `identity` for Vault.
   420  	JobsWithoutVaultIdentity []*JobListStub
   421  
   422  	// OutdatedNodes is the list of nodes running a version of Nomad that does
   423  	// not support workload identities for Vault.
   424  	OutdatedNodes []*NodeListStub
   425  
   426  	// VaultTokens is the list of Vault ACL token accessors that Nomad created
   427  	// and will no longer manage after the cluster is migrated to workload
   428  	// identities.
   429  	VaultTokens []*VaultAccessor
   430  }
   431  
   432  // Ready returns true if the cluster is ready to migrate to workload identities
   433  // with Vault.
   434  func (v *VaultWorkloadIdentityUpgradeCheck) Ready() bool {
   435  	return v != nil &&
   436  		len(v.VaultTokens) == 0 &&
   437  		len(v.OutdatedNodes) == 0 &&
   438  		len(v.JobsWithoutVaultIdentity) == 0
   439  }
   440  
   441  // VaultAccessor is a Vault ACL token created by Nomad for a task to access
   442  // Vault using the legacy authentication flow.
   443  type VaultAccessor struct {
   444  	// AllocID is the ID of the allocation that requested this token.
   445  	AllocID string
   446  
   447  	// Task is the name of the task that requested this token.
   448  	Task string
   449  
   450  	// NodeID is the ID of the node running the allocation that requested this
   451  	// token.
   452  	NodeID string
   453  
   454  	// Accessor is the Vault ACL token accessor ID.
   455  	Accessor string
   456  
   457  	// CreationTTL is the TTL set when the token was created.
   458  	CreationTTL int
   459  
   460  	// CreateIndex is the Raft index when the token was created.
   461  	CreateIndex uint64
   462  }
   463  
   464  // UpgradeCheckVaultWorkloadIdentity retrieves the cluster status for migrating
   465  // to workload identities with Vault.
   466  func (op *Operator) UpgradeCheckVaultWorkloadIdentity(q *QueryOptions) (*VaultWorkloadIdentityUpgradeCheck, *QueryMeta, error) {
   467  	var resp VaultWorkloadIdentityUpgradeCheck
   468  	qm, err := op.c.query("/v1/operator/upgrade-check/vault-workload-identity", &resp, q)
   469  	if err != nil {
   470  		return nil, nil, err
   471  	}
   472  	return &resp, qm, nil
   473  }