github.com/hernad/nomad@v1.6.112/nomad/vault.go (about)

     1  // Copyright (c) HashiCorp, Inc.
     2  // SPDX-License-Identifier: MPL-2.0
     3  
     4  package nomad
     5  
     6  import (
     7  	"context"
     8  	"errors"
     9  	"fmt"
    10  	"math/rand"
    11  	"strconv"
    12  	"strings"
    13  	"sync"
    14  	"sync/atomic"
    15  	"time"
    16  
    17  	"github.com/hernad/nomad/helper"
    18  	"github.com/hernad/nomad/helper/useragent"
    19  	tomb "gopkg.in/tomb.v2"
    20  
    21  	metrics "github.com/armon/go-metrics"
    22  	log "github.com/hashicorp/go-hclog"
    23  	multierror "github.com/hashicorp/go-multierror"
    24  	"github.com/hernad/nomad/nomad/structs"
    25  	"github.com/hernad/nomad/nomad/structs/config"
    26  	vapi "github.com/hashicorp/vault/api"
    27  
    28  	"golang.org/x/sync/errgroup"
    29  	"golang.org/x/time/rate"
    30  )
    31  
const (
	// vaultTokenCreateTTL is the wrap TTL handed back by the wrapping lookup
	// function for token-create requests, i.e. how long the wrapped token for
	// the client is valid. It is expressed as a duration string.
	vaultTokenCreateTTL = "60s"

	// minimumTokenTTL is the minimum Token TTL allowed for child tokens.
	minimumTokenTTL = 5 * time.Minute

	// defaultTokenTTL is the child token TTL used when no TaskTokenTTL is
	// configured, e.g. when the passed token is a root token such that child
	// tokens aren't being created against a role that has defined a TTL.
	defaultTokenTTL = "72h"

	// requestRateLimit is the maximum number of requests per second Nomad will
	// make against Vault. It is also used as the limiter's burst size.
	requestRateLimit rate.Limit = 500.0

	// maxParallelRevokes is the maximum number of parallel Vault
	// token revocation requests
	maxParallelRevokes = 64

	// vaultRevocationIntv is the interval at which Vault tokens that failed
	// initial revocation are retried
	vaultRevocationIntv = 5 * time.Minute

	// vaultCapabilitiesLookupPath is the path to lookup the capabilities of
	// one's own token.
	vaultCapabilitiesLookupPath = "sys/capabilities-self"

	// vaultTokenRenewPath is the path used to renew our token
	vaultTokenRenewPath = "auth/token/renew-self"

	// vaultTokenLookupPath is the path used to lookup a token
	vaultTokenLookupPath = "auth/token/lookup"

	// vaultTokenRevokePath is the path used to revoke a token
	vaultTokenRevokePath = "auth/token/revoke-accessor"

	// vaultRoleLookupPath is the path to lookup a role; %s is the role name
	vaultRoleLookupPath = "auth/token/roles/%s"

	// vaultTokenRoleCreatePath is the path to create a token from a role; %s
	// is the role name
	vaultTokenRoleCreatePath = "auth/token/create/%s"
)
    76  
var (
	// vaultCapabilitiesCapability is the expected capability of Nomad's Vault
	// token on the capabilities lookup path. The token must have at least one
	// of the capabilities.
	vaultCapabilitiesCapability = []string{"update", "root"}

	// vaultTokenRenewCapability is the expected capability Nomad's
	// Vault token should have on the token renew path. The token must have at
	// least one of the capabilities.
	vaultTokenRenewCapability = []string{"update", "root"}

	// vaultTokenLookupCapability is the expected capability Nomad's
	// Vault token should have on the token lookup path. The token must have at
	// least one of the capabilities.
	vaultTokenLookupCapability = []string{"update", "root"}

	// vaultTokenRevokeCapability is the expected capability Nomad's
	// Vault token should have on the token revoke path. The token must have at
	// least one of the capabilities.
	vaultTokenRevokeCapability = []string{"update", "root"}

	// vaultRoleLookupCapability is the expected capability Nomad's Vault
	// token should have on the role lookup path. The token must have at least
	// one of the capabilities.
	vaultRoleLookupCapability = []string{"read", "root"}

	// vaultTokenRoleCreateCapability is the expected capability Nomad's Vault
	// token should have on the role token-create path. The token must have at
	// least one of the capabilities.
	vaultTokenRoleCreateCapability = []string{"update", "root"}
)
   108  
// VaultClient is the Server's interface for interfacing with Vault
type VaultClient interface {
	// SetActive activates or de-activates the Vault client. When active, token
	// creation/lookup/revocation operations are allowed.
	SetActive(active bool)

	// SetConfig updates the config used by the Vault client
	SetConfig(config *config.VaultConfig) error

	// GetConfig returns a copy of the config used by the Vault client, for
	// testing
	GetConfig() *config.VaultConfig

	// CreateToken takes an allocation and task and returns an appropriate Vault
	// Secret
	CreateToken(ctx context.Context, a *structs.Allocation, task string) (*vapi.Secret, error)

	// LookupToken takes a token string and returns the Vault secret describing
	// it.
	LookupToken(ctx context.Context, token string) (*vapi.Secret, error)

	// RevokeTokens takes a set of token accessors and revokes the tokens
	RevokeTokens(ctx context.Context, accessors []*structs.VaultAccessor, committed bool) error

	// MarkForRevocation revokes the tokens in the background
	MarkForRevocation(accessors []*structs.VaultAccessor) error

	// Stop is used to stop token renewal
	Stop()

	// Running returns whether the Vault client is running
	Running() bool

	// Stats returns the Vault client's statistics
	Stats() map[string]string

	// EmitStats emits the client's statistics at the given period until stopCh
	// is signaled.
	EmitStats(period time.Duration, stopCh <-chan struct{})
}
   148  
// VaultStats holds all the stats about Vault tokens created and managed by
// Nomad.
type VaultStats struct {
	// TrackedForRevoke is the count of tokens that are being tracked to be
	// revoked since they could not be immediately revoked.
	TrackedForRevoke int

	// TokenTTL is the time-to-live duration for the current token
	TokenTTL time.Duration

	// TokenExpiry is the recorded expiry time of the current token
	TokenExpiry time.Time

	// LastRenewalTime is the time the token was last renewed.
	// TimeFromLastRenewal is presumably the elapsed duration since then; it
	// is populated outside this section of the file.
	LastRenewalTime     time.Time
	TimeFromLastRenewal time.Duration

	// NextRenewalTime is the time the token will attempt to renew.
	// TimeToNextRenewal is presumably the remaining duration until then; it
	// is populated outside this section of the file.
	NextRenewalTime   time.Time
	TimeToNextRenewal time.Duration
}
   170  
// PurgeVaultAccessorFn is called to remove VaultAccessors from the system. If
// the function returns an error, the tokens will still be tracked and
// revocation will be retried until it succeeds.
type PurgeVaultAccessorFn func(accessors []*structs.VaultAccessor) error
   175  
// vaultClient is the Server's implementation of the VaultClient interface. The
// client renews the PeriodicToken given in the Vault configuration and provides
// the Server with the ability to create child tokens and lookup the permissions
// of tokens.
type vaultClient struct {
	// limiter is used to rate limit requests to Vault
	limiter *rate.Limiter

	// client is the Vault API client used for Namespace-relative integrations
	// with the Vault API (anything except `/v1/sys`). If this server is not
	// configured to reference a Vault namespace, this will point to the same
	// client as clientSys
	client *vapi.Client

	// clientSys is the Vault API client used for non-Namespace-relative integrations
	// with the Vault API (anything involving `/v1/sys`). This client is never configured
	// with a Vault namespace, because these endpoints may return errors if a namespace
	// header is provided
	clientSys *vapi.Client

	// auth is the Vault token auth API client
	auth *vapi.TokenAuth

	// config is the user passed Vault config
	config *config.VaultConfig

	// connEstablished marks whether we have an established connection to Vault.
	// It is written while holding l.
	connEstablished bool

	// connEstablishedErr marks an error that can occur when establishing a
	// connection. It is written while holding l.
	connEstablishedErr error

	// token is the raw token used by the client
	token string

	// tokenData is the data of the passed Vault token
	tokenData *structs.VaultTokenData

	// revoking tracks the VaultAccessors that must be revoked. The map is
	// guarded by revLock; purgeFn is the callback supplied to NewVaultClient.
	revoking map[*structs.VaultAccessor]time.Time
	purgeFn  PurgeVaultAccessorFn
	revLock  sync.Mutex

	// active indicates whether the vaultClient is active. It should be
	// accessed using a helper and updated atomically
	active int32

	// running indicates whether the vault client is started. It is guarded
	// by l.
	running bool

	// renewLoopActive indicates whether the renewal goroutine is running.
	// It should be accessed and updated atomically; it is used for testing
	// purposes only.
	renewLoopActive int32

	// childTTL is the TTL for child tokens.
	childTTL string

	// currentExpiration is the time the current token lease expires; it is
	// guarded by currentExpirationLock. lastRenewalTime and nextRenewalTime
	// record the renewal schedule and are guarded by renewalTimeLock.
	currentExpiration     time.Time
	currentExpirationLock sync.Mutex
	lastRenewalTime       time.Time
	nextRenewalTime       time.Time
	renewalTimeLock       sync.Mutex

	// tomb tracks the background goroutines; it is replaced with a fresh
	// Tomb whenever the client is flushed or reconfigured.
	tomb   *tomb.Tomb
	logger log.Logger

	// l is used to lock the configuration aspects of the client such that
	// multiple callers can't cause conflicting config updates
	l sync.Mutex

	// setConfigLock serializes access to the SetConfig method
	setConfigLock sync.Mutex

	// consts as struct fields for overriding in tests
	maxRevokeBatchSize int
	revocationIntv     time.Duration

	// entHandler is the delegate passed to NewVaultClient.
	entHandler taskClientHandler
}
   258  
// taskClientHandler is the delegate interface stored in vaultClient.entHandler.
// Given the vaultClient and a namespace, clientForTask returns the Vault API
// client to use, or an error.
// NOTE(review): presumably the namespace is the task's Vault namespace —
// confirm against the implementations.
type taskClientHandler interface {
	clientForTask(v *vaultClient, namespace string) (*vapi.Client, error)
}
   262  
   263  // NewVaultClient returns a Vault client from the given config. If the client
   264  // couldn't be made an error is returned.
   265  func NewVaultClient(c *config.VaultConfig, logger log.Logger, purgeFn PurgeVaultAccessorFn, delegate taskClientHandler) (*vaultClient, error) {
   266  	if c == nil {
   267  		return nil, fmt.Errorf("must pass valid VaultConfig")
   268  	}
   269  
   270  	if logger == nil {
   271  		return nil, fmt.Errorf("must pass valid logger")
   272  	}
   273  	if purgeFn == nil {
   274  		purgeFn = func(accessors []*structs.VaultAccessor) error { return nil }
   275  	}
   276  	if delegate == nil {
   277  		delegate = &VaultNoopDelegate{}
   278  	}
   279  
   280  	v := &vaultClient{
   281  		config:             c,
   282  		logger:             logger.Named("vault"),
   283  		limiter:            rate.NewLimiter(requestRateLimit, int(requestRateLimit)),
   284  		revoking:           make(map[*structs.VaultAccessor]time.Time),
   285  		purgeFn:            purgeFn,
   286  		tomb:               &tomb.Tomb{},
   287  		maxRevokeBatchSize: maxVaultRevokeBatchSize,
   288  		revocationIntv:     vaultRevocationIntv,
   289  		entHandler:         delegate,
   290  	}
   291  
   292  	if v.config.IsEnabled() {
   293  		if err := v.buildClient(); err != nil {
   294  			return nil, err
   295  		}
   296  
   297  		// Launch the required goroutines
   298  		v.tomb.Go(wrapNilError(v.establishConnection))
   299  		v.tomb.Go(wrapNilError(v.revokeDaemon))
   300  
   301  		v.running = true
   302  	}
   303  
   304  	return v, nil
   305  }
   306  
   307  func (v *vaultClient) Stop() {
   308  	v.l.Lock()
   309  	running := v.running
   310  	v.running = false
   311  	v.l.Unlock()
   312  
   313  	if running {
   314  		v.tomb.Kill(nil)
   315  		v.tomb.Wait()
   316  		v.flush()
   317  	}
   318  }
   319  
   320  func (v *vaultClient) Running() bool {
   321  	v.l.Lock()
   322  	defer v.l.Unlock()
   323  	return v.running
   324  }
   325  
   326  // SetActive activates or de-activates the Vault client. When active, token
   327  // creation/lookup/revocation operation are allowed. All queued revocations are
   328  // cancelled if set un-active as it is assumed another instances is taking over
   329  func (v *vaultClient) SetActive(active bool) {
   330  	if active {
   331  		atomic.StoreInt32(&v.active, 1)
   332  	} else {
   333  		atomic.StoreInt32(&v.active, 0)
   334  	}
   335  
   336  	// Clear out the revoking tokens
   337  	v.revLock.Lock()
   338  	v.revoking = make(map[*structs.VaultAccessor]time.Time)
   339  	v.revLock.Unlock()
   340  }
   341  
   342  // flush is used to reset the state of the vault client
   343  func (v *vaultClient) flush() {
   344  	v.l.Lock()
   345  	defer v.l.Unlock()
   346  	v.revLock.Lock()
   347  	defer v.revLock.Unlock()
   348  
   349  	v.client = nil
   350  	v.clientSys = nil
   351  	v.auth = nil
   352  	v.connEstablished = false
   353  	v.connEstablishedErr = nil
   354  	v.token = ""
   355  	v.tokenData = nil
   356  	v.revoking = make(map[*structs.VaultAccessor]time.Time)
   357  	v.childTTL = ""
   358  	v.tomb = &tomb.Tomb{}
   359  }
   360  
   361  // GetConfig returns a copy of this vault client's configuration, for testing.
   362  func (v *vaultClient) GetConfig() *config.VaultConfig {
   363  	v.setConfigLock.Lock()
   364  	defer v.setConfigLock.Unlock()
   365  	return v.config.Copy()
   366  }
   367  
// SetConfig is used to update the Vault config being used. A temporary outage
// may occur after calling as it re-establishes a connection to Vault.
//
// A nil config is rejected with an error. Passing a config equal to the
// current one is a no-op. Otherwise any running background goroutines are
// killed and awaited, the new config is stored, and, if the new config is
// enabled, the API client is rebuilt and the goroutines are relaunched. If
// rebuilding the client fails the error is returned with the new config
// already stored and the client left not running.
func (v *vaultClient) SetConfig(config *config.VaultConfig) error {
	if config == nil {
		return fmt.Errorf("must pass valid VaultConfig")
	}
	// setConfigLock keeps concurrent SetConfig calls from interleaving while
	// l is temporarily released below.
	v.setConfigLock.Lock()
	defer v.setConfigLock.Unlock()

	v.l.Lock()
	defer v.l.Unlock()

	// If reloading the same config, no-op
	if v.config.Equal(config) {
		return nil
	}

	// Kill any background routines
	if v.running {
		// Kill any background routine
		v.tomb.Kill(nil)

		// Locking around tomb.Wait can deadlock with
		// establishConnection exiting, so we must unlock here.
		v.l.Unlock()
		v.tomb.Wait()
		v.l.Lock()

		// Stop accepting any new requests
		v.connEstablished = false
		// A killed tomb is not reused; install a fresh one for the relaunch.
		v.tomb = &tomb.Tomb{}
		v.running = false
	}

	// Store the new config
	v.config = config

	// Check if we should relaunch
	if v.config.IsEnabled() {
		// Rebuild the client
		if err := v.buildClient(); err != nil {
			return err
		}

		// Launch the required goroutines
		v.tomb.Go(wrapNilError(v.establishConnection))
		v.tomb.Go(wrapNilError(v.revokeDaemon))
		v.running = true
	}

	return nil
}
   420  
   421  // buildClient is used to build a Vault client based on the stored Vault config
   422  func (v *vaultClient) buildClient() error {
   423  	// Validate we have the required fields.
   424  	if v.config.Token == "" {
   425  		return errors.New("Vault token must be set")
   426  	} else if v.config.Addr == "" {
   427  		return errors.New("Vault address must be set")
   428  	}
   429  
   430  	// Parse the TTL if it is set
   431  	if v.config.TaskTokenTTL != "" {
   432  		d, err := time.ParseDuration(v.config.TaskTokenTTL)
   433  		if err != nil {
   434  			return fmt.Errorf("failed to parse TaskTokenTTL %q: %v", v.config.TaskTokenTTL, err)
   435  		}
   436  
   437  		if d.Nanoseconds() < minimumTokenTTL.Nanoseconds() {
   438  			return fmt.Errorf("ChildTokenTTL is less than minimum allowed of %v", minimumTokenTTL)
   439  		}
   440  
   441  		v.childTTL = v.config.TaskTokenTTL
   442  	} else {
   443  		// Default the TaskTokenTTL
   444  		v.childTTL = defaultTokenTTL
   445  	}
   446  
   447  	// Get the Vault API configuration
   448  	apiConf, err := v.config.ApiConfig()
   449  	if err != nil {
   450  		return fmt.Errorf("Failed to create Vault API config: %v", err)
   451  	}
   452  
   453  	// Create the Vault API client
   454  	client, err := vapi.NewClient(apiConf)
   455  	if err != nil {
   456  		v.logger.Error("failed to create Vault client and not retrying", "error", err)
   457  		return err
   458  	}
   459  	useragent.SetHeaders(client)
   460  
   461  	// Store the client, create/assign the /sys client
   462  	v.client = client
   463  	if v.config.Namespace != "" {
   464  		v.logger.Debug("configuring Vault namespace", "namespace", v.config.Namespace)
   465  		v.clientSys, err = vapi.NewClient(apiConf)
   466  		if err != nil {
   467  			v.logger.Error("failed to create Vault sys client and not retrying", "error", err)
   468  			return err
   469  		}
   470  		useragent.SetHeaders(v.clientSys)
   471  		client.SetNamespace(v.config.Namespace)
   472  	} else {
   473  		v.clientSys = client
   474  	}
   475  
   476  	// Set the token
   477  	v.token = v.config.Token
   478  	client.SetToken(v.token)
   479  	v.auth = client.Auth().Token()
   480  
   481  	return nil
   482  }
   483  
   484  // establishConnection is used to make first contact with Vault. This should be
   485  // called in a go-routine since the connection is retried until the Vault Client
   486  // is stopped or the connection is successfully made at which point the renew
   487  // loop is started.
   488  func (v *vaultClient) establishConnection() {
   489  	// Create the retry timer and set initial duration to zero so it fires
   490  	// immediately
   491  	retryTimer := time.NewTimer(0)
   492  	initStatus := false
   493  OUTER:
   494  	for {
   495  		select {
   496  		case <-v.tomb.Dying():
   497  			return
   498  		case <-retryTimer.C:
   499  			// Retry validating the token till success
   500  			if err := v.parseSelfToken(); err != nil {
   501  				// if parsing token fails, try to distinguish legitimate token error from transient Vault initialization/connection issue
   502  				if !initStatus {
   503  					if _, err := v.clientSys.Sys().Health(); err != nil {
   504  						v.logger.Warn("failed to contact Vault API", "retry", v.config.ConnectionRetryIntv, "error", err)
   505  						retryTimer.Reset(v.config.ConnectionRetryIntv)
   506  						continue OUTER
   507  					}
   508  					initStatus = true
   509  				}
   510  
   511  				v.logger.Error("failed to validate self token/role", "retry", v.config.ConnectionRetryIntv, "error", err)
   512  				retryTimer.Reset(v.config.ConnectionRetryIntv)
   513  				v.l.Lock()
   514  				v.connEstablished = true
   515  				v.connEstablishedErr = fmt.Errorf("failed to establish connection to Vault: %v", err)
   516  				v.l.Unlock()
   517  				continue OUTER
   518  			}
   519  
   520  			break OUTER
   521  		}
   522  	}
   523  
   524  	// Set the wrapping function such that token creation is wrapped now
   525  	// that we know our role
   526  	v.client.SetWrappingLookupFunc(v.getWrappingFn())
   527  
   528  	// If we are given a non-root token, start renewing it
   529  	if v.tokenData.Root() && v.tokenData.CreationTTL == 0 {
   530  		v.logger.Debug("not renewing token as it is root")
   531  	} else {
   532  		v.logger.Debug("starting renewal loop", "creation_ttl", time.Duration(v.tokenData.CreationTTL)*time.Second)
   533  		v.tomb.Go(wrapNilError(v.renewalLoop))
   534  	}
   535  
   536  	v.l.Lock()
   537  	v.connEstablished = true
   538  	v.connEstablishedErr = nil
   539  	v.l.Unlock()
   540  }
   541  
   542  func (v *vaultClient) isRenewLoopActive() bool {
   543  	return atomic.LoadInt32(&v.renewLoopActive) == 1
   544  }
   545  
   546  // renewalLoop runs the renew loop. This should only be called if we are given a
   547  // non-root token.
   548  func (v *vaultClient) renewalLoop() {
   549  	atomic.StoreInt32(&v.renewLoopActive, 1)
   550  	defer atomic.StoreInt32(&v.renewLoopActive, 0)
   551  
   552  	// Create the renewal timer and set initial duration to zero so it fires
   553  	// immediately
   554  	authRenewTimer := time.NewTimer(0)
   555  
   556  	// Backoff is to reduce the rate we try to renew with Vault under error
   557  	// situations
   558  	backoff := 0.0
   559  
   560  	for {
   561  		select {
   562  		case <-v.tomb.Dying():
   563  			return
   564  		case <-authRenewTimer.C:
   565  			// Renew the token and determine the new expiration
   566  			recoverable, err := v.renew()
   567  			v.currentExpirationLock.Lock()
   568  			currentExpiration := v.currentExpiration
   569  			v.currentExpirationLock.Unlock()
   570  
   571  			// Successfully renewed
   572  			if err == nil {
   573  				// Attempt to renew the token at half the expiration time
   574  				durationUntilRenew := time.Until(currentExpiration) / 2
   575  				v.renewalTimeLock.Lock()
   576  				now := time.Now()
   577  				v.lastRenewalTime = now
   578  				v.nextRenewalTime = now.Add(durationUntilRenew)
   579  				v.renewalTimeLock.Unlock()
   580  
   581  				v.logger.Info("successfully renewed token", "next_renewal", durationUntilRenew)
   582  				authRenewTimer.Reset(durationUntilRenew)
   583  
   584  				// Reset any backoff
   585  				backoff = 0
   586  				break
   587  			}
   588  
   589  			metrics.IncrCounter([]string{"nomad", "vault", "renew_failed"}, 1)
   590  			v.logger.Warn("got error or bad auth, so backing off", "error", err, "recoverable", recoverable)
   591  
   592  			if !recoverable {
   593  				return
   594  			}
   595  
   596  			backoff = nextBackoff(backoff, currentExpiration)
   597  			if backoff < 0 {
   598  				// We have failed to renew the token past its expiration. Stop
   599  				// renewing with Vault.
   600  				v.logger.Error("failed to renew Vault token before lease expiration. Shutting down Vault client",
   601  					"error", err)
   602  				v.l.Lock()
   603  				v.connEstablished = false
   604  				v.connEstablishedErr = err
   605  				v.l.Unlock()
   606  				return
   607  			}
   608  
   609  			durationUntilRetry := time.Duration(backoff) * time.Second
   610  			v.renewalTimeLock.Lock()
   611  			v.nextRenewalTime = time.Now().Add(durationUntilRetry)
   612  			v.renewalTimeLock.Unlock()
   613  			v.logger.Info("backing off renewal", "retry", durationUntilRetry)
   614  
   615  			authRenewTimer.Reset(durationUntilRetry)
   616  		}
   617  	}
   618  }
   619  
   620  // nextBackoff returns the delay for the next auto renew interval, in seconds.
   621  // Returns negative value if past expiration
   622  //
   623  // It should increase the amount of backoff each time, with the following rules:
   624  //
   625  //   - If token expired already despite earlier renewal attempts,
   626  //     back off for 1 minute + jitter
   627  //   - If we have an existing authentication that is going to expire,
   628  //
   629  // never back off more than half of the amount of time remaining
   630  // until expiration (with 5s floor)
   631  // * Never back off more than 30 seconds multiplied by a random
   632  // value between 1 and 2
   633  // * Use randomness so that many clients won't keep hitting Vault
   634  // at the same time
   635  func nextBackoff(backoff float64, expiry time.Time) float64 {
   636  	maxBackoff := time.Until(expiry) / 2
   637  
   638  	if maxBackoff < 0 {
   639  		// expiry passed
   640  		return 60 * (1.0 + rand.Float64())
   641  	}
   642  
   643  	switch {
   644  	case backoff >= 24:
   645  		backoff = 30
   646  	default:
   647  		backoff = backoff * 1.25
   648  	}
   649  
   650  	// Add randomness
   651  	backoff = backoff * (1.0 + rand.Float64())
   652  
   653  	if backoff > maxBackoff.Seconds() {
   654  		backoff = maxBackoff.Seconds()
   655  	}
   656  
   657  	if backoff < 5 {
   658  		backoff = 5
   659  	}
   660  
   661  	return backoff
   662  }
   663  
   664  // renew attempts to renew our Vault token. If the renewal fails, an error is
   665  // returned.  The boolean indicates whether it's safe to attempt to renew again.
   666  // This method updates the currentExpiration time
   667  func (v *vaultClient) renew() (bool, error) {
   668  	// Track how long the request takes
   669  	defer metrics.MeasureSince([]string{"nomad", "vault", "renew"}, time.Now())
   670  
   671  	// Attempt to renew the token
   672  	secret, err := v.auth.RenewSelf(v.tokenData.CreationTTL)
   673  	if err != nil {
   674  		// Check if there is a permission denied
   675  		recoverable := !structs.VaultUnrecoverableError.MatchString(err.Error())
   676  		return recoverable, fmt.Errorf("failed to renew the vault token: %v", err)
   677  	}
   678  
   679  	if secret == nil {
   680  		// It's possible for RenewSelf to return (nil, nil) if the
   681  		// response body from Vault is empty.
   682  		return true, fmt.Errorf("renewal failed: empty response from vault")
   683  	}
   684  
   685  	// these treated as transient errors, where can keep renewing
   686  	auth := secret.Auth
   687  	if auth == nil {
   688  		return true, fmt.Errorf("renewal successful but not auth information returned")
   689  	} else if auth.LeaseDuration == 0 {
   690  		return true, fmt.Errorf("renewal successful but no lease duration returned")
   691  	}
   692  
   693  	v.extendExpiration(auth.LeaseDuration)
   694  
   695  	v.logger.Debug("successfully renewed server token")
   696  	return true, nil
   697  }
   698  
   699  // getWrappingFn returns an appropriate wrapping function for Nomad Servers
   700  func (v *vaultClient) getWrappingFn() func(operation, path string) string {
   701  	createPath := "auth/token/create"
   702  	role := v.getRole()
   703  	if role != "" {
   704  		createPath = fmt.Sprintf("auth/token/create/%s", role)
   705  	}
   706  
   707  	return func(operation, path string) string {
   708  		// Only wrap the token create operation
   709  		if operation != "POST" || path != createPath {
   710  			return ""
   711  		}
   712  
   713  		return vaultTokenCreateTTL
   714  	}
   715  }
   716  
   717  // parseSelfToken looks up the Vault token in Vault and parses its data storing
   718  // it in the client. If the token is not valid for Nomads purposes an error is
   719  // returned.
   720  func (v *vaultClient) parseSelfToken() error {
   721  	// Try looking up the token using the self endpoint
   722  	secret, err := v.lookupSelf()
   723  	if err != nil {
   724  		return err
   725  	}
   726  
   727  	// Read and parse the fields
   728  	var data structs.VaultTokenData
   729  	if err := structs.DecodeVaultSecretData(secret, &data); err != nil {
   730  		return fmt.Errorf("failed to parse Vault token's data block: %v", err)
   731  	}
   732  	v.tokenData = &data
   733  	v.extendExpiration(data.TTL)
   734  
   735  	// The criteria that must be met for the token to be valid are as follows:
   736  	// 1) If token is non-root or is but has a creation ttl
   737  	//   a) The token must be renewable
   738  	//   b) Token must have a non-zero TTL
   739  	// 2) Must have update capability for "auth/token/lookup/" (used to verify incoming tokens)
   740  	// 3) Must have update capability for "/auth/token/revoke-accessor/" (used to revoke unneeded tokens)
   741  	// 4) If configured to create tokens against a role:
   742  	//   a) Must have read capability for "auth/token/roles/<role_name" (Can just attempt a read)
   743  	//   b) Must have update capability for path "auth/token/create/<role_name>"
   744  	//   c) Role must:
   745  	//     1) Must allow tokens to be renewed
   746  	//     2) Must not have an explicit max TTL
   747  	//     3) Must have non-zero period
   748  	// 5) If not configured against a role, the token must be root
   749  
   750  	var mErr multierror.Error
   751  	role := v.getRole()
   752  	if !data.Root() {
   753  		// All non-root tokens must be renewable
   754  		if !data.Renewable {
   755  			_ = multierror.Append(&mErr, fmt.Errorf("Vault token is not renewable or root"))
   756  		}
   757  
   758  		// All non-root tokens must have a lease duration
   759  		if data.CreationTTL == 0 {
   760  			_ = multierror.Append(&mErr, fmt.Errorf("invalid lease duration of zero"))
   761  		}
   762  
   763  		// The lease duration can not be expired
   764  		if data.TTL == 0 {
   765  			_ = multierror.Append(&mErr, fmt.Errorf("token TTL is zero"))
   766  		}
   767  
   768  		// There must be a valid role since we aren't root
   769  		if role == "" {
   770  			_ = multierror.Append(&mErr, fmt.Errorf("token role name must be set when not using a root token"))
   771  		}
   772  
   773  	} else if data.CreationTTL != 0 {
   774  		// If the root token has a TTL it must be renewable
   775  		if !data.Renewable {
   776  			_ = multierror.Append(&mErr, fmt.Errorf("Vault token has a TTL but is not renewable"))
   777  		} else if data.TTL == 0 {
   778  			// If the token has a TTL make sure it has not expired
   779  			_ = multierror.Append(&mErr, fmt.Errorf("token TTL is zero"))
   780  		}
   781  	}
   782  
   783  	// Check we have the correct capabilities
   784  	if err := v.validateCapabilities(role, data.Root()); err != nil {
   785  		_ = multierror.Append(&mErr, err)
   786  	}
   787  
   788  	// If given a role validate it
   789  	if role != "" {
   790  		if err := v.validateRole(role); err != nil {
   791  			_ = multierror.Append(&mErr, err)
   792  		}
   793  	}
   794  
   795  	return mErr.ErrorOrNil()
   796  }
   797  
   798  // lookupSelf is a helper function that looks up latest self lease info.
   799  func (v *vaultClient) lookupSelf() (*vapi.Secret, error) {
   800  	// Get the initial lease duration
   801  	auth := v.client.Auth().Token()
   802  
   803  	secret, err := auth.LookupSelf()
   804  	if err == nil && secret != nil && secret.Data != nil {
   805  		return secret, nil
   806  	}
   807  
   808  	// Try looking up our token directly, even when we get an empty response,
   809  	// in case of an unexpected event - a true failure would occur in this lookup again
   810  	secret, err = auth.Lookup(v.client.Token())
   811  	switch {
   812  	case err != nil:
   813  		return nil, fmt.Errorf("failed to lookup Vault periodic token: %v", err)
   814  	case secret == nil || secret.Data == nil:
   815  		return nil, fmt.Errorf("failed to lookup Vault periodic token: got empty response")
   816  	default:
   817  		return secret, nil
   818  	}
   819  }
   820  
   821  // getRole returns the role name to be used when creating tokens
   822  func (v *vaultClient) getRole() string {
   823  	if v.config.Role != "" {
   824  		return v.config.Role
   825  	}
   826  
   827  	return v.tokenData.Role
   828  }
   829  
   830  // validateCapabilities checks that Nomad's Vault token has the correct
   831  // capabilities.
   832  func (v *vaultClient) validateCapabilities(role string, root bool) error {
   833  	// Check if the token can lookup capabilities.
   834  	var mErr multierror.Error
   835  	_, _, err := v.hasCapability(vaultCapabilitiesLookupPath, vaultCapabilitiesCapability)
   836  	if err != nil {
   837  		// Check if there is a permission denied
   838  		if structs.VaultUnrecoverableError.MatchString(err.Error()) {
   839  			// Since we can't read permissions, we just log a warning that we
   840  			// can't tell if the Vault token will work
   841  			msg := fmt.Sprintf("can not lookup token capabilities. "+
   842  				"As such certain operations may fail in the future. "+
   843  				"Please give Nomad a Vault token with one of the following "+
   844  				"capabilities %q on %q so that the required capabilities can be verified",
   845  				vaultCapabilitiesCapability, vaultCapabilitiesLookupPath)
   846  			v.logger.Warn(msg)
   847  			return nil
   848  		} else {
   849  			_ = multierror.Append(&mErr, err)
   850  		}
   851  	}
   852  
   853  	// verify is a helper function that verifies the token has one of the
   854  	// capabilities on the given path and adds an issue to the error
   855  	verify := func(path string, requiredCaps []string) {
   856  		ok, caps, err := v.hasCapability(path, requiredCaps)
   857  		if err != nil {
   858  			_ = multierror.Append(&mErr, err)
   859  		} else if !ok {
   860  			_ = multierror.Append(&mErr,
   861  				fmt.Errorf("token must have one of the following capabilities %q on %q; has %v", requiredCaps, path, caps))
   862  		}
   863  	}
   864  
   865  	// Check if we are verifying incoming tokens
   866  	if !v.config.AllowsUnauthenticated() {
   867  		verify(vaultTokenLookupPath, vaultTokenLookupCapability)
   868  	}
   869  
   870  	// Verify we can renew our selves tokens
   871  	verify(vaultTokenRenewPath, vaultTokenRenewCapability)
   872  
   873  	// Verify we can revoke tokens
   874  	verify(vaultTokenRevokePath, vaultTokenRevokeCapability)
   875  
   876  	// If we are using a role verify the capability
   877  	if role != "" {
   878  		// Verify we can read the role
   879  		verify(fmt.Sprintf(vaultRoleLookupPath, role), vaultRoleLookupCapability)
   880  
   881  		// Verify we can create from the role
   882  		verify(fmt.Sprintf(vaultTokenRoleCreatePath, role), vaultTokenRoleCreateCapability)
   883  	}
   884  
   885  	return mErr.ErrorOrNil()
   886  }
   887  
   888  // hasCapability takes a path and returns whether the token has at least one of
   889  // the required capabilities on the given path. It also returns the set of
   890  // capabilities the token does have as well as any error that occurred.
   891  func (v *vaultClient) hasCapability(path string, required []string) (bool, []string, error) {
   892  	caps, err := v.client.Sys().CapabilitiesSelf(path)
   893  	if err != nil {
   894  		return false, nil, err
   895  	}
   896  	for _, c := range caps {
   897  		for _, r := range required {
   898  			if c == r {
   899  				return true, caps, nil
   900  			}
   901  		}
   902  	}
   903  	return false, caps, nil
   904  }
   905  
   906  // validateRole contacts Vault and checks that the given Vault role is valid for
   907  // the purposes of being used by Nomad
   908  func (v *vaultClient) validateRole(role string) error {
   909  	if role == "" {
   910  		return fmt.Errorf("Invalid empty role name")
   911  	}
   912  
   913  	// Validate the role
   914  	rsecret, err := v.client.Logical().Read(fmt.Sprintf("auth/token/roles/%s", role))
   915  	if err != nil {
   916  		return fmt.Errorf("failed to lookup role %q: %v", role, err)
   917  	}
   918  	if rsecret == nil {
   919  		return fmt.Errorf("Role %q does not exist", role)
   920  	}
   921  
   922  	// Read and parse the fields
   923  	var data structs.VaultTokenRoleData
   924  	if err := structs.DecodeVaultSecretData(rsecret, &data); err != nil {
   925  		return fmt.Errorf("failed to parse Vault role's data block: %v", err)
   926  	}
   927  
   928  	// Validate the role is acceptable
   929  	var mErr multierror.Error
   930  	if !data.Renewable {
   931  		_ = multierror.Append(&mErr, fmt.Errorf("Role must allow tokens to be renewed"))
   932  	}
   933  
   934  	if data.ExplicitMaxTtl != 0 || data.TokenExplicitMaxTtl != 0 {
   935  		_ = multierror.Append(&mErr, fmt.Errorf("Role can not use an explicit max ttl. Token must be periodic."))
   936  	}
   937  
   938  	if data.Period == 0 && data.TokenPeriod == 0 {
   939  		_ = multierror.Append(&mErr, fmt.Errorf("Role must have a non-zero period to make tokens periodic."))
   940  	}
   941  
   942  	return mErr.ErrorOrNil()
   943  }
   944  
   945  // ConnectionEstablished returns whether a connection to Vault has been
   946  // established and any error that potentially caused it to be false
   947  func (v *vaultClient) ConnectionEstablished() (bool, error) {
   948  	v.l.Lock()
   949  	defer v.l.Unlock()
   950  	return v.connEstablished, v.connEstablishedErr
   951  }
   952  
   953  // Enabled returns whether the client is active
   954  func (v *vaultClient) Enabled() bool {
   955  	v.l.Lock()
   956  	defer v.l.Unlock()
   957  	return v.config.IsEnabled()
   958  }
   959  
   960  // Active returns whether the client is active
   961  func (v *vaultClient) Active() bool {
   962  	return atomic.LoadInt32(&v.active) == 1
   963  }
   964  
   965  // CreateToken takes the allocation and task and returns an appropriate Vault
   966  // token. The call is rate limited and may be canceled with the passed policy.
   967  // When the error is recoverable, it will be of type RecoverableError
   968  func (v *vaultClient) CreateToken(ctx context.Context, a *structs.Allocation, task string) (*vapi.Secret, error) {
   969  	if !v.Enabled() {
   970  		return nil, fmt.Errorf("Vault integration disabled")
   971  	}
   972  	if !v.Active() {
   973  		return nil, structs.NewRecoverableError(fmt.Errorf("Vault client not active"), true)
   974  	}
   975  	// Check if we have established a connection with Vault
   976  	if established, err := v.ConnectionEstablished(); !established && err == nil {
   977  		return nil, structs.NewRecoverableError(fmt.Errorf("Connection to Vault has not been established"), true)
   978  	} else if err != nil {
   979  		return nil, err
   980  	}
   981  
   982  	// Track how long the request takes
   983  	defer metrics.MeasureSince([]string{"nomad", "vault", "create_token"}, time.Now())
   984  
   985  	// Retrieve the Vault block for the task
   986  	vaultBlocks := a.Job.Vault()
   987  	if vaultBlocks == nil {
   988  		return nil, fmt.Errorf("Job does not require Vault token")
   989  	}
   990  	tg, ok := vaultBlocks[a.TaskGroup]
   991  	if !ok {
   992  		return nil, fmt.Errorf("Task group does not require Vault token")
   993  	}
   994  	taskVault, ok := tg[task]
   995  	if !ok {
   996  		return nil, fmt.Errorf("Task does not require Vault token")
   997  	}
   998  
   999  	// Set namespace for task
  1000  	namespaceForTask := v.config.Namespace
  1001  	if taskVault.Namespace != "" {
  1002  		namespaceForTask = taskVault.Namespace
  1003  	}
  1004  
  1005  	// Build the creation request
  1006  	req := &vapi.TokenCreateRequest{
  1007  		Policies: taskVault.Policies,
  1008  		Metadata: map[string]string{
  1009  			"AllocationID": a.ID,
  1010  			"JobID":        a.JobID,
  1011  			"TaskGroup":    a.TaskGroup,
  1012  			"Task":         task,
  1013  			"NodeID":       a.NodeID,
  1014  			"Namespace":    namespaceForTask,
  1015  		},
  1016  		TTL:         v.childTTL,
  1017  		DisplayName: fmt.Sprintf("%s-%s", a.ID, task),
  1018  	}
  1019  
  1020  	// Ensure we are under our rate limit
  1021  	if err := v.limiter.Wait(ctx); err != nil {
  1022  		return nil, err
  1023  	}
  1024  
  1025  	// Make the request and switch depending on whether we are using a root
  1026  	// token or a role based token
  1027  	var secret *vapi.Secret
  1028  	var err error
  1029  	role := v.getRole()
  1030  
  1031  	// Fetch client for task
  1032  	taskClient, err := v.entHandler.clientForTask(v, namespaceForTask)
  1033  	if err != nil {
  1034  		return nil, err
  1035  	}
  1036  
  1037  	if v.tokenData.Root() && role == "" {
  1038  		req.Period = v.childTTL
  1039  		secret, err = taskClient.Auth().Token().Create(req)
  1040  	} else {
  1041  		// Make the token using the role
  1042  		secret, err = taskClient.Auth().Token().CreateWithRole(req, role)
  1043  	}
  1044  
  1045  	// Determine whether it is unrecoverable
  1046  	if err != nil {
  1047  		err = fmt.Errorf("failed to create an alloc vault token: %v", err)
  1048  		if structs.VaultUnrecoverableError.MatchString(err.Error()) {
  1049  			return secret, err
  1050  		}
  1051  
  1052  		// The error is recoverable
  1053  		return nil, structs.NewRecoverableError(err, true)
  1054  	}
  1055  
  1056  	// Validate the response
  1057  	var validationErr error
  1058  	if secret == nil {
  1059  		validationErr = fmt.Errorf("Vault returned nil Secret")
  1060  	} else if secret.WrapInfo == nil {
  1061  		validationErr = fmt.Errorf("Vault returned Secret with nil WrapInfo. Secret warnings: %v", secret.Warnings)
  1062  	} else if secret.WrapInfo.WrappedAccessor == "" {
  1063  		validationErr = fmt.Errorf("Vault returned WrapInfo without WrappedAccessor. Secret warnings: %v", secret.Warnings)
  1064  	}
  1065  	if validationErr != nil {
  1066  		v.logger.Warn("failed to CreateToken", "error", validationErr)
  1067  		return nil, structs.NewRecoverableError(validationErr, true)
  1068  	}
  1069  
  1070  	// Got a valid response
  1071  	return secret, nil
  1072  }
  1073  
  1074  // LookupToken takes a Vault token and does a lookup against Vault. The call is
  1075  // rate limited and may be canceled with passed context.
  1076  func (v *vaultClient) LookupToken(ctx context.Context, token string) (*vapi.Secret, error) {
  1077  	if !v.Enabled() {
  1078  		return nil, fmt.Errorf("Vault integration disabled")
  1079  	}
  1080  
  1081  	if !v.Active() {
  1082  		return nil, fmt.Errorf("Vault client not active")
  1083  	}
  1084  
  1085  	// Check if we have established a connection with Vault
  1086  	if established, err := v.ConnectionEstablished(); !established && err == nil {
  1087  		return nil, structs.NewRecoverableError(fmt.Errorf("Connection to Vault has not been established"), true)
  1088  	} else if err != nil {
  1089  		return nil, err
  1090  	}
  1091  
  1092  	// Track how long the request takes
  1093  	defer metrics.MeasureSince([]string{"nomad", "vault", "lookup_token"}, time.Now())
  1094  
  1095  	// Ensure we are under our rate limit
  1096  	if err := v.limiter.Wait(ctx); err != nil {
  1097  		return nil, err
  1098  	}
  1099  
  1100  	// Lookup the token
  1101  	return v.auth.Lookup(token)
  1102  }
  1103  
  1104  // RevokeTokens revokes the passed set of accessors. If committed is set, the
  1105  // purge function passed to the client is called. If there is an error purging
  1106  // either because of Vault failures or because of the purge function, the
  1107  // revocation is retried until the tokens TTL.
  1108  func (v *vaultClient) RevokeTokens(ctx context.Context, accessors []*structs.VaultAccessor, committed bool) error {
  1109  	if !v.Enabled() {
  1110  		return nil
  1111  	}
  1112  
  1113  	if !v.Active() {
  1114  		return fmt.Errorf("Vault client not active")
  1115  	}
  1116  
  1117  	// Track how long the request takes
  1118  	defer metrics.MeasureSince([]string{"nomad", "vault", "revoke_tokens"}, time.Now())
  1119  
  1120  	// Check if we have established a connection with Vault. If not just add it
  1121  	// to the queue
  1122  	if established, err := v.ConnectionEstablished(); !established && err == nil {
  1123  		// Only bother tracking it for later revocation if the accessor was
  1124  		// committed
  1125  		if committed {
  1126  			v.storeForRevocation(accessors)
  1127  		}
  1128  
  1129  		// Track that we are abandoning these accessors.
  1130  		metrics.IncrCounter([]string{"nomad", "vault", "undistributed_tokens_abandoned"}, float32(len(accessors)))
  1131  		return nil
  1132  	}
  1133  
  1134  	// Attempt to revoke immediately and if it fails, add it to the revoke queue
  1135  	err := v.parallelRevoke(ctx, accessors)
  1136  	if err != nil {
  1137  		// If it is uncommitted, it is a best effort revoke as it will shortly
  1138  		// TTL within the cubbyhole and has not been leaked to any outside
  1139  		// system
  1140  		if !committed {
  1141  			metrics.IncrCounter([]string{"nomad", "vault", "undistributed_tokens_abandoned"}, float32(len(accessors)))
  1142  			return nil
  1143  		}
  1144  
  1145  		v.logger.Warn("failed to revoke tokens. Will reattempt until TTL", "error", err)
  1146  		v.storeForRevocation(accessors)
  1147  		return nil
  1148  	} else if !committed {
  1149  		// Mark that it was revoked but there is nothing to purge so exit
  1150  		metrics.IncrCounter([]string{"nomad", "vault", "undistributed_tokens_revoked"}, float32(len(accessors)))
  1151  		return nil
  1152  	}
  1153  
  1154  	if err := v.purgeFn(accessors); err != nil {
  1155  		v.logger.Error("failed to purge Vault accessors", "error", err)
  1156  		v.storeForRevocation(accessors)
  1157  		return nil
  1158  	}
  1159  
  1160  	// Track that it was revoked successfully
  1161  	metrics.IncrCounter([]string{"nomad", "vault", "distributed_tokens_revoked"}, float32(len(accessors)))
  1162  
  1163  	return nil
  1164  }
  1165  
  1166  func (v *vaultClient) MarkForRevocation(accessors []*structs.VaultAccessor) error {
  1167  	if !v.Enabled() {
  1168  		return nil
  1169  	}
  1170  
  1171  	if !v.Active() {
  1172  		return fmt.Errorf("Vault client not active")
  1173  	}
  1174  
  1175  	v.storeForRevocation(accessors)
  1176  	return nil
  1177  }
  1178  
  1179  // storeForRevocation stores the passed set of accessors for revocation. It
  1180  // captures their effective TTL by storing their create TTL plus the current
  1181  // time.
  1182  func (v *vaultClient) storeForRevocation(accessors []*structs.VaultAccessor) {
  1183  	v.revLock.Lock()
  1184  
  1185  	now := time.Now()
  1186  	for _, a := range accessors {
  1187  		if _, ok := v.revoking[a]; !ok {
  1188  			v.revoking[a] = now.Add(time.Duration(a.CreationTTL) * time.Second)
  1189  		}
  1190  	}
  1191  	v.revLock.Unlock()
  1192  }
  1193  
  1194  // parallelRevoke revokes the passed VaultAccessors in parallel.
  1195  func (v *vaultClient) parallelRevoke(ctx context.Context, accessors []*structs.VaultAccessor) error {
  1196  	if !v.Enabled() {
  1197  		return fmt.Errorf("Vault integration disabled")
  1198  	}
  1199  
  1200  	if !v.Active() {
  1201  		return fmt.Errorf("Vault client not active")
  1202  	}
  1203  
  1204  	// Check if we have established a connection with Vault
  1205  	if established, err := v.ConnectionEstablished(); !established && err == nil {
  1206  		return structs.NewRecoverableError(fmt.Errorf("Connection to Vault has not been established"), true)
  1207  	} else if err != nil {
  1208  		return err
  1209  	}
  1210  
  1211  	g, pCtx := errgroup.WithContext(ctx)
  1212  
  1213  	// Cap the handlers
  1214  	handlers := len(accessors)
  1215  	if handlers > maxParallelRevokes {
  1216  		handlers = maxParallelRevokes
  1217  	}
  1218  
  1219  	// Revoke the Vault Token Accessors
  1220  	input := make(chan *structs.VaultAccessor, handlers)
  1221  	for i := 0; i < handlers; i++ {
  1222  		g.Go(func() error {
  1223  			for {
  1224  				select {
  1225  				case va, ok := <-input:
  1226  					if !ok {
  1227  						return nil
  1228  					}
  1229  
  1230  					err := v.auth.RevokeAccessor(va.Accessor)
  1231  					if err != nil && !strings.Contains(err.Error(), "invalid accessor") {
  1232  						return fmt.Errorf("failed to revoke token (alloc: %q, node: %q, task: %q): %v", va.AllocID, va.NodeID, va.Task, err)
  1233  					}
  1234  				case <-pCtx.Done():
  1235  					return nil
  1236  				}
  1237  			}
  1238  		})
  1239  	}
  1240  
  1241  	// Send the input
  1242  	go func() {
  1243  		defer close(input)
  1244  		for _, va := range accessors {
  1245  			select {
  1246  			case <-pCtx.Done():
  1247  				return
  1248  			case input <- va:
  1249  			}
  1250  		}
  1251  
  1252  	}()
  1253  
  1254  	// Wait for everything to complete
  1255  	return g.Wait()
  1256  }
  1257  
// maxVaultRevokeBatchSize is the maximum number of tokens a revokeDaemon
// should revoke and purge at any given time.
//
// Limiting the revocation batch size is beneficial for a few reasons:
//   - A single revocation failure of any entry in a batch results in retrying
//     the whole batch; the larger the batch, the higher the likelihood of
//     such a failure.
//   - Smaller batch sizes result in more cooperativeness: they provide hooks
//     for reconsidering token TTLs and leadership step-downs.
//   - Batches limit the size of the Raft message purging tokens. Due to bugs
//     pre-0.11.3, expired tokens were not properly purged, so users upgrading from
//     older versions may have huge numbers (millions) of expired tokens to purge.
const maxVaultRevokeBatchSize = 1000
  1270  
  1271  // revokeDaemon should be called in a goroutine and is used to periodically
  1272  // revoke Vault accessors that failed the original revocation
  1273  func (v *vaultClient) revokeDaemon() {
  1274  	ticker := time.NewTicker(v.revocationIntv)
  1275  	defer ticker.Stop()
  1276  
  1277  	for {
  1278  		select {
  1279  		case <-v.tomb.Dying():
  1280  			return
  1281  		case now := <-ticker.C:
  1282  			if established, err := v.ConnectionEstablished(); !established || err != nil {
  1283  				continue
  1284  			}
  1285  
  1286  			v.revLock.Lock()
  1287  
  1288  			// Fast path
  1289  			if len(v.revoking) == 0 {
  1290  				v.revLock.Unlock()
  1291  				continue
  1292  			}
  1293  
  1294  			// Build the list of accessors that need to be revoked while pruning any TTL'd checks
  1295  			toRevoke := len(v.revoking)
  1296  			if toRevoke > v.maxRevokeBatchSize {
  1297  				v.logger.Info("batching tokens to be revoked",
  1298  					"to_revoke", toRevoke, "batch_size", v.maxRevokeBatchSize,
  1299  					"batch_interval", v.revocationIntv)
  1300  				toRevoke = v.maxRevokeBatchSize
  1301  			}
  1302  			revoking := make([]*structs.VaultAccessor, 0, toRevoke)
  1303  			ttlExpired := []*structs.VaultAccessor{}
  1304  			for va, ttl := range v.revoking {
  1305  				if now.After(ttl) {
  1306  					ttlExpired = append(ttlExpired, va)
  1307  				} else {
  1308  					revoking = append(revoking, va)
  1309  				}
  1310  
  1311  				// Batches should consider tokens to be revoked
  1312  				// as well as expired tokens to ensure the Raft
  1313  				// message is reasonably sized.
  1314  				if len(revoking)+len(ttlExpired) >= toRevoke {
  1315  					break
  1316  				}
  1317  			}
  1318  
  1319  			if err := v.parallelRevoke(context.Background(), revoking); err != nil {
  1320  				v.logger.Warn("background token revocation errored", "error", err)
  1321  				v.revLock.Unlock()
  1322  				continue
  1323  			}
  1324  
  1325  			// Unlock before a potentially expensive operation
  1326  			v.revLock.Unlock()
  1327  
  1328  			// purge all explicitly revoked as well as ttl expired tokens
  1329  			// and only remove them locally on purge success
  1330  			revoking = append(revoking, ttlExpired...)
  1331  
  1332  			// Call the passed in token revocation function
  1333  			if err := v.purgeFn(revoking); err != nil {
  1334  				// Can continue since revocation is idempotent
  1335  				v.logger.Error("token revocation errored", "error", err)
  1336  				continue
  1337  			}
  1338  
  1339  			// Track that tokens were revoked successfully
  1340  			metrics.IncrCounter([]string{"nomad", "vault", "distributed_tokens_revoked"}, float32(len(revoking)))
  1341  
  1342  			// Can delete from the tracked list now that we have purged
  1343  			v.revLock.Lock()
  1344  			for _, va := range revoking {
  1345  				delete(v.revoking, va)
  1346  			}
  1347  			v.revLock.Unlock()
  1348  
  1349  		}
  1350  	}
  1351  }
  1352  
  1353  // purgeVaultAccessors creates a Raft transaction to remove the passed Vault
  1354  // Accessors
  1355  func (s *Server) purgeVaultAccessors(accessors []*structs.VaultAccessor) error {
  1356  	// Commit this update via Raft
  1357  	req := structs.VaultAccessorsRequest{Accessors: accessors}
  1358  	_, _, err := s.raftApply(structs.VaultAccessorDeregisterRequestType, req)
  1359  	return err
  1360  }
  1361  
  1362  // wrapNilError is a helper that returns a wrapped function that returns a nil
  1363  // error
  1364  func wrapNilError(f func()) func() error {
  1365  	return func() error {
  1366  		f()
  1367  		return nil
  1368  	}
  1369  }
  1370  
  1371  // setLimit is used to update the rate limit
  1372  func (v *vaultClient) setLimit(l rate.Limit) {
  1373  	v.l.Lock()
  1374  	defer v.l.Unlock()
  1375  	v.limiter = rate.NewLimiter(l, int(l))
  1376  }
  1377  
  1378  func (v *vaultClient) Stats() map[string]string {
  1379  	stat := v.stats()
  1380  
  1381  	expireTimeStr := ""
  1382  	if !stat.TokenExpiry.IsZero() {
  1383  		expireTimeStr = stat.TokenExpiry.Format(time.RFC3339)
  1384  	}
  1385  
  1386  	lastRenewTimeStr := ""
  1387  	if !stat.LastRenewalTime.IsZero() {
  1388  		lastRenewTimeStr = stat.LastRenewalTime.Format(time.RFC3339)
  1389  	}
  1390  
  1391  	nextRenewTimeStr := ""
  1392  	if !stat.NextRenewalTime.IsZero() {
  1393  		nextRenewTimeStr = stat.NextRenewalTime.Format(time.RFC3339)
  1394  	}
  1395  
  1396  	return map[string]string{
  1397  		"tracked_for_revoked":     strconv.Itoa(stat.TrackedForRevoke),
  1398  		"token_ttl":               stat.TokenTTL.Round(time.Second).String(),
  1399  		"token_expire_time":       expireTimeStr,
  1400  		"token_last_renewal_time": lastRenewTimeStr,
  1401  		"token_next_renewal_time": nextRenewTimeStr,
  1402  	}
  1403  }
  1404  
  1405  func (v *vaultClient) stats() *VaultStats {
  1406  	// Allocate a new stats struct
  1407  	stats := new(VaultStats)
  1408  
  1409  	v.revLock.Lock()
  1410  	stats.TrackedForRevoke = len(v.revoking)
  1411  	v.revLock.Unlock()
  1412  
  1413  	v.currentExpirationLock.Lock()
  1414  	stats.TokenExpiry = v.currentExpiration
  1415  	v.currentExpirationLock.Unlock()
  1416  
  1417  	v.renewalTimeLock.Lock()
  1418  	stats.NextRenewalTime = v.nextRenewalTime
  1419  	stats.LastRenewalTime = v.lastRenewalTime
  1420  	v.renewalTimeLock.Unlock()
  1421  
  1422  	if !stats.TokenExpiry.IsZero() {
  1423  		stats.TokenTTL = time.Until(stats.TokenExpiry)
  1424  	}
  1425  
  1426  	if !stats.LastRenewalTime.IsZero() {
  1427  		stats.TimeFromLastRenewal = time.Since(stats.LastRenewalTime)
  1428  	}
  1429  	if !stats.NextRenewalTime.IsZero() {
  1430  		stats.TimeToNextRenewal = time.Until(stats.NextRenewalTime)
  1431  	}
  1432  
  1433  	return stats
  1434  }
  1435  
  1436  // EmitStats is used to export metrics about the blocked eval tracker while enabled
  1437  func (v *vaultClient) EmitStats(period time.Duration, stopCh <-chan struct{}) {
  1438  	timer, stop := helper.NewSafeTimer(period)
  1439  	defer stop()
  1440  
  1441  	for {
  1442  		timer.Reset(period)
  1443  
  1444  		select {
  1445  		case <-timer.C:
  1446  			stats := v.stats()
  1447  			metrics.SetGauge([]string{"nomad", "vault", "distributed_tokens_revoking"}, float32(stats.TrackedForRevoke))
  1448  			metrics.SetGauge([]string{"nomad", "vault", "token_ttl"}, float32(stats.TokenTTL/time.Millisecond))
  1449  			metrics.SetGauge([]string{"nomad", "vault", "token_last_renewal"}, float32(stats.TimeFromLastRenewal/time.Millisecond))
  1450  			metrics.SetGauge([]string{"nomad", "vault", "token_next_renewal"}, float32(stats.TimeToNextRenewal/time.Millisecond))
  1451  
  1452  		case <-stopCh:
  1453  			return
  1454  		}
  1455  	}
  1456  }
  1457  
  1458  // extendExpiration sets the current auth token expiration record to ttLSeconds seconds from now
  1459  func (v *vaultClient) extendExpiration(ttlSeconds int) {
  1460  	v.currentExpirationLock.Lock()
  1461  	v.currentExpiration = time.Now().Add(time.Duration(ttlSeconds) * time.Second)
  1462  	v.currentExpirationLock.Unlock()
  1463  }
  1464  
  1465  // VaultVaultNoopDelegate returns the default vault api auth token handler
  1466  type VaultNoopDelegate struct{}
  1467  
  1468  func (e *VaultNoopDelegate) clientForTask(v *vaultClient, namespace string) (*vapi.Client, error) {
  1469  	return v.client, nil
  1470  }