github.com/Ilhicas/nomad@v1.0.4-0.20210304152020-e86851182bc3/nomad/vault.go

github.com/Ilhicas/nomad@v1.0.4-0.20210304152020-e86851182bc3/nomad/vault.go (about)

     1  package nomad
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"math/rand"
     8  	"strconv"
     9  	"strings"
    10  	"sync"
    11  	"sync/atomic"
    12  	"time"
    13  
    14  	tomb "gopkg.in/tomb.v2"
    15  
    16  	metrics "github.com/armon/go-metrics"
    17  	log "github.com/hashicorp/go-hclog"
    18  	multierror "github.com/hashicorp/go-multierror"
    19  	"github.com/hashicorp/nomad/nomad/structs"
    20  	"github.com/hashicorp/nomad/nomad/structs/config"
    21  	vapi "github.com/hashicorp/vault/api"
    22  	"github.com/mitchellh/mapstructure"
    23  
    24  	"golang.org/x/sync/errgroup"
    25  	"golang.org/x/time/rate"
    26  )
    27  
const (
	// vaultTokenCreateTTL is the duration the wrapped token for the client is
	// valid for. The value is a duration string expressed in seconds.
	vaultTokenCreateTTL = "60s"

	// minimumTokenTTL is the minimum Token TTL allowed for child tokens.
	minimumTokenTTL = 5 * time.Minute

	// defaultTokenTTL is the default Token TTL used when the passed token is a
	// root token such that child tokens aren't being created against a role
	// that has defined a TTL.
	defaultTokenTTL = "72h"

	// requestRateLimit is the maximum number of requests per second Nomad will
	// make against Vault.
	requestRateLimit rate.Limit = 500.0

	// maxParallelRevokes is the maximum number of parallel Vault
	// token revocation requests.
	maxParallelRevokes = 64

	// vaultRevocationIntv is the interval at which Vault tokens that failed
	// initial revocation are retried.
	vaultRevocationIntv = 5 * time.Minute

	// vaultCapabilitiesLookupPath is the path used to look up the capabilities
	// of one's own token.
	vaultCapabilitiesLookupPath = "sys/capabilities-self"

	// vaultTokenRenewPath is the path used to renew our token.
	vaultTokenRenewPath = "auth/token/renew-self"

	// vaultTokenLookupPath is the path used to look up a token.
	vaultTokenLookupPath = "auth/token/lookup"

	// vaultTokenRevokePath is the path used to revoke a token by its accessor.
	vaultTokenRevokePath = "auth/token/revoke-accessor"

	// vaultRoleLookupPath is the path template used to look up a role; the
	// role name is substituted for %s.
	vaultRoleLookupPath = "auth/token/roles/%s"

	// vaultTokenRoleCreatePath is the path template used to create a token
	// from a role; the role name is substituted for %s.
	vaultTokenRoleCreatePath = "auth/token/create/%s"
)
    72  
var (
	// vaultCapabilitiesCapability is the expected capability of Nomad's Vault
	// token on the capabilities lookup path. The token must have at least one
	// of the capabilities.
	vaultCapabilitiesCapability = []string{"update", "root"}

	// vaultTokenRenewCapability is the expected capability Nomad's
	// Vault token should have on the token renew path. The token must have at
	// least one of the capabilities.
	vaultTokenRenewCapability = []string{"update", "root"}

	// vaultTokenLookupCapability is the expected capability Nomad's
	// Vault token should have on the token lookup path. The token must have at
	// least one of the capabilities.
	vaultTokenLookupCapability = []string{"update", "root"}

	// vaultTokenRevokeCapability is the expected capability Nomad's
	// Vault token should have on the token revoke path. The token must have at
	// least one of the capabilities.
	vaultTokenRevokeCapability = []string{"update", "root"}

	// vaultRoleLookupCapability is the expected capability Nomad's Vault
	// token should have on the role lookup path. The token must have at least
	// one of the capabilities.
	vaultRoleLookupCapability = []string{"read", "root"}

	// vaultTokenRoleCreateCapability is the expected capability Nomad's Vault
	// token should have on the role token-create path. The token must have at
	// least one of the capabilities.
	vaultTokenRoleCreateCapability = []string{"update", "root"}
)
   104  
// VaultClient is the Server's interface for interacting with Vault.
type VaultClient interface {
	// SetActive activates or de-activates the Vault client. When active, token
	// creation/lookup/revocation operations are allowed.
	SetActive(active bool)

	// SetConfig updates the config used by the Vault client.
	SetConfig(config *config.VaultConfig) error

	// CreateToken takes an allocation and task and returns an appropriate Vault
	// Secret.
	CreateToken(ctx context.Context, a *structs.Allocation, task string) (*vapi.Secret, error)

	// LookupToken takes a token string and returns the Vault Secret describing
	// it.
	LookupToken(ctx context.Context, token string) (*vapi.Secret, error)

	// RevokeTokens takes a set of token accessors and revokes the tokens.
	// NOTE(review): the meaning of committed is defined by the implementation,
	// which is not visible here.
	RevokeTokens(ctx context.Context, accessors []*structs.VaultAccessor, committed bool) error

	// MarkForRevocation revokes the tokens in the background.
	MarkForRevocation(accessors []*structs.VaultAccessor) error

	// Stop is used to stop token renewal.
	Stop()

	// Running returns whether the Vault client is running.
	Running() bool

	// Stats returns the Vault client's statistics.
	Stats() map[string]string

	// EmitStats emits the client's statistics at the given period until stopCh
	// is signaled.
	EmitStats(period time.Duration, stopCh <-chan struct{})
}
   140  
// VaultStats holds the stats about Vault tokens created and managed by
// Nomad.
type VaultStats struct {
	// TrackedForRevoke is the count of tokens that are being tracked to be
	// revoked since they could not be immediately revoked.
	TrackedForRevoke int

	// TokenTTL is the time-to-live duration for the current token.
	TokenTTL time.Duration

	// TokenExpiry is the recorded expiry time of the current token.
	TokenExpiry time.Time
}
   154  
// PurgeVaultAccessorFn is called to remove VaultAccessors from the system. If
// the function returns an error, the tokens will still be tracked and
// revocation will be retried until it succeeds.
type PurgeVaultAccessorFn func(accessors []*structs.VaultAccessor) error
   159  
// tokenData holds the relevant information about the Vault token passed to the
// client. All fields except Root are decoded from the token lookup response
// via their mapstructure tags.
type tokenData struct {
	CreationTTL   int      `mapstructure:"creation_ttl"`
	TTL           int      `mapstructure:"ttl"`
	Renewable     bool     `mapstructure:"renewable"`
	Policies      []string `mapstructure:"policies"`
	Role          string   `mapstructure:"role"`
	NamespacePath string   `mapstructure:"namespace_path"`
	// Root is computed by parseSelfToken: true when Policies contains "root".
	Root          bool
}
   171  
// vaultClient is the Server's implementation of the VaultClient interface. The
// client renews the PeriodicToken given in the Vault configuration and provides
// the Server with the ability to create child tokens and look up the
// permissions of tokens.
type vaultClient struct {
	// limiter is used to rate limit requests to Vault
	limiter *rate.Limiter

	// client is the Vault API client used for Namespace-relative integrations
	// with the Vault API (anything except `/v1/sys`). If this server is not
	// configured to reference a Vault namespace, this will point to the same
	// client as clientSys
	client *vapi.Client

	// clientSys is the Vault API client used for non-Namespace-relative integrations
	// with the Vault API (anything involving `/v1/sys`). This client is never configured
	// with a Vault namespace, because these endpoints may return errors if a namespace
	// header is provided
	clientSys *vapi.Client

	// auth is the Vault token auth API client
	auth *vapi.TokenAuth

	// config is the user passed Vault config
	config *config.VaultConfig

	// connEstablished marks whether we have an established connection to Vault.
	// It is also set to true, together with connEstablishedErr, when Vault was
	// reached but the token failed validation.
	connEstablished bool

	// connEstablishedErr marks an error that can occur when establishing a
	// connection
	connEstablishedErr error

	// token is the raw token used by the client
	token string

	// tokenData is the data of the passed Vault token
	tokenData *tokenData

	// revoking tracks the VaultAccessors that must be revoked; revLock guards
	// the map and purgeFn is the callback supplied to NewVaultClient.
	revoking map[*structs.VaultAccessor]time.Time
	purgeFn  PurgeVaultAccessorFn
	revLock  sync.Mutex

	// active indicates whether the vaultClient is active. It should be
	// accessed using a helper and updated atomically
	active int32

	// running indicates whether the vault client is started.
	running bool

	// renewLoopActive indicates whether the renewal goroutine is running.
	// It should be accessed and updated atomically.
	// Used for testing purposes only.
	renewLoopActive int32

	// childTTL is the TTL for child tokens.
	childTTL string

	// currentExpiration is the time the current token lease expires
	currentExpiration     time.Time
	currentExpirationLock sync.Mutex

	// tomb tracks the background goroutines; it is replaced with a fresh Tomb
	// whenever the client is flushed or reconfigured.
	tomb   *tomb.Tomb
	logger log.Logger

	// l is used to lock the configuration aspects of the client such that
	// multiple callers can't cause conflicting config updates
	l sync.Mutex

	// setConfigLock serializes access to the SetConfig method
	setConfigLock sync.Mutex

	// consts as struct fields for overriding in tests
	maxRevokeBatchSize int
	revocationIntv     time.Duration

	// entHandler is the delegate passed to NewVaultClient.
	// NOTE(review): presumably an enterprise hook for per-namespace clients;
	// confirm against the implementations.
	entHandler taskClientHandler
}
   251  
// taskClientHandler is the delegate used by vaultClient to obtain a Vault API
// client for a given namespace.
// NOTE(review): presumably the namespace is the task's Vault namespace and
// implementations may return a namespaced copy of the client; confirm against
// the implementations, which are not visible here.
type taskClientHandler interface {
	clientForTask(v *vaultClient, namespace string) (*vapi.Client, error)
}
   255  
   256  // NewVaultClient returns a Vault client from the given config. If the client
   257  // couldn't be made an error is returned.
   258  func NewVaultClient(c *config.VaultConfig, logger log.Logger, purgeFn PurgeVaultAccessorFn, delegate taskClientHandler) (*vaultClient, error) {
   259  	if c == nil {
   260  		return nil, fmt.Errorf("must pass valid VaultConfig")
   261  	}
   262  
   263  	if logger == nil {
   264  		return nil, fmt.Errorf("must pass valid logger")
   265  	}
   266  	if purgeFn == nil {
   267  		purgeFn = func(accessors []*structs.VaultAccessor) error { return nil }
   268  	}
   269  	if delegate == nil {
   270  		delegate = &VaultNoopDelegate{}
   271  	}
   272  
   273  	v := &vaultClient{
   274  		config:             c,
   275  		logger:             logger.Named("vault"),
   276  		limiter:            rate.NewLimiter(requestRateLimit, int(requestRateLimit)),
   277  		revoking:           make(map[*structs.VaultAccessor]time.Time),
   278  		purgeFn:            purgeFn,
   279  		tomb:               &tomb.Tomb{},
   280  		maxRevokeBatchSize: maxVaultRevokeBatchSize,
   281  		revocationIntv:     vaultRevocationIntv,
   282  		entHandler:         delegate,
   283  	}
   284  
   285  	if v.config.IsEnabled() {
   286  		if err := v.buildClient(); err != nil {
   287  			return nil, err
   288  		}
   289  
   290  		// Launch the required goroutines
   291  		v.tomb.Go(wrapNilError(v.establishConnection))
   292  		v.tomb.Go(wrapNilError(v.revokeDaemon))
   293  
   294  		v.running = true
   295  	}
   296  
   297  	return v, nil
   298  }
   299  
   300  func (v *vaultClient) Stop() {
   301  	v.l.Lock()
   302  	running := v.running
   303  	v.running = false
   304  	v.l.Unlock()
   305  
   306  	if running {
   307  		v.tomb.Kill(nil)
   308  		v.tomb.Wait()
   309  		v.flush()
   310  	}
   311  }
   312  
   313  func (v *vaultClient) Running() bool {
   314  	v.l.Lock()
   315  	defer v.l.Unlock()
   316  	return v.running
   317  }
   318  
   319  // SetActive activates or de-activates the Vault client. When active, token
   320  // creation/lookup/revocation operation are allowed. All queued revocations are
   321  // cancelled if set un-active as it is assumed another instances is taking over
   322  func (v *vaultClient) SetActive(active bool) {
   323  	if active {
   324  		atomic.StoreInt32(&v.active, 1)
   325  	} else {
   326  		atomic.StoreInt32(&v.active, 0)
   327  	}
   328  
   329  	// Clear out the revoking tokens
   330  	v.revLock.Lock()
   331  	v.revoking = make(map[*structs.VaultAccessor]time.Time)
   332  	v.revLock.Unlock()
   333  }
   334  
   335  // flush is used to reset the state of the vault client
   336  func (v *vaultClient) flush() {
   337  	v.l.Lock()
   338  	defer v.l.Unlock()
   339  	v.revLock.Lock()
   340  	defer v.revLock.Unlock()
   341  
   342  	v.client = nil
   343  	v.clientSys = nil
   344  	v.auth = nil
   345  	v.connEstablished = false
   346  	v.connEstablishedErr = nil
   347  	v.token = ""
   348  	v.tokenData = nil
   349  	v.revoking = make(map[*structs.VaultAccessor]time.Time)
   350  	v.childTTL = ""
   351  	v.tomb = &tomb.Tomb{}
   352  }
   353  
   354  // SetConfig is used to update the Vault config being used. A temporary outage
   355  // may occur after calling as it re-establishes a connection to Vault
   356  func (v *vaultClient) SetConfig(config *config.VaultConfig) error {
   357  	if config == nil {
   358  		return fmt.Errorf("must pass valid VaultConfig")
   359  	}
   360  	v.setConfigLock.Lock()
   361  	defer v.setConfigLock.Unlock()
   362  
   363  	v.l.Lock()
   364  	defer v.l.Unlock()
   365  
   366  	// If reloading the same config, no-op
   367  	if v.config.IsEqual(config) {
   368  		return nil
   369  	}
   370  
   371  	// Kill any background routines
   372  	if v.running {
   373  		// Kill any background routine
   374  		v.tomb.Kill(nil)
   375  
   376  		// Locking around tomb.Wait can deadlock with
   377  		// establishConnection exiting, so we must unlock here.
   378  		v.l.Unlock()
   379  		v.tomb.Wait()
   380  		v.l.Lock()
   381  
   382  		// Stop accepting any new requests
   383  		v.connEstablished = false
   384  		v.tomb = &tomb.Tomb{}
   385  		v.running = false
   386  	}
   387  
   388  	// Store the new config
   389  	v.config = config
   390  
   391  	// Check if we should relaunch
   392  	if v.config.IsEnabled() {
   393  		// Rebuild the client
   394  		if err := v.buildClient(); err != nil {
   395  			return err
   396  		}
   397  
   398  		// Launch the required goroutines
   399  		v.tomb.Go(wrapNilError(v.establishConnection))
   400  		v.tomb.Go(wrapNilError(v.revokeDaemon))
   401  		v.running = true
   402  	}
   403  
   404  	return nil
   405  }
   406  
   407  // buildClient is used to build a Vault client based on the stored Vault config
   408  func (v *vaultClient) buildClient() error {
   409  	// Validate we have the required fields.
   410  	if v.config.Token == "" {
   411  		return errors.New("Vault token must be set")
   412  	} else if v.config.Addr == "" {
   413  		return errors.New("Vault address must be set")
   414  	}
   415  
   416  	// Parse the TTL if it is set
   417  	if v.config.TaskTokenTTL != "" {
   418  		d, err := time.ParseDuration(v.config.TaskTokenTTL)
   419  		if err != nil {
   420  			return fmt.Errorf("failed to parse TaskTokenTTL %q: %v", v.config.TaskTokenTTL, err)
   421  		}
   422  
   423  		if d.Nanoseconds() < minimumTokenTTL.Nanoseconds() {
   424  			return fmt.Errorf("ChildTokenTTL is less than minimum allowed of %v", minimumTokenTTL)
   425  		}
   426  
   427  		v.childTTL = v.config.TaskTokenTTL
   428  	} else {
   429  		// Default the TaskTokenTTL
   430  		v.childTTL = defaultTokenTTL
   431  	}
   432  
   433  	// Get the Vault API configuration
   434  	apiConf, err := v.config.ApiConfig()
   435  	if err != nil {
   436  		return fmt.Errorf("Failed to create Vault API config: %v", err)
   437  	}
   438  
   439  	// Create the Vault API client
   440  	client, err := vapi.NewClient(apiConf)
   441  	if err != nil {
   442  		v.logger.Error("failed to create Vault client and not retrying", "error", err)
   443  		return err
   444  	}
   445  
   446  	// Store the client, create/assign the /sys client
   447  	v.client = client
   448  	if v.config.Namespace != "" {
   449  		v.logger.Debug("configuring Vault namespace", "namespace", v.config.Namespace)
   450  		v.clientSys, err = vapi.NewClient(apiConf)
   451  		if err != nil {
   452  			v.logger.Error("failed to create Vault sys client and not retrying", "error", err)
   453  			return err
   454  		}
   455  		client.SetNamespace(v.config.Namespace)
   456  	} else {
   457  		v.clientSys = client
   458  	}
   459  
   460  	// Set the token
   461  	v.token = v.config.Token
   462  	client.SetToken(v.token)
   463  	v.auth = client.Auth().Token()
   464  
   465  	return nil
   466  }
   467  
   468  // establishConnection is used to make first contact with Vault. This should be
   469  // called in a go-routine since the connection is retried until the Vault Client
   470  // is stopped or the connection is successfully made at which point the renew
   471  // loop is started.
   472  func (v *vaultClient) establishConnection() {
   473  	// Create the retry timer and set initial duration to zero so it fires
   474  	// immediately
   475  	retryTimer := time.NewTimer(0)
   476  	initStatus := false
   477  OUTER:
   478  	for {
   479  		select {
   480  		case <-v.tomb.Dying():
   481  			return
   482  		case <-retryTimer.C:
   483  			// Retry validating the token till success
   484  			if err := v.parseSelfToken(); err != nil {
   485  				// if parsing token fails, try to distinguish legitimate token error from transient Vault initialization/connection issue
   486  				if !initStatus {
   487  					if _, err := v.clientSys.Sys().Health(); err != nil {
   488  						v.logger.Warn("failed to contact Vault API", "retry", v.config.ConnectionRetryIntv, "error", err)
   489  						retryTimer.Reset(v.config.ConnectionRetryIntv)
   490  						continue OUTER
   491  					}
   492  					initStatus = true
   493  				}
   494  
   495  				v.logger.Error("failed to validate self token/role", "retry", v.config.ConnectionRetryIntv, "error", err)
   496  				retryTimer.Reset(v.config.ConnectionRetryIntv)
   497  				v.l.Lock()
   498  				v.connEstablished = true
   499  				v.connEstablishedErr = fmt.Errorf("failed to establish connection to Vault: %v", err)
   500  				v.l.Unlock()
   501  				continue OUTER
   502  			}
   503  
   504  			break OUTER
   505  		}
   506  	}
   507  
   508  	// Set the wrapping function such that token creation is wrapped now
   509  	// that we know our role
   510  	v.client.SetWrappingLookupFunc(v.getWrappingFn())
   511  
   512  	// If we are given a non-root token, start renewing it
   513  	if v.tokenData.Root && v.tokenData.CreationTTL == 0 {
   514  		v.logger.Debug("not renewing token as it is root")
   515  	} else {
   516  		v.logger.Debug("starting renewal loop", "creation_ttl", time.Duration(v.tokenData.CreationTTL)*time.Second)
   517  		v.tomb.Go(wrapNilError(v.renewalLoop))
   518  	}
   519  
   520  	v.l.Lock()
   521  	v.connEstablished = true
   522  	v.connEstablishedErr = nil
   523  	v.l.Unlock()
   524  }
   525  
   526  func (v *vaultClient) isRenewLoopActive() bool {
   527  	return atomic.LoadInt32(&v.renewLoopActive) == 1
   528  }
   529  
   530  // renewalLoop runs the renew loop. This should only be called if we are given a
   531  // non-root token.
   532  func (v *vaultClient) renewalLoop() {
   533  	atomic.StoreInt32(&v.renewLoopActive, 1)
   534  	defer atomic.StoreInt32(&v.renewLoopActive, 0)
   535  
   536  	// Create the renewal timer and set initial duration to zero so it fires
   537  	// immediately
   538  	authRenewTimer := time.NewTimer(0)
   539  
   540  	// Backoff is to reduce the rate we try to renew with Vault under error
   541  	// situations
   542  	backoff := 0.0
   543  
   544  	for {
   545  		select {
   546  		case <-v.tomb.Dying():
   547  			return
   548  		case <-authRenewTimer.C:
   549  			// Renew the token and determine the new expiration
   550  			recoverable, err := v.renew()
   551  			v.currentExpirationLock.Lock()
   552  			currentExpiration := v.currentExpiration
   553  			v.currentExpirationLock.Unlock()
   554  
   555  			// Successfully renewed
   556  			if err == nil {
   557  				// Attempt to renew the token at half the expiration time
   558  				durationUntilRenew := time.Until(currentExpiration) / 2
   559  
   560  				v.logger.Info("successfully renewed token", "next_renewal", durationUntilRenew)
   561  				authRenewTimer.Reset(durationUntilRenew)
   562  
   563  				// Reset any backoff
   564  				backoff = 0
   565  				break
   566  			}
   567  
   568  			metrics.IncrCounter([]string{"nomad", "vault", "renew_failed"}, 1)
   569  			v.logger.Warn("got error or bad auth, so backing off", "error", err, "recoverable", recoverable)
   570  
   571  			if !recoverable {
   572  				return
   573  			}
   574  
   575  			backoff = nextBackoff(backoff, currentExpiration)
   576  			if backoff < 0 {
   577  				// We have failed to renew the token past its expiration. Stop
   578  				// renewing with Vault.
   579  				v.logger.Error("failed to renew Vault token before lease expiration. Shutting down Vault client",
   580  					"error", err)
   581  				v.l.Lock()
   582  				v.connEstablished = false
   583  				v.connEstablishedErr = err
   584  				v.l.Unlock()
   585  				return
   586  			}
   587  
   588  			durationUntilRetry := time.Duration(backoff) * time.Second
   589  			v.logger.Info("backing off renewal", "retry", durationUntilRetry)
   590  
   591  			authRenewTimer.Reset(durationUntilRetry)
   592  		}
   593  	}
   594  }
   595  
   596  // nextBackoff returns the delay for the next auto renew interval, in seconds.
   597  // Returns negative value if past expiration
   598  //
   599  // It should increase the amount of backoff each time, with the following rules:
   600  //
   601  // * If token expired already despite earlier renewal attempts,
   602  //   back off for 1 minute + jitter
   603  // * If we have an existing authentication that is going to expire,
   604  // never back off more than half of the amount of time remaining
   605  // until expiration (with 5s floor)
   606  // * Never back off more than 30 seconds multiplied by a random
   607  // value between 1 and 2
   608  // * Use randomness so that many clients won't keep hitting Vault
   609  // at the same time
   610  func nextBackoff(backoff float64, expiry time.Time) float64 {
   611  	maxBackoff := time.Until(expiry) / 2
   612  
   613  	if maxBackoff < 0 {
   614  		// expiry passed
   615  		return 60 * (1.0 + rand.Float64())
   616  	}
   617  
   618  	switch {
   619  	case backoff >= 24:
   620  		backoff = 30
   621  	default:
   622  		backoff = backoff * 1.25
   623  	}
   624  
   625  	// Add randomness
   626  	backoff = backoff * (1.0 + rand.Float64())
   627  
   628  	if backoff > maxBackoff.Seconds() {
   629  		backoff = maxBackoff.Seconds()
   630  	}
   631  
   632  	if backoff < 5 {
   633  		backoff = 5
   634  	}
   635  
   636  	return backoff
   637  }
   638  
   639  // renew attempts to renew our Vault token. If the renewal fails, an error is
   640  // returned.  The boolean indicates whether it's safe to attempt to renew again.
   641  // This method updates the currentExpiration time
   642  func (v *vaultClient) renew() (bool, error) {
   643  	// Track how long the request takes
   644  	defer metrics.MeasureSince([]string{"nomad", "vault", "renew"}, time.Now())
   645  
   646  	// Attempt to renew the token
   647  	secret, err := v.auth.RenewSelf(v.tokenData.CreationTTL)
   648  	if err != nil {
   649  		// Check if there is a permission denied
   650  		recoverable := !structs.VaultUnrecoverableError.MatchString(err.Error())
   651  		return recoverable, fmt.Errorf("failed to renew the vault token: %v", err)
   652  	}
   653  
   654  	if secret == nil {
   655  		// It's possible for RenewSelf to return (nil, nil) if the
   656  		// response body from Vault is empty.
   657  		return true, fmt.Errorf("renewal failed: empty response from vault")
   658  	}
   659  
   660  	// these treated as transient errors, where can keep renewing
   661  	auth := secret.Auth
   662  	if auth == nil {
   663  		return true, fmt.Errorf("renewal successful but not auth information returned")
   664  	} else if auth.LeaseDuration == 0 {
   665  		return true, fmt.Errorf("renewal successful but no lease duration returned")
   666  	}
   667  
   668  	v.extendExpiration(auth.LeaseDuration)
   669  
   670  	v.logger.Debug("successfully renewed server token")
   671  	return true, nil
   672  }
   673  
   674  // getWrappingFn returns an appropriate wrapping function for Nomad Servers
   675  func (v *vaultClient) getWrappingFn() func(operation, path string) string {
   676  	createPath := "auth/token/create"
   677  	role := v.getRole()
   678  	if role != "" {
   679  		createPath = fmt.Sprintf("auth/token/create/%s", role)
   680  	}
   681  
   682  	return func(operation, path string) string {
   683  		// Only wrap the token create operation
   684  		if operation != "POST" || path != createPath {
   685  			return ""
   686  		}
   687  
   688  		return vaultTokenCreateTTL
   689  	}
   690  }
   691  
   692  // parseSelfToken looks up the Vault token in Vault and parses its data storing
   693  // it in the client. If the token is not valid for Nomads purposes an error is
   694  // returned.
   695  func (v *vaultClient) parseSelfToken() error {
   696  	// Try looking up the token using the self endpoint
   697  	secret, err := v.lookupSelf()
   698  	if err != nil {
   699  		return err
   700  	}
   701  
   702  	// Read and parse the fields
   703  	var data tokenData
   704  	if err := mapstructure.WeakDecode(secret.Data, &data); err != nil {
   705  		return fmt.Errorf("failed to parse Vault token's data block: %v", err)
   706  	}
   707  	root := false
   708  	for _, p := range data.Policies {
   709  		if p == "root" {
   710  			root = true
   711  			break
   712  		}
   713  	}
   714  	data.Root = root
   715  	v.tokenData = &data
   716  	v.extendExpiration(data.TTL)
   717  
   718  	// The criteria that must be met for the token to be valid are as follows:
   719  	// 1) If token is non-root or is but has a creation ttl
   720  	//   a) The token must be renewable
   721  	//   b) Token must have a non-zero TTL
   722  	// 2) Must have update capability for "auth/token/lookup/" (used to verify incoming tokens)
   723  	// 3) Must have update capability for "/auth/token/revoke-accessor/" (used to revoke unneeded tokens)
   724  	// 4) If configured to create tokens against a role:
   725  	//   a) Must have read capability for "auth/token/roles/<role_name" (Can just attempt a read)
   726  	//   b) Must have update capability for path "auth/token/create/<role_name>"
   727  	//   c) Role must:
   728  	//     1) Must allow tokens to be renewed
   729  	//     2) Must not have an explicit max TTL
   730  	//     3) Must have non-zero period
   731  	// 5) If not configured against a role, the token must be root
   732  
   733  	var mErr multierror.Error
   734  	role := v.getRole()
   735  	if !data.Root {
   736  		// All non-root tokens must be renewable
   737  		if !data.Renewable {
   738  			_ = multierror.Append(&mErr, fmt.Errorf("Vault token is not renewable or root"))
   739  		}
   740  
   741  		// All non-root tokens must have a lease duration
   742  		if data.CreationTTL == 0 {
   743  			_ = multierror.Append(&mErr, fmt.Errorf("invalid lease duration of zero"))
   744  		}
   745  
   746  		// The lease duration can not be expired
   747  		if data.TTL == 0 {
   748  			_ = multierror.Append(&mErr, fmt.Errorf("token TTL is zero"))
   749  		}
   750  
   751  		// There must be a valid role since we aren't root
   752  		if role == "" {
   753  			_ = multierror.Append(&mErr, fmt.Errorf("token role name must be set when not using a root token"))
   754  		}
   755  
   756  	} else if data.CreationTTL != 0 {
   757  		// If the root token has a TTL it must be renewable
   758  		if !data.Renewable {
   759  			_ = multierror.Append(&mErr, fmt.Errorf("Vault token has a TTL but is not renewable"))
   760  		} else if data.TTL == 0 {
   761  			// If the token has a TTL make sure it has not expired
   762  			_ = multierror.Append(&mErr, fmt.Errorf("token TTL is zero"))
   763  		}
   764  	}
   765  
   766  	// Check we have the correct capabilities
   767  	if err := v.validateCapabilities(role, data.Root); err != nil {
   768  		_ = multierror.Append(&mErr, err)
   769  	}
   770  
   771  	// If given a role validate it
   772  	if role != "" {
   773  		if err := v.validateRole(role); err != nil {
   774  			_ = multierror.Append(&mErr, err)
   775  		}
   776  	}
   777  
   778  	return mErr.ErrorOrNil()
   779  }
   780  
   781  // lookupSelf is a helper function that looks up latest self lease info.
   782  func (v *vaultClient) lookupSelf() (*vapi.Secret, error) {
   783  	// Get the initial lease duration
   784  	auth := v.client.Auth().Token()
   785  
   786  	secret, err := auth.LookupSelf()
   787  	if err == nil && secret != nil && secret.Data != nil {
   788  		return secret, nil
   789  	}
   790  
   791  	// Try looking up our token directly, even when we get an empty response,
   792  	// in case of an unexpected event - a true failure would occur in this lookup again
   793  	secret, err = auth.Lookup(v.client.Token())
   794  	switch {
   795  	case err != nil:
   796  		return nil, fmt.Errorf("failed to lookup Vault periodic token: %v", err)
   797  	case secret == nil || secret.Data == nil:
   798  		return nil, fmt.Errorf("failed to lookup Vault periodic token: got empty response")
   799  	default:
   800  		return secret, nil
   801  	}
   802  }
   803  
   804  // getRole returns the role name to be used when creating tokens
   805  func (v *vaultClient) getRole() string {
   806  	if v.config.Role != "" {
   807  		return v.config.Role
   808  	}
   809  
   810  	return v.tokenData.Role
   811  }
   812  
   813  // validateCapabilities checks that Nomad's Vault token has the correct
   814  // capabilities.
   815  func (v *vaultClient) validateCapabilities(role string, root bool) error {
   816  	// Check if the token can lookup capabilities.
   817  	var mErr multierror.Error
   818  	_, _, err := v.hasCapability(vaultCapabilitiesLookupPath, vaultCapabilitiesCapability)
   819  	if err != nil {
   820  		// Check if there is a permission denied
   821  		if structs.VaultUnrecoverableError.MatchString(err.Error()) {
   822  			// Since we can't read permissions, we just log a warning that we
   823  			// can't tell if the Vault token will work
   824  			msg := fmt.Sprintf("can not lookup token capabilities. "+
   825  				"As such certain operations may fail in the future. "+
   826  				"Please give Nomad a Vault token with one of the following "+
   827  				"capabilities %q on %q so that the required capabilities can be verified",
   828  				vaultCapabilitiesCapability, vaultCapabilitiesLookupPath)
   829  			v.logger.Warn(msg)
   830  			return nil
   831  		} else {
   832  			_ = multierror.Append(&mErr, err)
   833  		}
   834  	}
   835  
   836  	// verify is a helper function that verifies the token has one of the
   837  	// capabilities on the given path and adds an issue to the error
   838  	verify := func(path string, requiredCaps []string) {
   839  		ok, caps, err := v.hasCapability(path, requiredCaps)
   840  		if err != nil {
   841  			_ = multierror.Append(&mErr, err)
   842  		} else if !ok {
   843  			_ = multierror.Append(&mErr,
   844  				fmt.Errorf("token must have one of the following capabilities %q on %q; has %v", requiredCaps, path, caps))
   845  		}
   846  	}
   847  
   848  	// Check if we are verifying incoming tokens
   849  	if !v.config.AllowsUnauthenticated() {
   850  		verify(vaultTokenLookupPath, vaultTokenLookupCapability)
   851  	}
   852  
   853  	// Verify we can renew our selves tokens
   854  	verify(vaultTokenRenewPath, vaultTokenRenewCapability)
   855  
   856  	// Verify we can revoke tokens
   857  	verify(vaultTokenRevokePath, vaultTokenRevokeCapability)
   858  
   859  	// If we are using a role verify the capability
   860  	if role != "" {
   861  		// Verify we can read the role
   862  		verify(fmt.Sprintf(vaultRoleLookupPath, role), vaultRoleLookupCapability)
   863  
   864  		// Verify we can create from the role
   865  		verify(fmt.Sprintf(vaultTokenRoleCreatePath, role), vaultTokenRoleCreateCapability)
   866  	}
   867  
   868  	return mErr.ErrorOrNil()
   869  }
   870  
   871  // hasCapability takes a path and returns whether the token has at least one of
   872  // the required capabilities on the given path. It also returns the set of
   873  // capabilities the token does have as well as any error that occurred.
   874  func (v *vaultClient) hasCapability(path string, required []string) (bool, []string, error) {
   875  	caps, err := v.client.Sys().CapabilitiesSelf(path)
   876  	if err != nil {
   877  		return false, nil, err
   878  	}
   879  	for _, c := range caps {
   880  		for _, r := range required {
   881  			if c == r {
   882  				return true, caps, nil
   883  			}
   884  		}
   885  	}
   886  	return false, caps, nil
   887  }
   888  
   889  // validateRole contacts Vault and checks that the given Vault role is valid for
   890  // the purposes of being used by Nomad
   891  func (v *vaultClient) validateRole(role string) error {
   892  	if role == "" {
   893  		return fmt.Errorf("Invalid empty role name")
   894  	}
   895  
   896  	// Validate the role
   897  	rsecret, err := v.client.Logical().Read(fmt.Sprintf("auth/token/roles/%s", role))
   898  	if err != nil {
   899  		return fmt.Errorf("failed to lookup role %q: %v", role, err)
   900  	}
   901  	if rsecret == nil {
   902  		return fmt.Errorf("Role %q does not exist", role)
   903  	}
   904  
   905  	// Read and parse the fields
   906  	var data struct {
   907  		ExplicitMaxTtl      int `mapstructure:"explicit_max_ttl"`
   908  		TokenExplicitMaxTtl int `mapstructure:"token_explicit_max_ttl"`
   909  		Orphan              bool
   910  		Period              int
   911  		TokenPeriod         int `mapstructure:"token_period"`
   912  		Renewable           bool
   913  	}
   914  	if err := mapstructure.WeakDecode(rsecret.Data, &data); err != nil {
   915  		return fmt.Errorf("failed to parse Vault role's data block: %v", err)
   916  	}
   917  
   918  	// Validate the role is acceptable
   919  	var mErr multierror.Error
   920  	if !data.Renewable {
   921  		_ = multierror.Append(&mErr, fmt.Errorf("Role must allow tokens to be renewed"))
   922  	}
   923  
   924  	if data.ExplicitMaxTtl != 0 || data.TokenExplicitMaxTtl != 0 {
   925  		_ = multierror.Append(&mErr, fmt.Errorf("Role can not use an explicit max ttl. Token must be periodic."))
   926  	}
   927  
   928  	if data.Period == 0 && data.TokenPeriod == 0 {
   929  		_ = multierror.Append(&mErr, fmt.Errorf("Role must have a non-zero period to make tokens periodic."))
   930  	}
   931  
   932  	return mErr.ErrorOrNil()
   933  }
   934  
   935  // ConnectionEstablished returns whether a connection to Vault has been
   936  // established and any error that potentially caused it to be false
   937  func (v *vaultClient) ConnectionEstablished() (bool, error) {
   938  	v.l.Lock()
   939  	defer v.l.Unlock()
   940  	return v.connEstablished, v.connEstablishedErr
   941  }
   942  
   943  // Enabled returns whether the client is active
   944  func (v *vaultClient) Enabled() bool {
   945  	v.l.Lock()
   946  	defer v.l.Unlock()
   947  	return v.config.IsEnabled()
   948  }
   949  
   950  // Active returns whether the client is active
   951  func (v *vaultClient) Active() bool {
   952  	return atomic.LoadInt32(&v.active) == 1
   953  }
   954  
   955  // CreateToken takes the allocation and task and returns an appropriate Vault
   956  // token. The call is rate limited and may be canceled with the passed policy.
   957  // When the error is recoverable, it will be of type RecoverableError
   958  func (v *vaultClient) CreateToken(ctx context.Context, a *structs.Allocation, task string) (*vapi.Secret, error) {
   959  	if !v.Enabled() {
   960  		return nil, fmt.Errorf("Vault integration disabled")
   961  	}
   962  	if !v.Active() {
   963  		return nil, structs.NewRecoverableError(fmt.Errorf("Vault client not active"), true)
   964  	}
   965  	// Check if we have established a connection with Vault
   966  	if established, err := v.ConnectionEstablished(); !established && err == nil {
   967  		return nil, structs.NewRecoverableError(fmt.Errorf("Connection to Vault has not been established"), true)
   968  	} else if err != nil {
   969  		return nil, err
   970  	}
   971  
   972  	// Track how long the request takes
   973  	defer metrics.MeasureSince([]string{"nomad", "vault", "create_token"}, time.Now())
   974  
   975  	// Retrieve the Vault block for the task
   976  	policies := a.Job.VaultPolicies()
   977  	if policies == nil {
   978  		return nil, fmt.Errorf("Job doesn't require Vault policies")
   979  	}
   980  	tg, ok := policies[a.TaskGroup]
   981  	if !ok {
   982  		return nil, fmt.Errorf("Task group does not require Vault policies")
   983  	}
   984  	taskVault, ok := tg[task]
   985  	if !ok {
   986  		return nil, fmt.Errorf("Task does not require Vault policies")
   987  	}
   988  
   989  	// Set namespace for task
   990  	namespaceForTask := v.config.Namespace
   991  	if taskVault.Namespace != "" {
   992  		namespaceForTask = taskVault.Namespace
   993  	}
   994  
   995  	// Build the creation request
   996  	req := &vapi.TokenCreateRequest{
   997  		Policies: taskVault.Policies,
   998  		Metadata: map[string]string{
   999  			"AllocationID": a.ID,
  1000  			"Task":         task,
  1001  			"NodeID":       a.NodeID,
  1002  			"Namespace":    namespaceForTask,
  1003  		},
  1004  		TTL:         v.childTTL,
  1005  		DisplayName: fmt.Sprintf("%s-%s", a.ID, task),
  1006  	}
  1007  
  1008  	// Ensure we are under our rate limit
  1009  	if err := v.limiter.Wait(ctx); err != nil {
  1010  		return nil, err
  1011  	}
  1012  
  1013  	// Make the request and switch depending on whether we are using a root
  1014  	// token or a role based token
  1015  	var secret *vapi.Secret
  1016  	var err error
  1017  	role := v.getRole()
  1018  
  1019  	// Fetch client for task
  1020  	taskClient, err := v.entHandler.clientForTask(v, namespaceForTask)
  1021  	if err != nil {
  1022  		return nil, err
  1023  	}
  1024  
  1025  	if v.tokenData.Root && role == "" {
  1026  		req.Period = v.childTTL
  1027  		secret, err = taskClient.Auth().Token().Create(req)
  1028  	} else {
  1029  		// Make the token using the role
  1030  		secret, err = taskClient.Auth().Token().CreateWithRole(req, v.getRole())
  1031  	}
  1032  
  1033  	// Determine whether it is unrecoverable
  1034  	if err != nil {
  1035  		err = fmt.Errorf("failed to create an alloc vault token: %v", err)
  1036  		if structs.VaultUnrecoverableError.MatchString(err.Error()) {
  1037  			return secret, err
  1038  		}
  1039  
  1040  		// The error is recoverable
  1041  		return nil, structs.NewRecoverableError(err, true)
  1042  	}
  1043  
  1044  	// Validate the response
  1045  	var validationErr error
  1046  	if secret == nil {
  1047  		validationErr = fmt.Errorf("Vault returned nil Secret")
  1048  	} else if secret.WrapInfo == nil {
  1049  		validationErr = fmt.Errorf("Vault returned Secret with nil WrapInfo. Secret warnings: %v", secret.Warnings)
  1050  	} else if secret.WrapInfo.WrappedAccessor == "" {
  1051  		validationErr = fmt.Errorf("Vault returned WrapInfo without WrappedAccessor. Secret warnings: %v", secret.Warnings)
  1052  	}
  1053  	if validationErr != nil {
  1054  		v.logger.Warn("failed to CreateToken", "error", validationErr)
  1055  		return nil, structs.NewRecoverableError(validationErr, true)
  1056  	}
  1057  
  1058  	// Got a valid response
  1059  	return secret, nil
  1060  }
  1061  
  1062  // LookupToken takes a Vault token and does a lookup against Vault. The call is
  1063  // rate limited and may be canceled with passed context.
  1064  func (v *vaultClient) LookupToken(ctx context.Context, token string) (*vapi.Secret, error) {
  1065  	if !v.Enabled() {
  1066  		return nil, fmt.Errorf("Vault integration disabled")
  1067  	}
  1068  
  1069  	if !v.Active() {
  1070  		return nil, fmt.Errorf("Vault client not active")
  1071  	}
  1072  
  1073  	// Check if we have established a connection with Vault
  1074  	if established, err := v.ConnectionEstablished(); !established && err == nil {
  1075  		return nil, structs.NewRecoverableError(fmt.Errorf("Connection to Vault has not been established"), true)
  1076  	} else if err != nil {
  1077  		return nil, err
  1078  	}
  1079  
  1080  	// Track how long the request takes
  1081  	defer metrics.MeasureSince([]string{"nomad", "vault", "lookup_token"}, time.Now())
  1082  
  1083  	// Ensure we are under our rate limit
  1084  	if err := v.limiter.Wait(ctx); err != nil {
  1085  		return nil, err
  1086  	}
  1087  
  1088  	// Lookup the token
  1089  	return v.auth.Lookup(token)
  1090  }
  1091  
  1092  // PoliciesFrom parses the set of policies returned by a token lookup.
  1093  func PoliciesFrom(s *vapi.Secret) ([]string, error) {
  1094  	return s.TokenPolicies()
  1095  }
  1096  
  1097  // PolicyDataFrom parses the Data returned by a token lookup.
  1098  // It should not be used to parse TokenPolicies as the list will not be
  1099  // exhaustive.
  1100  func PolicyDataFrom(s *vapi.Secret) (tokenData, error) {
  1101  	if s == nil {
  1102  		return tokenData{}, fmt.Errorf("cannot parse nil Vault secret")
  1103  	}
  1104  	var data tokenData
  1105  
  1106  	if err := mapstructure.WeakDecode(s.Data, &data); err != nil {
  1107  		return tokenData{}, fmt.Errorf("failed to parse Vault token's data block: %v", err)
  1108  	}
  1109  
  1110  	return data, nil
  1111  }
  1112  
  1113  // RevokeTokens revokes the passed set of accessors. If committed is set, the
  1114  // purge function passed to the client is called. If there is an error purging
  1115  // either because of Vault failures or because of the purge function, the
  1116  // revocation is retried until the tokens TTL.
  1117  func (v *vaultClient) RevokeTokens(ctx context.Context, accessors []*structs.VaultAccessor, committed bool) error {
  1118  	if !v.Enabled() {
  1119  		return nil
  1120  	}
  1121  
  1122  	if !v.Active() {
  1123  		return fmt.Errorf("Vault client not active")
  1124  	}
  1125  
  1126  	// Track how long the request takes
  1127  	defer metrics.MeasureSince([]string{"nomad", "vault", "revoke_tokens"}, time.Now())
  1128  
  1129  	// Check if we have established a connection with Vault. If not just add it
  1130  	// to the queue
  1131  	if established, err := v.ConnectionEstablished(); !established && err == nil {
  1132  		// Only bother tracking it for later revocation if the accessor was
  1133  		// committed
  1134  		if committed {
  1135  			v.storeForRevocation(accessors)
  1136  		}
  1137  
  1138  		// Track that we are abandoning these accessors.
  1139  		metrics.IncrCounter([]string{"nomad", "vault", "undistributed_tokens_abandoned"}, float32(len(accessors)))
  1140  		return nil
  1141  	}
  1142  
  1143  	// Attempt to revoke immediately and if it fails, add it to the revoke queue
  1144  	err := v.parallelRevoke(ctx, accessors)
  1145  	if err != nil {
  1146  		// If it is uncommitted, it is a best effort revoke as it will shortly
  1147  		// TTL within the cubbyhole and has not been leaked to any outside
  1148  		// system
  1149  		if !committed {
  1150  			metrics.IncrCounter([]string{"nomad", "vault", "undistributed_tokens_abandoned"}, float32(len(accessors)))
  1151  			return nil
  1152  		}
  1153  
  1154  		v.logger.Warn("failed to revoke tokens. Will reattempt until TTL", "error", err)
  1155  		v.storeForRevocation(accessors)
  1156  		return nil
  1157  	} else if !committed {
  1158  		// Mark that it was revoked but there is nothing to purge so exit
  1159  		metrics.IncrCounter([]string{"nomad", "vault", "undistributed_tokens_revoked"}, float32(len(accessors)))
  1160  		return nil
  1161  	}
  1162  
  1163  	if err := v.purgeFn(accessors); err != nil {
  1164  		v.logger.Error("failed to purge Vault accessors", "error", err)
  1165  		v.storeForRevocation(accessors)
  1166  		return nil
  1167  	}
  1168  
  1169  	// Track that it was revoked successfully
  1170  	metrics.IncrCounter([]string{"nomad", "vault", "distributed_tokens_revoked"}, float32(len(accessors)))
  1171  
  1172  	return nil
  1173  }
  1174  
  1175  func (v *vaultClient) MarkForRevocation(accessors []*structs.VaultAccessor) error {
  1176  	if !v.Enabled() {
  1177  		return nil
  1178  	}
  1179  
  1180  	if !v.Active() {
  1181  		return fmt.Errorf("Vault client not active")
  1182  	}
  1183  
  1184  	v.storeForRevocation(accessors)
  1185  	return nil
  1186  }
  1187  
  1188  // storeForRevocation stores the passed set of accessors for revocation. It
  1189  // captures their effective TTL by storing their create TTL plus the current
  1190  // time.
  1191  func (v *vaultClient) storeForRevocation(accessors []*structs.VaultAccessor) {
  1192  	v.revLock.Lock()
  1193  
  1194  	now := time.Now()
  1195  	for _, a := range accessors {
  1196  		if _, ok := v.revoking[a]; !ok {
  1197  			v.revoking[a] = now.Add(time.Duration(a.CreationTTL) * time.Second)
  1198  		}
  1199  	}
  1200  	v.revLock.Unlock()
  1201  }
  1202  
  1203  // parallelRevoke revokes the passed VaultAccessors in parallel.
  1204  func (v *vaultClient) parallelRevoke(ctx context.Context, accessors []*structs.VaultAccessor) error {
  1205  	if !v.Enabled() {
  1206  		return fmt.Errorf("Vault integration disabled")
  1207  	}
  1208  
  1209  	if !v.Active() {
  1210  		return fmt.Errorf("Vault client not active")
  1211  	}
  1212  
  1213  	// Check if we have established a connection with Vault
  1214  	if established, err := v.ConnectionEstablished(); !established && err == nil {
  1215  		return structs.NewRecoverableError(fmt.Errorf("Connection to Vault has not been established"), true)
  1216  	} else if err != nil {
  1217  		return err
  1218  	}
  1219  
  1220  	g, pCtx := errgroup.WithContext(ctx)
  1221  
  1222  	// Cap the handlers
  1223  	handlers := len(accessors)
  1224  	if handlers > maxParallelRevokes {
  1225  		handlers = maxParallelRevokes
  1226  	}
  1227  
  1228  	// Revoke the Vault Token Accessors
  1229  	input := make(chan *structs.VaultAccessor, handlers)
  1230  	for i := 0; i < handlers; i++ {
  1231  		g.Go(func() error {
  1232  			for {
  1233  				select {
  1234  				case va, ok := <-input:
  1235  					if !ok {
  1236  						return nil
  1237  					}
  1238  
  1239  					err := v.auth.RevokeAccessor(va.Accessor)
  1240  					if err != nil && !strings.Contains(err.Error(), "invalid accessor") {
  1241  						return fmt.Errorf("failed to revoke token (alloc: %q, node: %q, task: %q): %v", va.AllocID, va.NodeID, va.Task, err)
  1242  					}
  1243  				case <-pCtx.Done():
  1244  					return nil
  1245  				}
  1246  			}
  1247  		})
  1248  	}
  1249  
  1250  	// Send the input
  1251  	go func() {
  1252  		defer close(input)
  1253  		for _, va := range accessors {
  1254  			select {
  1255  			case <-pCtx.Done():
  1256  				return
  1257  			case input <- va:
  1258  			}
  1259  		}
  1260  
  1261  	}()
  1262  
  1263  	// Wait for everything to complete
  1264  	return g.Wait()
  1265  }
  1266  
// maxVaultRevokeBatchSize is the maximum number of tokens the revokeDaemon
// should revoke and purge at any given time.
//
// Limiting the revocation batch size is beneficial for a few reasons:
// * A single revocation failure of any entry in a batch results in retrying
//   the whole batch; the larger the batch, the higher the likelihood of such
//   a failure.
// * Smaller batch sizes result in more cooperativeness: they provide hooks
//   for reconsidering token TTLs and leadership step-downs.
// * Batches limit the size of the Raft message purging tokens. Due to bugs
//   pre-0.11.3, expired tokens were not properly purged, so users upgrading from
//   older versions may have huge numbers (millions) of expired tokens to purge.
  1278  const maxVaultRevokeBatchSize = 1000
  1279  
  1280  // revokeDaemon should be called in a goroutine and is used to periodically
  1281  // revoke Vault accessors that failed the original revocation
  1282  func (v *vaultClient) revokeDaemon() {
  1283  	ticker := time.NewTicker(v.revocationIntv)
  1284  	defer ticker.Stop()
  1285  
  1286  	for {
  1287  		select {
  1288  		case <-v.tomb.Dying():
  1289  			return
  1290  		case now := <-ticker.C:
  1291  			if established, err := v.ConnectionEstablished(); !established || err != nil {
  1292  				continue
  1293  			}
  1294  
  1295  			v.revLock.Lock()
  1296  
  1297  			// Fast path
  1298  			if len(v.revoking) == 0 {
  1299  				v.revLock.Unlock()
  1300  				continue
  1301  			}
  1302  
  1303  			// Build the list of accessors that need to be revoked while pruning any TTL'd checks
  1304  			toRevoke := len(v.revoking)
  1305  			if toRevoke > v.maxRevokeBatchSize {
  1306  				v.logger.Info("batching tokens to be revoked",
  1307  					"to_revoke", toRevoke, "batch_size", v.maxRevokeBatchSize,
  1308  					"batch_interval", v.revocationIntv)
  1309  				toRevoke = v.maxRevokeBatchSize
  1310  			}
  1311  			revoking := make([]*structs.VaultAccessor, 0, toRevoke)
  1312  			ttlExpired := []*structs.VaultAccessor{}
  1313  			for va, ttl := range v.revoking {
  1314  				if now.After(ttl) {
  1315  					ttlExpired = append(ttlExpired, va)
  1316  				} else {
  1317  					revoking = append(revoking, va)
  1318  				}
  1319  
  1320  				// Batches should consider tokens to be revoked
  1321  				// as well as expired tokens to ensure the Raft
  1322  				// message is reasonably sized.
  1323  				if len(revoking)+len(ttlExpired) >= toRevoke {
  1324  					break
  1325  				}
  1326  			}
  1327  
  1328  			if err := v.parallelRevoke(context.Background(), revoking); err != nil {
  1329  				v.logger.Warn("background token revocation errored", "error", err)
  1330  				v.revLock.Unlock()
  1331  				continue
  1332  			}
  1333  
  1334  			// Unlock before a potentially expensive operation
  1335  			v.revLock.Unlock()
  1336  
  1337  			// purge all explicitly revoked as well as ttl expired tokens
  1338  			// and only remove them locally on purge success
  1339  			revoking = append(revoking, ttlExpired...)
  1340  
  1341  			// Call the passed in token revocation function
  1342  			if err := v.purgeFn(revoking); err != nil {
  1343  				// Can continue since revocation is idempotent
  1344  				v.logger.Error("token revocation errored", "error", err)
  1345  				continue
  1346  			}
  1347  
  1348  			// Track that tokens were revoked successfully
  1349  			metrics.IncrCounter([]string{"nomad", "vault", "distributed_tokens_revoked"}, float32(len(revoking)))
  1350  
  1351  			// Can delete from the tracked list now that we have purged
  1352  			v.revLock.Lock()
  1353  			for _, va := range revoking {
  1354  				delete(v.revoking, va)
  1355  			}
  1356  			v.revLock.Unlock()
  1357  
  1358  		}
  1359  	}
  1360  }
  1361  
  1362  // purgeVaultAccessors creates a Raft transaction to remove the passed Vault
  1363  // Accessors
  1364  func (s *Server) purgeVaultAccessors(accessors []*structs.VaultAccessor) error {
  1365  	// Commit this update via Raft
  1366  	req := structs.VaultAccessorsRequest{Accessors: accessors}
  1367  	_, _, err := s.raftApply(structs.VaultAccessorDeregisterRequestType, req)
  1368  	return err
  1369  }
  1370  
  1371  // wrapNilError is a helper that returns a wrapped function that returns a nil
  1372  // error
  1373  func wrapNilError(f func()) func() error {
  1374  	return func() error {
  1375  		f()
  1376  		return nil
  1377  	}
  1378  }
  1379  
  1380  // setLimit is used to update the rate limit
  1381  func (v *vaultClient) setLimit(l rate.Limit) {
  1382  	v.l.Lock()
  1383  	defer v.l.Unlock()
  1384  	v.limiter = rate.NewLimiter(l, int(l))
  1385  }
  1386  
  1387  func (v *vaultClient) Stats() map[string]string {
  1388  	stat := v.stats()
  1389  
  1390  	expireTimeStr := ""
  1391  
  1392  	if !stat.TokenExpiry.IsZero() {
  1393  		expireTimeStr = stat.TokenExpiry.Format(time.RFC3339)
  1394  	}
  1395  
  1396  	return map[string]string{
  1397  		"tracked_for_revoked": strconv.Itoa(stat.TrackedForRevoke),
  1398  		"token_ttl":           stat.TokenTTL.Round(time.Second).String(),
  1399  		"token_expire_time":   expireTimeStr,
  1400  	}
  1401  }
  1402  
  1403  func (v *vaultClient) stats() *VaultStats {
  1404  	// Allocate a new stats struct
  1405  	stats := new(VaultStats)
  1406  
  1407  	v.revLock.Lock()
  1408  	stats.TrackedForRevoke = len(v.revoking)
  1409  	v.revLock.Unlock()
  1410  
  1411  	v.currentExpirationLock.Lock()
  1412  	stats.TokenExpiry = v.currentExpiration
  1413  	v.currentExpirationLock.Unlock()
  1414  
  1415  	if !stats.TokenExpiry.IsZero() {
  1416  		stats.TokenTTL = time.Until(stats.TokenExpiry)
  1417  	}
  1418  
  1419  	return stats
  1420  }
  1421  
  1422  // EmitStats is used to export metrics about the blocked eval tracker while enabled
  1423  func (v *vaultClient) EmitStats(period time.Duration, stopCh <-chan struct{}) {
  1424  	for {
  1425  		select {
  1426  		case <-time.After(period):
  1427  			stats := v.stats()
  1428  			metrics.SetGauge([]string{"nomad", "vault", "distributed_tokens_revoking"}, float32(stats.TrackedForRevoke))
  1429  			metrics.SetGauge([]string{"nomad", "vault", "token_ttl"}, float32(stats.TokenTTL/time.Millisecond))
  1430  
  1431  		case <-stopCh:
  1432  			return
  1433  		}
  1434  	}
  1435  }
  1436  
  1437  // extendExpiration sets the current auth token expiration record to ttLSeconds seconds from now
  1438  func (v *vaultClient) extendExpiration(ttlSeconds int) {
  1439  	v.currentExpirationLock.Lock()
  1440  	v.currentExpiration = time.Now().Add(time.Duration(ttlSeconds) * time.Second)
  1441  	v.currentExpirationLock.Unlock()
  1442  }
  1443  
  1444  // VaultVaultNoopDelegate returns the default vault api auth token handler
  1445  type VaultNoopDelegate struct{}
  1446  
  1447  func (e *VaultNoopDelegate) clientForTask(v *vaultClient, namespace string) (*vapi.Client, error) {
  1448  	return v.client, nil
  1449  }