github.com/maier/nomad@v0.4.1-0.20161110003312-a9e3d0b8549d/nomad/vault.go (about)

     1  package nomad
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"log"
     8  	"math/rand"
     9  	"regexp"
    10  	"sync"
    11  	"sync/atomic"
    12  	"time"
    13  
    14  	"gopkg.in/tomb.v2"
    15  
    16  	multierror "github.com/hashicorp/go-multierror"
    17  	"github.com/hashicorp/nomad/nomad/structs"
    18  	"github.com/hashicorp/nomad/nomad/structs/config"
    19  	vapi "github.com/hashicorp/vault/api"
    20  	"github.com/mitchellh/mapstructure"
    21  
    22  	"golang.org/x/sync/errgroup"
    23  	"golang.org/x/time/rate"
    24  )
    25  
const (
	// vaultTokenCreateTTL is the wrap TTL returned by the wrapping lookup
	// function (see getWrappingFn) for token create requests. It is expressed
	// as a duration string.
	vaultTokenCreateTTL = "60s"

	// minimumTokenTTL is the minimum Token TTL allowed for child tokens.
	// buildClient rejects a configured TaskTokenTTL below this value.
	minimumTokenTTL = 5 * time.Minute

	// defaultTokenTTL is the child token TTL buildClient falls back to when
	// the Vault configuration does not set TaskTokenTTL.
	defaultTokenTTL = "72h"

	// requestRateLimit is the maximum number of requests per second Nomad will
	// make against Vault. The same value is used as the limiter's burst size.
	requestRateLimit rate.Limit = 500.0

	// maxParallelRevokes is the maximum number of parallel Vault
	// token revocation requests
	maxParallelRevokes = 64

	// vaultRevocationIntv is the interval at which Vault tokens that failed
	// initial revocation are retried
	vaultRevocationIntv = 5 * time.Minute
)
    51  
var (
	// vaultUnrecoverableError matches error text containing "Code:" followed
	// by whitespace and 400, 403 or 404. CreateToken treats errors matching
	// this pattern as unrecoverable and returns them unwrapped.
	vaultUnrecoverableError = regexp.MustCompile(`Code:\s+40(0|3|4)`)
)
    56  
// VaultClient is the interface Nomad servers use to interact with Vault.
type VaultClient interface {
	// SetActive activates or de-activates the Vault client. When active, token
	// creation/lookup/revocation operations are allowed.
	SetActive(active bool)

	// SetConfig updates the config used by the Vault client
	SetConfig(config *config.VaultConfig) error

	// CreateToken takes an allocation and task and returns an appropriate Vault
	// Secret
	CreateToken(ctx context.Context, a *structs.Allocation, task string) (*vapi.Secret, error)

	// LookupToken takes a token string and returns the Vault secret describing
	// it (PoliciesFrom can extract the policies from the result).
	LookupToken(ctx context.Context, token string) (*vapi.Secret, error)

	// RevokeTokens takes a set of token accessors and revokes the matching
	// tokens. committed indicates whether the accessors were committed; only
	// committed accessors are tracked for retry and purged.
	RevokeTokens(ctx context.Context, accessors []*structs.VaultAccessor, committed bool) error

	// Stop is used to stop the client's background work, including token
	// renewal
	Stop()
}
    79  
// PurgeVaultAccessorFn is called to remove VaultAccessors from the system. If
// the function returns an error, the token will still be tracked and revocation
// will be retried until there is a success.
type PurgeVaultAccessorFn func(accessors []*structs.VaultAccessor) error
    84  
// tokenData holds the relevant information about the Vault token passed to the
// client. It is decoded from the data block of a token lookup response.
type tokenData struct {
	// CreationTTL is used as the renewal increment and lease duration, in
	// seconds.
	CreationTTL int      `mapstructure:"creation_ttl"`
	// TTL is the token's remaining TTL; zero is rejected for tokens that need
	// renewal.
	TTL         int      `mapstructure:"ttl"`
	// Renewable reports whether the token can be renewed.
	Renewable   bool     `mapstructure:"renewable"`
	// Policies is the list of policies attached to the token.
	Policies    []string `mapstructure:"policies"`
	// Role is the token role name, required for non-root tokens.
	Role        string   `mapstructure:"role"`
	// Root is set by parseSelfToken when Policies contains "root".
	Root        bool
}
    95  
// vaultClient is the Servers implementation of the VaultClient interface. The
// client renews the PeriodicToken given in the Vault configuration and provides
// the Server with the ability to create child tokens and lookup the permissions
// of tokens.
type vaultClient struct {
	// limiter is used to rate limit requests to Vault
	limiter *rate.Limiter

	// client is the Vault API client
	client *vapi.Client

	// auth is the Vault token auth API client
	auth *vapi.TokenAuth

	// config is the user passed Vault config
	config *config.VaultConfig

	// connEstablished marks whether we have an established connection to Vault.
	connEstablished bool

	// connEstablishedErr marks an error that can occur when establishing a
	// connection
	connEstablishedErr error

	// token is the raw token used by the client
	token string

	// tokenData is the data of the passed Vault token
	tokenData *tokenData

	// revoking tracks the VaultAccessors that must be revoked, mapped to the
	// time after which revocation is no longer retried
	revoking map[*structs.VaultAccessor]time.Time
	// purgeFn is called with accessors once they have been revoked
	purgeFn  PurgeVaultAccessorFn
	// revLock guards revoking
	revLock  sync.Mutex

	// active indicates whether the vaultClient is active. It should be
	// accessed using a helper and updated atomically
	active int32

	// running indicates whether the vault client is started.
	running bool

	// childTTL is the TTL for child tokens.
	childTTL string

	// lastRenewed is the time the token was last renewed
	lastRenewed time.Time

	// tomb tracks the background goroutines so they can be killed and waited on
	tomb   *tomb.Tomb
	// logger receives the client's log output
	logger *log.Logger

	// l is used to lock the configuration aspects of the client such that
	// multiple callers can't cause conflicting config updates
	l sync.Mutex
}
   151  
   152  // NewVaultClient returns a Vault client from the given config. If the client
   153  // couldn't be made an error is returned.
   154  func NewVaultClient(c *config.VaultConfig, logger *log.Logger, purgeFn PurgeVaultAccessorFn) (*vaultClient, error) {
   155  	if c == nil {
   156  		return nil, fmt.Errorf("must pass valid VaultConfig")
   157  	}
   158  
   159  	if logger == nil {
   160  		return nil, fmt.Errorf("must pass valid logger")
   161  	}
   162  
   163  	v := &vaultClient{
   164  		config:   c,
   165  		logger:   logger,
   166  		limiter:  rate.NewLimiter(requestRateLimit, int(requestRateLimit)),
   167  		revoking: make(map[*structs.VaultAccessor]time.Time),
   168  		purgeFn:  purgeFn,
   169  		tomb:     &tomb.Tomb{},
   170  	}
   171  
   172  	if v.config.IsEnabled() {
   173  		if err := v.buildClient(); err != nil {
   174  			return nil, err
   175  		}
   176  
   177  		// Launch the required goroutines
   178  		v.tomb.Go(wrapNilError(v.establishConnection))
   179  		v.tomb.Go(wrapNilError(v.revokeDaemon))
   180  
   181  		v.running = true
   182  	}
   183  
   184  	return v, nil
   185  }
   186  
   187  func (v *vaultClient) Stop() {
   188  	v.l.Lock()
   189  	running := v.running
   190  	v.running = false
   191  	v.l.Unlock()
   192  
   193  	if running {
   194  		v.tomb.Kill(nil)
   195  		v.tomb.Wait()
   196  		v.flush()
   197  	}
   198  }
   199  
   200  // SetActive activates or de-activates the Vault client. When active, token
   201  // creation/lookup/revocation operation are allowed. All queued revocations are
   202  // cancelled if set un-active as it is assumed another instances is taking over
   203  func (v *vaultClient) SetActive(active bool) {
   204  	if active {
   205  		atomic.StoreInt32(&v.active, 1)
   206  	} else {
   207  		atomic.StoreInt32(&v.active, 0)
   208  	}
   209  
   210  	// Clear out the revoking tokens
   211  	v.revLock.Lock()
   212  	v.revoking = make(map[*structs.VaultAccessor]time.Time)
   213  	v.revLock.Unlock()
   214  
   215  	return
   216  }
   217  
   218  // flush is used to reset the state of the vault client
   219  func (v *vaultClient) flush() {
   220  	v.l.Lock()
   221  	defer v.l.Unlock()
   222  
   223  	v.client = nil
   224  	v.auth = nil
   225  	v.connEstablished = false
   226  	v.connEstablishedErr = nil
   227  	v.token = ""
   228  	v.tokenData = nil
   229  	v.revoking = make(map[*structs.VaultAccessor]time.Time)
   230  	v.childTTL = ""
   231  	v.tomb = &tomb.Tomb{}
   232  }
   233  
   234  // SetConfig is used to update the Vault config being used. A temporary outage
   235  // may occur after calling as it re-establishes a connection to Vault
   236  func (v *vaultClient) SetConfig(config *config.VaultConfig) error {
   237  	if config == nil {
   238  		return fmt.Errorf("must pass valid VaultConfig")
   239  	}
   240  
   241  	v.l.Lock()
   242  	defer v.l.Unlock()
   243  
   244  	// Store the new config
   245  	v.config = config
   246  
   247  	if v.config.IsEnabled() {
   248  		// Stop accepting any new request
   249  		v.connEstablished = false
   250  
   251  		// Kill any background routine and create a new tomb
   252  		v.tomb.Kill(nil)
   253  		v.tomb.Wait()
   254  		v.tomb = &tomb.Tomb{}
   255  
   256  		// Rebuild the client
   257  		if err := v.buildClient(); err != nil {
   258  			v.l.Unlock()
   259  			return err
   260  		}
   261  
   262  		// Launch the required goroutines
   263  		v.tomb.Go(wrapNilError(v.establishConnection))
   264  		v.tomb.Go(wrapNilError(v.revokeDaemon))
   265  	}
   266  
   267  	return nil
   268  }
   269  
   270  // buildClient is used to build a Vault client based on the stored Vault config
   271  func (v *vaultClient) buildClient() error {
   272  	// Validate we have the required fields.
   273  	if v.config.Token == "" {
   274  		return errors.New("Vault token must be set")
   275  	} else if v.config.Addr == "" {
   276  		return errors.New("Vault address must be set")
   277  	}
   278  
   279  	// Parse the TTL if it is set
   280  	if v.config.TaskTokenTTL != "" {
   281  		d, err := time.ParseDuration(v.config.TaskTokenTTL)
   282  		if err != nil {
   283  			return fmt.Errorf("failed to parse TaskTokenTTL %q: %v", v.config.TaskTokenTTL, err)
   284  		}
   285  
   286  		if d.Nanoseconds() < minimumTokenTTL.Nanoseconds() {
   287  			return fmt.Errorf("ChildTokenTTL is less than minimum allowed of %v", minimumTokenTTL)
   288  		}
   289  
   290  		v.childTTL = v.config.TaskTokenTTL
   291  	} else {
   292  		// Default the TaskTokenTTL
   293  		v.childTTL = defaultTokenTTL
   294  	}
   295  
   296  	// Get the Vault API configuration
   297  	apiConf, err := v.config.ApiConfig()
   298  	if err != nil {
   299  		return fmt.Errorf("Failed to create Vault API config: %v", err)
   300  	}
   301  
   302  	// Create the Vault API client
   303  	client, err := vapi.NewClient(apiConf)
   304  	if err != nil {
   305  		v.logger.Printf("[ERR] vault: failed to create Vault client. Not retrying: %v", err)
   306  		return err
   307  	}
   308  
   309  	// Set the token and store the client
   310  	v.token = v.config.Token
   311  	client.SetToken(v.token)
   312  	v.client = client
   313  	v.auth = client.Auth().Token()
   314  	return nil
   315  }
   316  
   317  // establishConnection is used to make first contact with Vault. This should be
   318  // called in a go-routine since the connection is retried til the Vault Client
   319  // is stopped or the connection is successfully made at which point the renew
   320  // loop is started.
   321  func (v *vaultClient) establishConnection() {
   322  	// Create the retry timer and set initial duration to zero so it fires
   323  	// immediately
   324  	retryTimer := time.NewTimer(0)
   325  
   326  OUTER:
   327  	for {
   328  		select {
   329  		case <-v.tomb.Dying():
   330  			return
   331  		case <-retryTimer.C:
   332  			// Ensure the API is reachable
   333  			if _, err := v.client.Sys().InitStatus(); err != nil {
   334  				v.logger.Printf("[WARN] vault: failed to contact Vault API. Retrying in %v: %v",
   335  					v.config.ConnectionRetryIntv, err)
   336  				retryTimer.Reset(v.config.ConnectionRetryIntv)
   337  				continue OUTER
   338  			}
   339  
   340  			break OUTER
   341  		}
   342  	}
   343  
   344  	// Retrieve our token, validate it and parse the lease duration
   345  	if err := v.parseSelfToken(); err != nil {
   346  		v.logger.Printf("[ERR] vault: failed to validate self token/role and not retrying: %v", err)
   347  		v.l.Lock()
   348  		v.connEstablished = false
   349  		v.connEstablishedErr = err
   350  		v.l.Unlock()
   351  		return
   352  	}
   353  
   354  	// Set the wrapping function such that token creation is wrapped now
   355  	// that we know our role
   356  	v.client.SetWrappingLookupFunc(v.getWrappingFn())
   357  
   358  	// If we are given a non-root token, start renewing it
   359  	if v.tokenData.Root && v.tokenData.CreationTTL == 0 {
   360  		v.logger.Printf("[DEBUG] vault: not renewing token as it is root")
   361  	} else {
   362  		v.logger.Printf("[DEBUG] vault: token lease duration is %v",
   363  			time.Duration(v.tokenData.CreationTTL)*time.Second)
   364  		v.tomb.Go(wrapNilError(v.renewalLoop))
   365  	}
   366  
   367  	v.l.Lock()
   368  	v.connEstablished = true
   369  	v.connEstablishedErr = nil
   370  	v.l.Unlock()
   371  }
   372  
// renewalLoop runs the renew loop. This should only be called if we are given a
// token that needs renewing. It returns when the tomb is dying or when the
// token could not be renewed before its computed expiration, in which case the
// connection is marked as not established with the renewal error.
func (v *vaultClient) renewalLoop() {
	// Create the renewal timer and set initial duration to zero so it fires
	// immediately
	authRenewTimer := time.NewTimer(0)

	// Backoff is to reduce the rate we try to renew with Vault under error
	// situations. It is tracked in seconds.
	backoff := 0.0

	for {
		select {
		case <-v.tomb.Dying():
			return
		case <-authRenewTimer.C:
			// Renew the token and determine the new expiration. The expiration
			// is derived from the last successful renewal plus the creation
			// TTL.
			// NOTE(review): renew only updates lastRenewed on success, so if
			// the very first renewal fails lastRenewed appears to still be the
			// zero time and the expiry check below gives up immediately -
			// confirm whether that is intended.
			err := v.renew()
			currentExpiration := v.lastRenewed.Add(time.Duration(v.tokenData.CreationTTL) * time.Second)

			// Successfully renewed
			if err == nil {
				// If we take the expiration (lastRenewed + auth duration) and
				// subtract the current time, we get a duration until expiry.
				// Set the timer to poke us after half of that time is up.
				durationUntilRenew := currentExpiration.Sub(time.Now()) / 2

				v.logger.Printf("[INFO] vault: renewing token in %v", durationUntilRenew)
				authRenewTimer.Reset(durationUntilRenew)

				// Reset any backoff
				backoff = 0
				// This break leaves the select statement only; the enclosing
				// for loop continues with the next iteration.
				break
			}

			// Back off, increasing the amount of backoff each time. There are some rules:
			//
			// * If we have an existing authentication that is going to expire,
			// never back off more than half of the amount of time remaining
			// until expiration
			// * Never back off more than 30 seconds multiplied by a random
			// value between 1 and 2
			// * Use randomness so that many clients won't keep hitting Vault
			// at the same time

			// Set base values and add some backoff

			v.logger.Printf("[DEBUG] vault: got error or bad auth, so backing off: %v", err)
			switch {
			case backoff < 5:
				backoff = 5
			case backoff >= 24:
				backoff = 30
			default:
				backoff = backoff * 1.25
			}

			// Add randomness
			backoff = backoff * (1.0 + rand.Float64())

			maxBackoff := currentExpiration.Sub(time.Now()) / 2
			if maxBackoff < 0 {
				// We have failed to renew the token past its expiration. Stop
				// renewing with Vault.
				v.logger.Printf("[ERR] vault: failed to renew Vault token before lease expiration. Shutting down Vault client")
				v.l.Lock()
				v.connEstablished = false
				v.connEstablishedErr = err
				v.l.Unlock()
				return

			} else if backoff > maxBackoff.Seconds() {
				backoff = maxBackoff.Seconds()
			}

			// The float-to-Duration conversion drops the fractional seconds of
			// the backoff.
			durationUntilRetry := time.Duration(backoff) * time.Second
			v.logger.Printf("[INFO] vault: backing off for %v", durationUntilRetry)

			authRenewTimer.Reset(durationUntilRetry)
		}
	}
}
   455  
   456  // renew attempts to renew our Vault token. If the renewal fails, an error is
   457  // returned. This method updates the lastRenewed time
   458  func (v *vaultClient) renew() error {
   459  	// Attempt to renew the token
   460  	secret, err := v.auth.RenewSelf(v.tokenData.CreationTTL)
   461  	if err != nil {
   462  		return err
   463  	}
   464  
   465  	auth := secret.Auth
   466  	if auth == nil {
   467  		return fmt.Errorf("renewal successful but not auth information returned")
   468  	} else if auth.LeaseDuration == 0 {
   469  		return fmt.Errorf("renewal successful but no lease duration returned")
   470  	}
   471  
   472  	v.lastRenewed = time.Now()
   473  	v.logger.Printf("[DEBUG] vault: succesfully renewed server token")
   474  	return nil
   475  }
   476  
   477  // getWrappingFn returns an appropriate wrapping function for Nomad Servers
   478  func (v *vaultClient) getWrappingFn() func(operation, path string) string {
   479  	createPath := "auth/token/create"
   480  	if !v.tokenData.Root {
   481  		createPath = fmt.Sprintf("auth/token/create/%s", v.tokenData.Role)
   482  	}
   483  
   484  	return func(operation, path string) string {
   485  		// Only wrap the token create operation
   486  		if operation != "POST" || path != createPath {
   487  			return ""
   488  		}
   489  
   490  		return vaultTokenCreateTTL
   491  	}
   492  }
   493  
   494  // parseSelfToken looks up the Vault token in Vault and parses its data storing
   495  // it in the client. If the token is not valid for Nomads purposes an error is
   496  // returned.
   497  func (v *vaultClient) parseSelfToken() error {
   498  	// Get the initial lease duration
   499  	auth := v.client.Auth().Token()
   500  	self, err := auth.LookupSelf()
   501  	if err != nil {
   502  		return fmt.Errorf("failed to lookup Vault periodic token: %v", err)
   503  	}
   504  
   505  	// Read and parse the fields
   506  	var data tokenData
   507  	if err := mapstructure.WeakDecode(self.Data, &data); err != nil {
   508  		return fmt.Errorf("failed to parse Vault token's data block: %v", err)
   509  	}
   510  
   511  	root := false
   512  	for _, p := range data.Policies {
   513  		if p == "root" {
   514  			root = true
   515  			break
   516  		}
   517  	}
   518  
   519  	var mErr multierror.Error
   520  	if !root {
   521  		// All non-root tokens must be renewable
   522  		if !data.Renewable {
   523  			multierror.Append(&mErr, fmt.Errorf("Vault token is not renewable or root"))
   524  		}
   525  
   526  		// All non-root tokens must have a lease duration
   527  		if data.CreationTTL == 0 {
   528  			multierror.Append(&mErr, fmt.Errorf("invalid lease duration of zero"))
   529  		}
   530  
   531  		// The lease duration can not be expired
   532  		if data.TTL == 0 {
   533  			multierror.Append(&mErr, fmt.Errorf("token TTL is zero"))
   534  		}
   535  
   536  		// There must be a valid role since we aren't root
   537  		if data.Role == "" {
   538  			multierror.Append(&mErr, fmt.Errorf("token role name must be set when not using a root token"))
   539  		}
   540  
   541  	} else if data.CreationTTL != 0 {
   542  		// If the root token has a TTL it must be renewable
   543  		if !data.Renewable {
   544  			multierror.Append(&mErr, fmt.Errorf("Vault token has a TTL but is not renewable"))
   545  		} else if data.TTL == 0 {
   546  			// If the token has a TTL make sure it has not expired
   547  			multierror.Append(&mErr, fmt.Errorf("token TTL is zero"))
   548  		}
   549  	}
   550  
   551  	// If given a role validate it
   552  	if data.Role != "" {
   553  		if err := v.validateRole(data.Role); err != nil {
   554  			multierror.Append(&mErr, err)
   555  		}
   556  	}
   557  
   558  	data.Root = root
   559  	v.tokenData = &data
   560  	return mErr.ErrorOrNil()
   561  }
   562  
   563  // validateRole contacts Vault and checks that the given Vault role is valid for
   564  // the purposes of being used by Nomad
   565  func (v *vaultClient) validateRole(role string) error {
   566  	if role == "" {
   567  		return fmt.Errorf("Invalid empty role name")
   568  	}
   569  
   570  	// Validate the role
   571  	rsecret, err := v.client.Logical().Read(fmt.Sprintf("auth/token/roles/%s", role))
   572  	if err != nil {
   573  		return fmt.Errorf("failed to lookup role %q: %v", role, err)
   574  	}
   575  
   576  	// Read and parse the fields
   577  	var data struct {
   578  		ExplicitMaxTtl int `mapstructure:"explicit_max_ttl"`
   579  		Orphan         bool
   580  		Period         int
   581  		Renewable      bool
   582  	}
   583  	if err := mapstructure.WeakDecode(rsecret.Data, &data); err != nil {
   584  		return fmt.Errorf("failed to parse Vault role's data block: %v", err)
   585  	}
   586  
   587  	// Validate the role is acceptable
   588  	var mErr multierror.Error
   589  	if data.Orphan {
   590  		multierror.Append(&mErr, fmt.Errorf("Role must not allow orphans"))
   591  	}
   592  
   593  	if !data.Renewable {
   594  		multierror.Append(&mErr, fmt.Errorf("Role must allow tokens to be renewed"))
   595  	}
   596  
   597  	if data.ExplicitMaxTtl != 0 {
   598  		multierror.Append(&mErr, fmt.Errorf("Role can not use an explicit max ttl. Token must be periodic."))
   599  	}
   600  
   601  	if data.Period == 0 {
   602  		multierror.Append(&mErr, fmt.Errorf("Role must have a non-zero period to make tokens periodic."))
   603  	}
   604  
   605  	return mErr.ErrorOrNil()
   606  }
   607  
   608  // ConnectionEstablished returns whether a connection to Vault has been
   609  // established and any error that potentially caused it to be false
   610  func (v *vaultClient) ConnectionEstablished() (bool, error) {
   611  	v.l.Lock()
   612  	defer v.l.Unlock()
   613  	return v.connEstablished, v.connEstablishedErr
   614  }
   615  
   616  // Enabled returns whether the client is active
   617  func (v *vaultClient) Enabled() bool {
   618  	v.l.Lock()
   619  	defer v.l.Unlock()
   620  	return v.config.IsEnabled()
   621  }
   622  
   623  // Active returns whether the client is active
   624  func (v *vaultClient) Active() bool {
   625  	return atomic.LoadInt32(&v.active) == 1
   626  }
   627  
   628  // CreateToken takes the allocation and task and returns an appropriate Vault
   629  // token. The call is rate limited and may be canceled with the passed policy.
   630  // When the error is recoverable, it will be of type RecoverableError
   631  func (v *vaultClient) CreateToken(ctx context.Context, a *structs.Allocation, task string) (*vapi.Secret, error) {
   632  	if !v.Enabled() {
   633  		return nil, fmt.Errorf("Vault integration disabled")
   634  	}
   635  
   636  	if !v.Active() {
   637  		return nil, structs.NewRecoverableError(fmt.Errorf("Vault client not active"), true)
   638  	}
   639  
   640  	// Check if we have established a connection with Vault
   641  	if established, err := v.ConnectionEstablished(); !established && err == nil {
   642  		return nil, structs.NewRecoverableError(fmt.Errorf("Connection to Vault has not been established"), true)
   643  	} else if !established {
   644  		return nil, fmt.Errorf("Connection to Vault failed: %v", err)
   645  	}
   646  
   647  	// Retrieve the Vault block for the task
   648  	policies := a.Job.VaultPolicies()
   649  	if policies == nil {
   650  		return nil, fmt.Errorf("Job doesn't require Vault policies")
   651  	}
   652  	tg, ok := policies[a.TaskGroup]
   653  	if !ok {
   654  		return nil, fmt.Errorf("Task group does not require Vault policies")
   655  	}
   656  	taskVault, ok := tg[task]
   657  	if !ok {
   658  		return nil, fmt.Errorf("Task does not require Vault policies")
   659  	}
   660  
   661  	// Build the creation request
   662  	req := &vapi.TokenCreateRequest{
   663  		Policies: taskVault.Policies,
   664  		Metadata: map[string]string{
   665  			"AllocationID": a.ID,
   666  			"Task":         task,
   667  			"NodeID":       a.NodeID,
   668  		},
   669  		TTL:         v.childTTL,
   670  		DisplayName: fmt.Sprintf("%s-%s", a.ID, task),
   671  	}
   672  
   673  	// Ensure we are under our rate limit
   674  	if err := v.limiter.Wait(ctx); err != nil {
   675  		return nil, err
   676  	}
   677  
   678  	// Make the request and switch depending on whether we are using a root
   679  	// token or a role based token
   680  	var secret *vapi.Secret
   681  	var err error
   682  	if v.tokenData.Root {
   683  		req.Period = v.childTTL
   684  		secret, err = v.auth.Create(req)
   685  	} else {
   686  		// Make the token using the role
   687  		secret, err = v.auth.CreateWithRole(req, v.tokenData.Role)
   688  	}
   689  
   690  	// Determine whether it is unrecoverable
   691  	if err != nil {
   692  		if vaultUnrecoverableError.MatchString(err.Error()) {
   693  			return secret, err
   694  		}
   695  
   696  		// The error is recoverable
   697  		return nil, structs.NewRecoverableError(err, true)
   698  	}
   699  
   700  	return secret, nil
   701  }
   702  
   703  // LookupToken takes a Vault token and does a lookup against Vault. The call is
   704  // rate limited and may be canceled with passed context.
   705  func (v *vaultClient) LookupToken(ctx context.Context, token string) (*vapi.Secret, error) {
   706  	if !v.Enabled() {
   707  		return nil, fmt.Errorf("Vault integration disabled")
   708  	}
   709  
   710  	if !v.Active() {
   711  		return nil, fmt.Errorf("Vault client not active")
   712  	}
   713  
   714  	// Check if we have established a connection with Vault
   715  	if established, err := v.ConnectionEstablished(); !established && err == nil {
   716  		return nil, structs.NewRecoverableError(fmt.Errorf("Connection to Vault has not been established"), true)
   717  	} else if !established {
   718  		return nil, fmt.Errorf("Connection to Vault failed: %v", err)
   719  	}
   720  
   721  	// Ensure we are under our rate limit
   722  	if err := v.limiter.Wait(ctx); err != nil {
   723  		return nil, err
   724  	}
   725  
   726  	// Lookup the token
   727  	return v.auth.Lookup(token)
   728  }
   729  
   730  // PoliciesFrom parses the set of policies returned by a token lookup.
   731  func PoliciesFrom(s *vapi.Secret) ([]string, error) {
   732  	if s == nil {
   733  		return nil, fmt.Errorf("cannot parse nil Vault secret")
   734  	}
   735  	var data tokenData
   736  	if err := mapstructure.WeakDecode(s.Data, &data); err != nil {
   737  		return nil, fmt.Errorf("failed to parse Vault token's data block: %v", err)
   738  	}
   739  
   740  	return data.Policies, nil
   741  }
   742  
   743  // RevokeTokens revokes the passed set of accessors. If committed is set, the
   744  // purge function passed to the client is called. If there is an error purging
   745  // either because of Vault failures or because of the purge function, the
   746  // revocation is retried until the tokens TTL.
   747  func (v *vaultClient) RevokeTokens(ctx context.Context, accessors []*structs.VaultAccessor, committed bool) error {
   748  	if !v.Enabled() {
   749  		return nil
   750  	}
   751  
   752  	if !v.Active() {
   753  		return fmt.Errorf("Vault client not active")
   754  	}
   755  
   756  	// Check if we have established a connection with Vault. If not just add it
   757  	// to the queue
   758  	if established, err := v.ConnectionEstablished(); !established && err == nil {
   759  		// Only bother tracking it for later revocation if the accessor was
   760  		// committed
   761  		if committed {
   762  			v.storeForRevocation(accessors)
   763  		}
   764  
   765  		return nil
   766  	}
   767  
   768  	// Attempt to revoke immediately and if it fails, add it to the revoke queue
   769  	err := v.parallelRevoke(ctx, accessors)
   770  	if !committed {
   771  		// If it is uncommitted, it is a best effort revoke as it will shortly
   772  		// TTL within the cubbyhole and has not been leaked to any outside
   773  		// system
   774  		return nil
   775  	}
   776  
   777  	if err != nil {
   778  		v.logger.Printf("[WARN] vault: failed to revoke tokens. Will reattempt til TTL: %v", err)
   779  		v.storeForRevocation(accessors)
   780  		return nil
   781  	}
   782  
   783  	if err := v.purgeFn(accessors); err != nil {
   784  		v.logger.Printf("[ERR] vault: failed to purge Vault accessors: %v", err)
   785  		v.storeForRevocation(accessors)
   786  		return nil
   787  	}
   788  
   789  	return nil
   790  }
   791  
   792  // storeForRevocation stores the passed set of accessors for revocation. It
   793  // captrues their effective TTL by storing their create TTL plus the current
   794  // time.
   795  func (v *vaultClient) storeForRevocation(accessors []*structs.VaultAccessor) {
   796  	v.revLock.Lock()
   797  	now := time.Now()
   798  	for _, a := range accessors {
   799  		v.revoking[a] = now.Add(time.Duration(a.CreationTTL) * time.Second)
   800  	}
   801  	v.revLock.Unlock()
   802  }
   803  
   804  // parallelRevoke revokes the passed VaultAccessors in parallel.
   805  func (v *vaultClient) parallelRevoke(ctx context.Context, accessors []*structs.VaultAccessor) error {
   806  	if !v.Enabled() {
   807  		return fmt.Errorf("Vault integration disabled")
   808  	}
   809  
   810  	if !v.Active() {
   811  		return fmt.Errorf("Vault client not active")
   812  	}
   813  
   814  	// Check if we have established a connection with Vault
   815  	if established, err := v.ConnectionEstablished(); !established && err == nil {
   816  		return structs.NewRecoverableError(fmt.Errorf("Connection to Vault has not been established"), true)
   817  	} else if !established {
   818  		return fmt.Errorf("Connection to Vault failed: %v", err)
   819  	}
   820  
   821  	g, pCtx := errgroup.WithContext(ctx)
   822  
   823  	// Cap the handlers
   824  	handlers := len(accessors)
   825  	if handlers > maxParallelRevokes {
   826  		handlers = maxParallelRevokes
   827  	}
   828  
   829  	// Create the Vault Tokens
   830  	input := make(chan *structs.VaultAccessor, handlers)
   831  	for i := 0; i < handlers; i++ {
   832  		g.Go(func() error {
   833  			for {
   834  				select {
   835  				case va, ok := <-input:
   836  					if !ok {
   837  						return nil
   838  					}
   839  
   840  					if err := v.auth.RevokeAccessor(va.Accessor); err != nil {
   841  						return fmt.Errorf("failed to revoke token (alloc: %q, node: %q, task: %q): %v", va.AllocID, va.NodeID, va.Task, err)
   842  					}
   843  				case <-pCtx.Done():
   844  					return nil
   845  				}
   846  			}
   847  		})
   848  	}
   849  
   850  	// Send the input
   851  	go func() {
   852  		defer close(input)
   853  		for _, va := range accessors {
   854  			select {
   855  			case <-pCtx.Done():
   856  				return
   857  			case input <- va:
   858  			}
   859  		}
   860  
   861  	}()
   862  
   863  	// Wait for everything to complete
   864  	return g.Wait()
   865  }
   866  
   867  // revokeDaemon should be called in a goroutine and is used to periodically
   868  // revoke Vault accessors that failed the original revocation
   869  func (v *vaultClient) revokeDaemon() {
   870  	ticker := time.NewTicker(vaultRevocationIntv)
   871  	defer ticker.Stop()
   872  
   873  	for {
   874  		select {
   875  		case <-v.tomb.Dying():
   876  			return
   877  		case now := <-ticker.C:
   878  			if established, _ := v.ConnectionEstablished(); !established {
   879  				continue
   880  			}
   881  
   882  			v.revLock.Lock()
   883  
   884  			// Fast path
   885  			if len(v.revoking) == 0 {
   886  				v.revLock.Unlock()
   887  				continue
   888  			}
   889  
   890  			// Build the list of allocations that need to revoked while pruning any TTL'd checks
   891  			revoking := make([]*structs.VaultAccessor, 0, len(v.revoking))
   892  			for va, ttl := range v.revoking {
   893  				if now.After(ttl) {
   894  					delete(v.revoking, va)
   895  				} else {
   896  					revoking = append(revoking, va)
   897  				}
   898  			}
   899  
   900  			if err := v.parallelRevoke(context.Background(), revoking); err != nil {
   901  				v.logger.Printf("[WARN] vault: background token revocation errored: %v", err)
   902  				v.revLock.Unlock()
   903  				continue
   904  			}
   905  
   906  			// Unlock before a potentially expensive operation
   907  			v.revLock.Unlock()
   908  
   909  			// Call the passed in token revocation function
   910  			if err := v.purgeFn(revoking); err != nil {
   911  				// Can continue since revocation is idempotent
   912  				v.logger.Printf("[ERR] vault: token revocation errored: %v", err)
   913  				continue
   914  			}
   915  
   916  			// Can delete from the tracked list now that we have purged
   917  			v.revLock.Lock()
   918  			for _, va := range revoking {
   919  				delete(v.revoking, va)
   920  			}
   921  			v.revLock.Unlock()
   922  		}
   923  	}
   924  }
   925  
   926  // purgeVaultAccessors creates a Raft transaction to remove the passed Vault
   927  // Accessors
   928  func (s *Server) purgeVaultAccessors(accessors []*structs.VaultAccessor) error {
   929  	// Commit this update via Raft
   930  	req := structs.VaultAccessorsRequest{Accessors: accessors}
   931  	_, _, err := s.raftApply(structs.VaultAccessorDegisterRequestType, req)
   932  	return err
   933  }
   934  
   935  // wrapNilError is a helper that returns a wrapped function that returns a nil
   936  // error
   937  func wrapNilError(f func()) func() error {
   938  	return func() error {
   939  		f()
   940  		return nil
   941  	}
   942  }
   943  
   944  // setLimit is used to update the rate limit
   945  func (v *vaultClient) setLimit(l rate.Limit) {
   946  	v.l.Lock()
   947  	defer v.l.Unlock()
   948  	v.limiter = rate.NewLimiter(l, int(l))
   949  }