github.com/hspak/nomad@v0.7.2-0.20180309000617-bc4ae22a39a5/nomad/vault.go (about)

     1  package nomad
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"log"
     8  	"math/rand"
     9  	"regexp"
    10  	"sync"
    11  	"sync/atomic"
    12  	"time"
    13  
    14  	"gopkg.in/tomb.v2"
    15  
    16  	metrics "github.com/armon/go-metrics"
    17  	multierror "github.com/hashicorp/go-multierror"
    18  	"github.com/hashicorp/nomad/nomad/structs"
    19  	"github.com/hashicorp/nomad/nomad/structs/config"
    20  	vapi "github.com/hashicorp/vault/api"
    21  	"github.com/mitchellh/mapstructure"
    22  
    23  	"golang.org/x/sync/errgroup"
    24  	"golang.org/x/time/rate"
    25  )
    26  
const (
	// vaultTokenCreateTTL is the wrap TTL handed back by the wrapping lookup
	// function for token-create requests, i.e. how long the wrapped token
	// given to the client stays valid. It is a duration string.
	vaultTokenCreateTTL = "60s"

	// minimumTokenTTL is the minimum token TTL allowed for child tokens. A
	// configured TaskTokenTTL below this value is rejected by buildClient.
	minimumTokenTTL = 5 * time.Minute

	// defaultTokenTTL is the child token TTL used when no TaskTokenTTL is
	// configured, e.g. when the passed token is a root token such that child
	// tokens aren't being created against a role that has defined a TTL.
	defaultTokenTTL = "72h"

	// requestRateLimit is the maximum number of requests per second Nomad will
	// make against Vault. The same value is used as the limiter's burst size.
	requestRateLimit rate.Limit = 500.0

	// maxParallelRevokes is the maximum number of parallel Vault
	// token revocation requests.
	maxParallelRevokes = 64

	// vaultRevocationIntv is the interval at which Vault tokens that failed
	// initial revocation are retried.
	vaultRevocationIntv = 5 * time.Minute

	// vaultCapabilitiesLookupPath is the path used to look up the capabilities
	// of one's own token.
	vaultCapabilitiesLookupPath = "sys/capabilities-self"

	// vaultTokenRenewPath is the path used to renew our token.
	vaultTokenRenewPath = "auth/token/renew-self"

	// vaultTokenLookupPath is the path used to look up a token.
	vaultTokenLookupPath = "auth/token/lookup"

	// vaultTokenRevokePath is the path used to revoke a token by accessor.
	vaultTokenRevokePath = "auth/token/revoke-accessor"

	// vaultRoleLookupPath is the format string for the path used to look up a
	// role; the single %s verb is replaced with the role name.
	vaultRoleLookupPath = "auth/token/roles/%s"

	// vaultTokenRoleCreatePath is the format string for the path used to
	// create a token from a role; the single %s verb is replaced with the
	// role name.
	vaultTokenRoleCreatePath = "auth/token/create/%s"
)
    71  
var (
	// vaultUnrecoverableError matches unrecoverable errors: error text that
	// contains "Code:" followed by whitespace and a 400, 403 or 404 status.
	vaultUnrecoverableError = regexp.MustCompile(`Code:\s+40(0|3|4)`)

	// vaultCapabilitiesCapability is the expected capability of Nomad's Vault
	// token on the capabilities lookup path. The token must have at least one
	// of the capabilities.
	vaultCapabilitiesCapability = []string{"update", "root"}

	// vaultTokenRenewCapability is the expected capability Nomad's Vault token
	// should have on the token renew path. The token must have at least one of
	// the capabilities.
	vaultTokenRenewCapability = []string{"update", "root"}

	// vaultTokenLookupCapability is the expected capability Nomad's Vault
	// token should have on the token lookup path. The token must have at least
	// one of the capabilities.
	vaultTokenLookupCapability = []string{"update", "root"}

	// vaultTokenRevokeCapability is the expected capability Nomad's Vault
	// token should have on the token revoke path. The token must have at least
	// one of the capabilities.
	vaultTokenRevokeCapability = []string{"update", "root"}

	// vaultRoleLookupCapability is the expected capability Nomad's Vault token
	// should have on the role lookup path. The token must have at least one of
	// the capabilities.
	vaultRoleLookupCapability = []string{"read", "root"}

	// vaultTokenRoleCreateCapability is the expected capability Nomad's Vault
	// token should have on the role token-create path. The token must have at
	// least one of the capabilities.
	vaultTokenRoleCreateCapability = []string{"update", "root"}
)
   106  
// VaultClient is the Server's interface for interacting with Vault.
type VaultClient interface {
	// SetActive activates or de-activates the Vault client. When active, token
	// creation/lookup/revocation operations are allowed.
	SetActive(active bool)

	// SetConfig updates the config used by the Vault client.
	SetConfig(config *config.VaultConfig) error

	// CreateToken takes an allocation and the name of one of its tasks and
	// returns an appropriate Vault Secret.
	CreateToken(ctx context.Context, a *structs.Allocation, task string) (*vapi.Secret, error)

	// LookupToken takes a token string and returns the Vault Secret describing
	// it.
	LookupToken(ctx context.Context, token string) (*vapi.Secret, error)

	// RevokeTokens takes a set of token accessors and revokes the
	// corresponding tokens.
	// NOTE(review): the meaning of committed is not visible in this part of
	// the file - confirm against the implementation.
	RevokeTokens(ctx context.Context, accessors []*structs.VaultAccessor, committed bool) error

	// Stop is used to stop token renewal.
	Stop()

	// Running returns whether the Vault client is running.
	Running() bool

	// Stats returns the Vault client's statistics.
	Stats() *VaultStats

	// EmitStats emits the client's statistics at the given period until
	// stopCh is signalled.
	EmitStats(period time.Duration, stopCh chan struct{})
}
   139  
// VaultStats holds the stats about Vault tokens created and managed by
// Nomad.
type VaultStats struct {
	// TrackedForRevoke is the count of tokens that are being tracked to be
	// revoked since they could not be immediately revoked.
	TrackedForRevoke int
}
   147  
// PurgeVaultAccessorFn is called to remove VaultAccessors from the system. If
// the function returns an error, the token will still be tracked and
// revocation will be retried until it succeeds.
type PurgeVaultAccessorFn func(accessors []*structs.VaultAccessor) error
   152  
// tokenData holds the relevant information about the Vault token passed to the
// client. All fields except Root are decoded from the token lookup response.
type tokenData struct {
	// CreationTTL is the TTL the token was created with; it is interpreted as
	// a number of seconds and passed to Vault as the renewal increment.
	CreationTTL int      `mapstructure:"creation_ttl"`
	// TTL is the token's remaining TTL; zero is treated as expired.
	TTL         int      `mapstructure:"ttl"`
	// Renewable reports whether the token can be renewed.
	Renewable   bool     `mapstructure:"renewable"`
	// Policies is the list of policies attached to the token.
	Policies    []string `mapstructure:"policies"`
	// Role is the role reported for the token; it is used as the role name
	// when the config does not specify one.
	Role        string   `mapstructure:"role"`
	// Root is not decoded; it is set to true when Policies contains "root".
	Root        bool
}
   163  
// vaultClient is the Server's implementation of the VaultClient interface. The
// client renews the PeriodicToken given in the Vault configuration and provides
// the Server with the ability to create child tokens and look up the
// permissions of tokens.
type vaultClient struct {
	// limiter is used to rate limit requests to Vault
	limiter *rate.Limiter

	// client is the Vault API client
	client *vapi.Client

	// auth is the Vault token auth API client
	auth *vapi.TokenAuth

	// config is the user passed Vault config
	config *config.VaultConfig

	// connEstablished marks whether we have an established connection to Vault.
	connEstablished bool

	// connEstablishedErr marks an error that can occur when establishing a
	// connection
	connEstablishedErr error

	// token is the raw token used by the client
	token string

	// tokenData is the data of the passed Vault token
	tokenData *tokenData

	// revoking tracks the VaultAccessors that must be revoked. purgeFn is the
	// callback supplied at construction for purging accessors, and revLock is
	// the mutex intended to guard revoking.
	revoking map[*structs.VaultAccessor]time.Time
	purgeFn  PurgeVaultAccessorFn
	revLock  sync.Mutex

	// active indicates whether the vaultClient is active. It should be
	// accessed using a helper and updated atomically
	active int32

	// running indicates whether the vault client is started.
	running bool

	// childTTL is the TTL for child tokens.
	childTTL string

	// lastRenewed is the time the token was last renewed
	lastRenewed time.Time

	// tomb supervises the background goroutines; logger receives the client's
	// log output.
	tomb   *tomb.Tomb
	logger *log.Logger

	// stats stores the stats
	stats     *VaultStats
	statsLock sync.RWMutex

	// l is used to lock the configuration aspects of the client such that
	// multiple callers can't cause conflicting config updates
	l sync.Mutex
}
   223  
   224  // NewVaultClient returns a Vault client from the given config. If the client
   225  // couldn't be made an error is returned.
   226  func NewVaultClient(c *config.VaultConfig, logger *log.Logger, purgeFn PurgeVaultAccessorFn) (*vaultClient, error) {
   227  	if c == nil {
   228  		return nil, fmt.Errorf("must pass valid VaultConfig")
   229  	}
   230  
   231  	if logger == nil {
   232  		return nil, fmt.Errorf("must pass valid logger")
   233  	}
   234  
   235  	v := &vaultClient{
   236  		config:   c,
   237  		logger:   logger,
   238  		limiter:  rate.NewLimiter(requestRateLimit, int(requestRateLimit)),
   239  		revoking: make(map[*structs.VaultAccessor]time.Time),
   240  		purgeFn:  purgeFn,
   241  		tomb:     &tomb.Tomb{},
   242  		stats:    new(VaultStats),
   243  	}
   244  
   245  	if v.config.IsEnabled() {
   246  		if err := v.buildClient(); err != nil {
   247  			return nil, err
   248  		}
   249  
   250  		// Launch the required goroutines
   251  		v.tomb.Go(wrapNilError(v.establishConnection))
   252  		v.tomb.Go(wrapNilError(v.revokeDaemon))
   253  
   254  		v.running = true
   255  	}
   256  
   257  	return v, nil
   258  }
   259  
   260  func (v *vaultClient) Stop() {
   261  	v.l.Lock()
   262  	running := v.running
   263  	v.running = false
   264  	v.l.Unlock()
   265  
   266  	if running {
   267  		v.tomb.Kill(nil)
   268  		v.tomb.Wait()
   269  		v.flush()
   270  	}
   271  }
   272  
   273  func (v *vaultClient) Running() bool {
   274  	v.l.Lock()
   275  	defer v.l.Unlock()
   276  	return v.running
   277  }
   278  
   279  // SetActive activates or de-activates the Vault client. When active, token
   280  // creation/lookup/revocation operation are allowed. All queued revocations are
   281  // cancelled if set un-active as it is assumed another instances is taking over
   282  func (v *vaultClient) SetActive(active bool) {
   283  	if active {
   284  		atomic.StoreInt32(&v.active, 1)
   285  	} else {
   286  		atomic.StoreInt32(&v.active, 0)
   287  	}
   288  
   289  	// Clear out the revoking tokens
   290  	v.revLock.Lock()
   291  	v.revoking = make(map[*structs.VaultAccessor]time.Time)
   292  	v.revLock.Unlock()
   293  
   294  	return
   295  }
   296  
   297  // flush is used to reset the state of the vault client
   298  func (v *vaultClient) flush() {
   299  	v.l.Lock()
   300  	defer v.l.Unlock()
   301  
   302  	v.client = nil
   303  	v.auth = nil
   304  	v.connEstablished = false
   305  	v.connEstablishedErr = nil
   306  	v.token = ""
   307  	v.tokenData = nil
   308  	v.revoking = make(map[*structs.VaultAccessor]time.Time)
   309  	v.childTTL = ""
   310  	v.tomb = &tomb.Tomb{}
   311  }
   312  
   313  // SetConfig is used to update the Vault config being used. A temporary outage
   314  // may occur after calling as it re-establishes a connection to Vault
   315  func (v *vaultClient) SetConfig(config *config.VaultConfig) error {
   316  	if config == nil {
   317  		return fmt.Errorf("must pass valid VaultConfig")
   318  	}
   319  
   320  	v.l.Lock()
   321  	defer v.l.Unlock()
   322  
   323  	// Kill any background routintes
   324  	if v.running {
   325  		// Stop accepting any new request
   326  		v.connEstablished = false
   327  
   328  		// Kill any background routine and create a new tomb
   329  		v.tomb.Kill(nil)
   330  		v.tomb.Wait()
   331  		v.tomb = &tomb.Tomb{}
   332  		v.running = false
   333  	}
   334  
   335  	// Store the new config
   336  	v.config = config
   337  
   338  	// Check if we should relaunch
   339  	if v.config.IsEnabled() {
   340  		// Rebuild the client
   341  		if err := v.buildClient(); err != nil {
   342  			return err
   343  		}
   344  
   345  		// Launch the required goroutines
   346  		v.tomb.Go(wrapNilError(v.establishConnection))
   347  		v.tomb.Go(wrapNilError(v.revokeDaemon))
   348  		v.running = true
   349  	}
   350  
   351  	return nil
   352  }
   353  
   354  // buildClient is used to build a Vault client based on the stored Vault config
   355  func (v *vaultClient) buildClient() error {
   356  	// Validate we have the required fields.
   357  	if v.config.Token == "" {
   358  		return errors.New("Vault token must be set")
   359  	} else if v.config.Addr == "" {
   360  		return errors.New("Vault address must be set")
   361  	}
   362  
   363  	// Parse the TTL if it is set
   364  	if v.config.TaskTokenTTL != "" {
   365  		d, err := time.ParseDuration(v.config.TaskTokenTTL)
   366  		if err != nil {
   367  			return fmt.Errorf("failed to parse TaskTokenTTL %q: %v", v.config.TaskTokenTTL, err)
   368  		}
   369  
   370  		if d.Nanoseconds() < minimumTokenTTL.Nanoseconds() {
   371  			return fmt.Errorf("ChildTokenTTL is less than minimum allowed of %v", minimumTokenTTL)
   372  		}
   373  
   374  		v.childTTL = v.config.TaskTokenTTL
   375  	} else {
   376  		// Default the TaskTokenTTL
   377  		v.childTTL = defaultTokenTTL
   378  	}
   379  
   380  	// Get the Vault API configuration
   381  	apiConf, err := v.config.ApiConfig()
   382  	if err != nil {
   383  		return fmt.Errorf("Failed to create Vault API config: %v", err)
   384  	}
   385  
   386  	// Create the Vault API client
   387  	client, err := vapi.NewClient(apiConf)
   388  	if err != nil {
   389  		v.logger.Printf("[ERR] vault: failed to create Vault client. Not retrying: %v", err)
   390  		return err
   391  	}
   392  
   393  	// Set the token and store the client
   394  	v.token = v.config.Token
   395  	client.SetToken(v.token)
   396  	v.client = client
   397  	v.auth = client.Auth().Token()
   398  	return nil
   399  }
   400  
   401  // establishConnection is used to make first contact with Vault. This should be
   402  // called in a go-routine since the connection is retried til the Vault Client
   403  // is stopped or the connection is successfully made at which point the renew
   404  // loop is started.
   405  func (v *vaultClient) establishConnection() {
   406  	// Create the retry timer and set initial duration to zero so it fires
   407  	// immediately
   408  	retryTimer := time.NewTimer(0)
   409  
   410  OUTER:
   411  	for {
   412  		select {
   413  		case <-v.tomb.Dying():
   414  			return
   415  		case <-retryTimer.C:
   416  			// Ensure the API is reachable
   417  			if _, err := v.client.Sys().InitStatus(); err != nil {
   418  				v.logger.Printf("[WARN] vault: failed to contact Vault API. Retrying in %v: %v",
   419  					v.config.ConnectionRetryIntv, err)
   420  				retryTimer.Reset(v.config.ConnectionRetryIntv)
   421  				continue OUTER
   422  			}
   423  
   424  			break OUTER
   425  		}
   426  	}
   427  
   428  	// Retrieve our token, validate it and parse the lease duration
   429  	if err := v.parseSelfToken(); err != nil {
   430  		v.logger.Printf("[ERR] vault: failed to validate self token/role and not retrying: %v", err)
   431  		v.l.Lock()
   432  		v.connEstablished = false
   433  		v.connEstablishedErr = err
   434  		v.l.Unlock()
   435  		return
   436  	}
   437  
   438  	// Set the wrapping function such that token creation is wrapped now
   439  	// that we know our role
   440  	v.client.SetWrappingLookupFunc(v.getWrappingFn())
   441  
   442  	// If we are given a non-root token, start renewing it
   443  	if v.tokenData.Root && v.tokenData.CreationTTL == 0 {
   444  		v.logger.Printf("[DEBUG] vault: not renewing token as it is root")
   445  	} else {
   446  		v.logger.Printf("[DEBUG] vault: token lease duration is %v",
   447  			time.Duration(v.tokenData.CreationTTL)*time.Second)
   448  		v.tomb.Go(wrapNilError(v.renewalLoop))
   449  	}
   450  
   451  	v.l.Lock()
   452  	v.connEstablished = true
   453  	v.connEstablishedErr = nil
   454  	v.l.Unlock()
   455  }
   456  
   457  // renewalLoop runs the renew loop. This should only be called if we are given a
   458  // non-root token.
   459  func (v *vaultClient) renewalLoop() {
   460  	// Create the renewal timer and set initial duration to zero so it fires
   461  	// immediately
   462  	authRenewTimer := time.NewTimer(0)
   463  
   464  	// Backoff is to reduce the rate we try to renew with Vault under error
   465  	// situations
   466  	backoff := 0.0
   467  
   468  	for {
   469  		select {
   470  		case <-v.tomb.Dying():
   471  			return
   472  		case <-authRenewTimer.C:
   473  			// Renew the token and determine the new expiration
   474  			err := v.renew()
   475  			currentExpiration := v.lastRenewed.Add(time.Duration(v.tokenData.CreationTTL) * time.Second)
   476  
   477  			// Successfully renewed
   478  			if err == nil {
   479  				// If we take the expiration (lastRenewed + auth duration) and
   480  				// subtract the current time, we get a duration until expiry.
   481  				// Set the timer to poke us after half of that time is up.
   482  				durationUntilRenew := currentExpiration.Sub(time.Now()) / 2
   483  
   484  				v.logger.Printf("[INFO] vault: renewing token in %v", durationUntilRenew)
   485  				authRenewTimer.Reset(durationUntilRenew)
   486  
   487  				// Reset any backoff
   488  				backoff = 0
   489  				break
   490  			}
   491  
   492  			// Back off, increasing the amount of backoff each time. There are some rules:
   493  			//
   494  			// * If we have an existing authentication that is going to expire,
   495  			// never back off more than half of the amount of time remaining
   496  			// until expiration
   497  			// * Never back off more than 30 seconds multiplied by a random
   498  			// value between 1 and 2
   499  			// * Use randomness so that many clients won't keep hitting Vault
   500  			// at the same time
   501  
   502  			// Set base values and add some backoff
   503  
   504  			v.logger.Printf("[WARN] vault: got error or bad auth, so backing off: %v", err)
   505  			switch {
   506  			case backoff < 5:
   507  				backoff = 5
   508  			case backoff >= 24:
   509  				backoff = 30
   510  			default:
   511  				backoff = backoff * 1.25
   512  			}
   513  
   514  			// Add randomness
   515  			backoff = backoff * (1.0 + rand.Float64())
   516  
   517  			maxBackoff := currentExpiration.Sub(time.Now()) / 2
   518  			if maxBackoff < 0 {
   519  				// We have failed to renew the token past its expiration. Stop
   520  				// renewing with Vault.
   521  				v.logger.Printf("[ERR] vault: failed to renew Vault token before lease expiration. Shutting down Vault client")
   522  				v.l.Lock()
   523  				v.connEstablished = false
   524  				v.connEstablishedErr = err
   525  				v.l.Unlock()
   526  				return
   527  
   528  			} else if backoff > maxBackoff.Seconds() {
   529  				backoff = maxBackoff.Seconds()
   530  			}
   531  
   532  			durationUntilRetry := time.Duration(backoff) * time.Second
   533  			v.logger.Printf("[INFO] vault: backing off for %v", durationUntilRetry)
   534  
   535  			authRenewTimer.Reset(durationUntilRetry)
   536  		}
   537  	}
   538  }
   539  
   540  // renew attempts to renew our Vault token. If the renewal fails, an error is
   541  // returned. This method updates the lastRenewed time
   542  func (v *vaultClient) renew() error {
   543  	// Attempt to renew the token
   544  	secret, err := v.auth.RenewSelf(v.tokenData.CreationTTL)
   545  	if err != nil {
   546  		return err
   547  	}
   548  
   549  	auth := secret.Auth
   550  	if auth == nil {
   551  		return fmt.Errorf("renewal successful but not auth information returned")
   552  	} else if auth.LeaseDuration == 0 {
   553  		return fmt.Errorf("renewal successful but no lease duration returned")
   554  	}
   555  
   556  	v.lastRenewed = time.Now()
   557  	v.logger.Printf("[DEBUG] vault: successfully renewed server token")
   558  	return nil
   559  }
   560  
   561  // getWrappingFn returns an appropriate wrapping function for Nomad Servers
   562  func (v *vaultClient) getWrappingFn() func(operation, path string) string {
   563  	createPath := "auth/token/create"
   564  	role := v.getRole()
   565  	if role != "" {
   566  		createPath = fmt.Sprintf("auth/token/create/%s", role)
   567  	}
   568  
   569  	return func(operation, path string) string {
   570  		// Only wrap the token create operation
   571  		if operation != "POST" || path != createPath {
   572  			return ""
   573  		}
   574  
   575  		return vaultTokenCreateTTL
   576  	}
   577  }
   578  
   579  // parseSelfToken looks up the Vault token in Vault and parses its data storing
   580  // it in the client. If the token is not valid for Nomads purposes an error is
   581  // returned.
   582  func (v *vaultClient) parseSelfToken() error {
   583  	// Get the initial lease duration
   584  	auth := v.client.Auth().Token()
   585  	var self *vapi.Secret
   586  
   587  	// Try looking up the token using the self endpoint
   588  	secret, err := auth.LookupSelf()
   589  	if err != nil {
   590  		// Try looking up our token directly
   591  		self, err = auth.Lookup(v.client.Token())
   592  		if err != nil {
   593  			return fmt.Errorf("failed to lookup Vault periodic token: %v", err)
   594  		}
   595  	}
   596  	self = secret
   597  
   598  	// Read and parse the fields
   599  	var data tokenData
   600  	if err := mapstructure.WeakDecode(self.Data, &data); err != nil {
   601  		return fmt.Errorf("failed to parse Vault token's data block: %v", err)
   602  	}
   603  
   604  	root := false
   605  	for _, p := range data.Policies {
   606  		if p == "root" {
   607  			root = true
   608  			break
   609  		}
   610  	}
   611  
   612  	// Store the token data
   613  	data.Root = root
   614  	v.tokenData = &data
   615  
   616  	// The criteria that must be met for the token to be valid are as follows:
   617  	// 1) If token is non-root or is but has a creation ttl
   618  	//   a) The token must be renewable
   619  	//   b) Token must have a non-zero TTL
   620  	// 2) Must have update capability for "auth/token/lookup/" (used to verify incoming tokens)
   621  	// 3) Must have update capability for "/auth/token/revoke-accessor/" (used to revoke unneeded tokens)
   622  	// 4) If configured to create tokens against a role:
   623  	//   a) Must have read capability for "auth/token/roles/<role_name" (Can just attempt a read)
   624  	//   b) Must have update capability for path "auth/token/create/<role_name>"
   625  	//   c) Role must:
   626  	//     1) Not allow orphans
   627  	//     2) Must allow tokens to be renewed
   628  	//     3) Must not have an explicit max TTL
   629  	//     4) Must have non-zero period
   630  	// 5) If not configured against a role, the token must be root
   631  
   632  	var mErr multierror.Error
   633  	role := v.getRole()
   634  	if !root {
   635  		// All non-root tokens must be renewable
   636  		if !data.Renewable {
   637  			multierror.Append(&mErr, fmt.Errorf("Vault token is not renewable or root"))
   638  		}
   639  
   640  		// All non-root tokens must have a lease duration
   641  		if data.CreationTTL == 0 {
   642  			multierror.Append(&mErr, fmt.Errorf("invalid lease duration of zero"))
   643  		}
   644  
   645  		// The lease duration can not be expired
   646  		if data.TTL == 0 {
   647  			multierror.Append(&mErr, fmt.Errorf("token TTL is zero"))
   648  		}
   649  
   650  		// There must be a valid role since we aren't root
   651  		if role == "" {
   652  			multierror.Append(&mErr, fmt.Errorf("token role name must be set when not using a root token"))
   653  		}
   654  
   655  	} else if data.CreationTTL != 0 {
   656  		// If the root token has a TTL it must be renewable
   657  		if !data.Renewable {
   658  			multierror.Append(&mErr, fmt.Errorf("Vault token has a TTL but is not renewable"))
   659  		} else if data.TTL == 0 {
   660  			// If the token has a TTL make sure it has not expired
   661  			multierror.Append(&mErr, fmt.Errorf("token TTL is zero"))
   662  		}
   663  	}
   664  
   665  	// Check we have the correct capabilities
   666  	if err := v.validateCapabilities(role, root); err != nil {
   667  		multierror.Append(&mErr, err)
   668  	}
   669  
   670  	// If given a role validate it
   671  	if role != "" {
   672  		if err := v.validateRole(role); err != nil {
   673  			multierror.Append(&mErr, err)
   674  		}
   675  	}
   676  
   677  	return mErr.ErrorOrNil()
   678  }
   679  
   680  // getRole returns the role name to be used when creating tokens
   681  func (v *vaultClient) getRole() string {
   682  	if v.config.Role != "" {
   683  		return v.config.Role
   684  	}
   685  
   686  	return v.tokenData.Role
   687  }
   688  
   689  // validateCapabilities checks that Nomad's Vault token has the correct
   690  // capabilities.
   691  func (v *vaultClient) validateCapabilities(role string, root bool) error {
   692  	// Check if the token can lookup capabilities.
   693  	var mErr multierror.Error
   694  	_, _, err := v.hasCapability(vaultCapabilitiesLookupPath, vaultCapabilitiesCapability)
   695  	if err != nil {
   696  		// Check if there is a permission denied
   697  		if vaultUnrecoverableError.MatchString(err.Error()) {
   698  			// Since we can't read permissions, we just log a warning that we
   699  			// can't tell if the Vault token will work
   700  			msg := fmt.Sprintf("Can not lookup token capabilities. "+
   701  				"As such certain operations may fail in the future. "+
   702  				"Please give Nomad a Vault token with one of the following "+
   703  				"capabilities %q on %q so that the required capabilities can be verified",
   704  				vaultCapabilitiesCapability, vaultCapabilitiesLookupPath)
   705  			v.logger.Printf("[WARN] vault: %s", msg)
   706  			return nil
   707  		} else {
   708  			multierror.Append(&mErr, err)
   709  		}
   710  	}
   711  
   712  	// verify is a helper function that verifies the token has one of the
   713  	// capabilities on the given path and adds an issue to the error
   714  	verify := func(path string, requiredCaps []string) {
   715  		ok, caps, err := v.hasCapability(path, requiredCaps)
   716  		if err != nil {
   717  			multierror.Append(&mErr, err)
   718  		} else if !ok {
   719  			multierror.Append(&mErr,
   720  				fmt.Errorf("token must have one of the following capabilities %q on %q; has %v", requiredCaps, path, caps))
   721  		}
   722  	}
   723  
   724  	// Check if we are verifying incoming tokens
   725  	if !v.config.AllowsUnauthenticated() {
   726  		verify(vaultTokenLookupPath, vaultTokenLookupCapability)
   727  	}
   728  
   729  	// Verify we can renew our selves tokens
   730  	verify(vaultTokenRenewPath, vaultTokenRenewCapability)
   731  
   732  	// Verify we can revoke tokens
   733  	verify(vaultTokenRevokePath, vaultTokenRevokeCapability)
   734  
   735  	// If we are using a role verify the capability
   736  	if role != "" {
   737  		// Verify we can read the role
   738  		verify(fmt.Sprintf(vaultRoleLookupPath, role), vaultRoleLookupCapability)
   739  
   740  		// Verify we can create from the role
   741  		verify(fmt.Sprintf(vaultTokenRoleCreatePath, role), vaultTokenRoleCreateCapability)
   742  	}
   743  
   744  	return mErr.ErrorOrNil()
   745  }
   746  
   747  // hasCapability takes a path and returns whether the token has at least one of
   748  // the required capabilities on the given path. It also returns the set of
   749  // capabilities the token does have as well as any error that occurred.
   750  func (v *vaultClient) hasCapability(path string, required []string) (bool, []string, error) {
   751  	caps, err := v.client.Sys().CapabilitiesSelf(path)
   752  	if err != nil {
   753  		return false, nil, err
   754  	}
   755  	for _, c := range caps {
   756  		for _, r := range required {
   757  			if c == r {
   758  				return true, caps, nil
   759  			}
   760  		}
   761  	}
   762  	return false, caps, nil
   763  }
   764  
   765  // validateRole contacts Vault and checks that the given Vault role is valid for
   766  // the purposes of being used by Nomad
   767  func (v *vaultClient) validateRole(role string) error {
   768  	if role == "" {
   769  		return fmt.Errorf("Invalid empty role name")
   770  	}
   771  
   772  	// Validate the role
   773  	rsecret, err := v.client.Logical().Read(fmt.Sprintf("auth/token/roles/%s", role))
   774  	if err != nil {
   775  		return fmt.Errorf("failed to lookup role %q: %v", role, err)
   776  	}
   777  	if rsecret == nil {
   778  		return fmt.Errorf("Role %q does not exist", role)
   779  	}
   780  
   781  	// Read and parse the fields
   782  	var data struct {
   783  		ExplicitMaxTtl int `mapstructure:"explicit_max_ttl"`
   784  		Orphan         bool
   785  		Period         int
   786  		Renewable      bool
   787  	}
   788  	if err := mapstructure.WeakDecode(rsecret.Data, &data); err != nil {
   789  		return fmt.Errorf("failed to parse Vault role's data block: %v", err)
   790  	}
   791  
   792  	// Validate the role is acceptable
   793  	var mErr multierror.Error
   794  	if data.Orphan {
   795  		multierror.Append(&mErr, fmt.Errorf("Role must not allow orphans"))
   796  	}
   797  
   798  	if !data.Renewable {
   799  		multierror.Append(&mErr, fmt.Errorf("Role must allow tokens to be renewed"))
   800  	}
   801  
   802  	if data.ExplicitMaxTtl != 0 {
   803  		multierror.Append(&mErr, fmt.Errorf("Role can not use an explicit max ttl. Token must be periodic."))
   804  	}
   805  
   806  	if data.Period == 0 {
   807  		multierror.Append(&mErr, fmt.Errorf("Role must have a non-zero period to make tokens periodic."))
   808  	}
   809  
   810  	return mErr.ErrorOrNil()
   811  }
   812  
   813  // ConnectionEstablished returns whether a connection to Vault has been
   814  // established and any error that potentially caused it to be false
   815  func (v *vaultClient) ConnectionEstablished() (bool, error) {
   816  	v.l.Lock()
   817  	defer v.l.Unlock()
   818  	return v.connEstablished, v.connEstablishedErr
   819  }
   820  
   821  // Enabled returns whether the client is active
   822  func (v *vaultClient) Enabled() bool {
   823  	v.l.Lock()
   824  	defer v.l.Unlock()
   825  	return v.config.IsEnabled()
   826  }
   827  
   828  // Active returns whether the client is active
   829  func (v *vaultClient) Active() bool {
   830  	return atomic.LoadInt32(&v.active) == 1
   831  }
   832  
   833  // CreateToken takes the allocation and task and returns an appropriate Vault
   834  // token. The call is rate limited and may be canceled with the passed policy.
   835  // When the error is recoverable, it will be of type RecoverableError
   836  func (v *vaultClient) CreateToken(ctx context.Context, a *structs.Allocation, task string) (*vapi.Secret, error) {
   837  	if !v.Enabled() {
   838  		return nil, fmt.Errorf("Vault integration disabled")
   839  	}
   840  	if !v.Active() {
   841  		return nil, structs.NewRecoverableError(fmt.Errorf("Vault client not active"), true)
   842  	}
   843  
   844  	// Check if we have established a connection with Vault
   845  	if established, err := v.ConnectionEstablished(); !established && err == nil {
   846  		return nil, structs.NewRecoverableError(fmt.Errorf("Connection to Vault has not been established"), true)
   847  	} else if !established {
   848  		return nil, fmt.Errorf("Connection to Vault failed: %v", err)
   849  	}
   850  
   851  	// Track how long the request takes
   852  	defer metrics.MeasureSince([]string{"nomad", "vault", "create_token"}, time.Now())
   853  
   854  	// Retrieve the Vault block for the task
   855  	policies := a.Job.VaultPolicies()
   856  	if policies == nil {
   857  		return nil, fmt.Errorf("Job doesn't require Vault policies")
   858  	}
   859  	tg, ok := policies[a.TaskGroup]
   860  	if !ok {
   861  		return nil, fmt.Errorf("Task group does not require Vault policies")
   862  	}
   863  	taskVault, ok := tg[task]
   864  	if !ok {
   865  		return nil, fmt.Errorf("Task does not require Vault policies")
   866  	}
   867  
   868  	// Build the creation request
   869  	req := &vapi.TokenCreateRequest{
   870  		Policies: taskVault.Policies,
   871  		Metadata: map[string]string{
   872  			"AllocationID": a.ID,
   873  			"Task":         task,
   874  			"NodeID":       a.NodeID,
   875  		},
   876  		TTL:         v.childTTL,
   877  		DisplayName: fmt.Sprintf("%s-%s", a.ID, task),
   878  	}
   879  
   880  	// Ensure we are under our rate limit
   881  	if err := v.limiter.Wait(ctx); err != nil {
   882  		return nil, err
   883  	}
   884  
   885  	// Make the request and switch depending on whether we are using a root
   886  	// token or a role based token
   887  	var secret *vapi.Secret
   888  	var err error
   889  	role := v.getRole()
   890  	if v.tokenData.Root && role == "" {
   891  		req.Period = v.childTTL
   892  		secret, err = v.auth.Create(req)
   893  	} else {
   894  		// Make the token using the role
   895  		secret, err = v.auth.CreateWithRole(req, v.getRole())
   896  	}
   897  
   898  	// Determine whether it is unrecoverable
   899  	if err != nil {
   900  		if vaultUnrecoverableError.MatchString(err.Error()) {
   901  			return secret, err
   902  		}
   903  
   904  		// The error is recoverable
   905  		return nil, structs.NewRecoverableError(err, true)
   906  	}
   907  
   908  	return secret, nil
   909  }
   910  
   911  // LookupToken takes a Vault token and does a lookup against Vault. The call is
   912  // rate limited and may be canceled with passed context.
   913  func (v *vaultClient) LookupToken(ctx context.Context, token string) (*vapi.Secret, error) {
   914  	if !v.Enabled() {
   915  		return nil, fmt.Errorf("Vault integration disabled")
   916  	}
   917  
   918  	if !v.Active() {
   919  		return nil, fmt.Errorf("Vault client not active")
   920  	}
   921  
   922  	// Check if we have established a connection with Vault
   923  	if established, err := v.ConnectionEstablished(); !established && err == nil {
   924  		return nil, structs.NewRecoverableError(fmt.Errorf("Connection to Vault has not been established"), true)
   925  	} else if !established {
   926  		return nil, fmt.Errorf("Connection to Vault failed: %v", err)
   927  	}
   928  
   929  	// Track how long the request takes
   930  	defer metrics.MeasureSince([]string{"nomad", "vault", "lookup_token"}, time.Now())
   931  
   932  	// Ensure we are under our rate limit
   933  	if err := v.limiter.Wait(ctx); err != nil {
   934  		return nil, err
   935  	}
   936  
   937  	// Lookup the token
   938  	return v.auth.Lookup(token)
   939  }
   940  
   941  // PoliciesFrom parses the set of policies returned by a token lookup.
   942  func PoliciesFrom(s *vapi.Secret) ([]string, error) {
   943  	if s == nil {
   944  		return nil, fmt.Errorf("cannot parse nil Vault secret")
   945  	}
   946  	var data tokenData
   947  	if err := mapstructure.WeakDecode(s.Data, &data); err != nil {
   948  		return nil, fmt.Errorf("failed to parse Vault token's data block: %v", err)
   949  	}
   950  
   951  	return data.Policies, nil
   952  }
   953  
   954  // RevokeTokens revokes the passed set of accessors. If committed is set, the
   955  // purge function passed to the client is called. If there is an error purging
   956  // either because of Vault failures or because of the purge function, the
   957  // revocation is retried until the tokens TTL.
   958  func (v *vaultClient) RevokeTokens(ctx context.Context, accessors []*structs.VaultAccessor, committed bool) error {
   959  	if !v.Enabled() {
   960  		return nil
   961  	}
   962  
   963  	if !v.Active() {
   964  		return fmt.Errorf("Vault client not active")
   965  	}
   966  
   967  	// Track how long the request takes
   968  	defer metrics.MeasureSince([]string{"nomad", "vault", "revoke_tokens"}, time.Now())
   969  
   970  	// Check if we have established a connection with Vault. If not just add it
   971  	// to the queue
   972  	if established, err := v.ConnectionEstablished(); !established && err == nil {
   973  		// Only bother tracking it for later revocation if the accessor was
   974  		// committed
   975  		if committed {
   976  			v.storeForRevocation(accessors)
   977  		}
   978  
   979  		// Track that we are abandoning these accessors.
   980  		metrics.IncrCounter([]string{"nomad", "vault", "undistributed_tokens_abandoned"}, float32(len(accessors)))
   981  		return nil
   982  	}
   983  
   984  	// Attempt to revoke immediately and if it fails, add it to the revoke queue
   985  	err := v.parallelRevoke(ctx, accessors)
   986  	if err != nil {
   987  		// If it is uncommitted, it is a best effort revoke as it will shortly
   988  		// TTL within the cubbyhole and has not been leaked to any outside
   989  		// system
   990  		if !committed {
   991  			metrics.IncrCounter([]string{"nomad", "vault", "undistributed_tokens_abandoned"}, float32(len(accessors)))
   992  			return nil
   993  		}
   994  
   995  		v.logger.Printf("[WARN] vault: failed to revoke tokens. Will reattempt til TTL: %v", err)
   996  		v.storeForRevocation(accessors)
   997  		return nil
   998  	} else if !committed {
   999  		// Mark that it was revoked but there is nothing to purge so exit
  1000  		metrics.IncrCounter([]string{"nomad", "vault", "undistributed_tokens_revoked"}, float32(len(accessors)))
  1001  		return nil
  1002  	}
  1003  
  1004  	if err := v.purgeFn(accessors); err != nil {
  1005  		v.logger.Printf("[ERR] vault: failed to purge Vault accessors: %v", err)
  1006  		v.storeForRevocation(accessors)
  1007  		return nil
  1008  	}
  1009  
  1010  	// Track that it was revoked successfully
  1011  	metrics.IncrCounter([]string{"nomad", "vault", "distributed_tokens_revoked"}, float32(len(accessors)))
  1012  
  1013  	return nil
  1014  }
  1015  
  1016  // storeForRevocation stores the passed set of accessors for revocation. It
  1017  // captrues their effective TTL by storing their create TTL plus the current
  1018  // time.
  1019  func (v *vaultClient) storeForRevocation(accessors []*structs.VaultAccessor) {
  1020  	v.revLock.Lock()
  1021  	v.statsLock.Lock()
  1022  	now := time.Now()
  1023  	for _, a := range accessors {
  1024  		v.revoking[a] = now.Add(time.Duration(a.CreationTTL) * time.Second)
  1025  	}
  1026  	v.stats.TrackedForRevoke = len(v.revoking)
  1027  	v.statsLock.Unlock()
  1028  	v.revLock.Unlock()
  1029  }
  1030  
  1031  // parallelRevoke revokes the passed VaultAccessors in parallel.
  1032  func (v *vaultClient) parallelRevoke(ctx context.Context, accessors []*structs.VaultAccessor) error {
  1033  	if !v.Enabled() {
  1034  		return fmt.Errorf("Vault integration disabled")
  1035  	}
  1036  
  1037  	if !v.Active() {
  1038  		return fmt.Errorf("Vault client not active")
  1039  	}
  1040  
  1041  	// Check if we have established a connection with Vault
  1042  	if established, err := v.ConnectionEstablished(); !established && err == nil {
  1043  		return structs.NewRecoverableError(fmt.Errorf("Connection to Vault has not been established"), true)
  1044  	} else if !established {
  1045  		return fmt.Errorf("Connection to Vault failed: %v", err)
  1046  	}
  1047  
  1048  	g, pCtx := errgroup.WithContext(ctx)
  1049  
  1050  	// Cap the handlers
  1051  	handlers := len(accessors)
  1052  	if handlers > maxParallelRevokes {
  1053  		handlers = maxParallelRevokes
  1054  	}
  1055  
  1056  	// Create the Vault Tokens
  1057  	input := make(chan *structs.VaultAccessor, handlers)
  1058  	for i := 0; i < handlers; i++ {
  1059  		g.Go(func() error {
  1060  			for {
  1061  				select {
  1062  				case va, ok := <-input:
  1063  					if !ok {
  1064  						return nil
  1065  					}
  1066  
  1067  					if err := v.auth.RevokeAccessor(va.Accessor); err != nil {
  1068  						return fmt.Errorf("failed to revoke token (alloc: %q, node: %q, task: %q): %v", va.AllocID, va.NodeID, va.Task, err)
  1069  					}
  1070  				case <-pCtx.Done():
  1071  					return nil
  1072  				}
  1073  			}
  1074  		})
  1075  	}
  1076  
  1077  	// Send the input
  1078  	go func() {
  1079  		defer close(input)
  1080  		for _, va := range accessors {
  1081  			select {
  1082  			case <-pCtx.Done():
  1083  				return
  1084  			case input <- va:
  1085  			}
  1086  		}
  1087  
  1088  	}()
  1089  
  1090  	// Wait for everything to complete
  1091  	return g.Wait()
  1092  }
  1093  
// revokeDaemon should be called in a goroutine and is used to periodically
// revoke Vault accessors that failed the original revocation. It wakes up
// every vaultRevocationIntv and exits when the client's tomb is dying. On
// each tick it drops tracked accessors whose deadline has passed, revokes the
// remainder via parallelRevoke, calls the purge function, and finally removes
// the purged accessors from the tracked set.
func (v *vaultClient) revokeDaemon() {
	ticker := time.NewTicker(vaultRevocationIntv)
	defer ticker.Stop()

	for {
		select {
		case <-v.tomb.Dying():
			return
		case now := <-ticker.C:
			// Nothing can be revoked without a connection; wait for the next tick
			if established, _ := v.ConnectionEstablished(); !established {
				continue
			}

			v.revLock.Lock()

			// Fast path: nothing is tracked for revocation
			if len(v.revoking) == 0 {
				v.revLock.Unlock()
				continue
			}

			// Build the list of accessors that still need to be revoked while
			// pruning any whose deadline has already passed
			revoking := make([]*structs.VaultAccessor, 0, len(v.revoking))
			for va, ttl := range v.revoking {
				if now.After(ttl) {
					delete(v.revoking, va)
				} else {
					revoking = append(revoking, va)
				}
			}

			// NOTE(review): revLock is still held while parallelRevoke runs,
			// so storeForRevocation blocks for the duration of the call -
			// confirm this is intended.
			if err := v.parallelRevoke(context.Background(), revoking); err != nil {
				v.logger.Printf("[WARN] vault: background token revocation errored: %v", err)
				v.revLock.Unlock()
				continue
			}

			// Unlock before a potentially expensive operation
			v.revLock.Unlock()

			// Call the passed in purge function for the revoked accessors
			if err := v.purgeFn(revoking); err != nil {
				// Can continue since revocation is idempotent; the accessors
				// stay tracked and are retried on the next tick
				v.logger.Printf("[ERR] vault: token revocation errored: %v", err)
				continue
			}

			// Track that tokens were revoked successfully
			metrics.IncrCounter([]string{"nomad", "vault", "distributed_tokens_revoked"}, float32(len(revoking)))

			// Can delete from the tracked list now that we have purged. The
			// tracked-count stat is only refreshed here, not on the early
			// continue paths above.
			v.revLock.Lock()
			v.statsLock.Lock()
			for _, va := range revoking {
				delete(v.revoking, va)
			}
			v.stats.TrackedForRevoke = len(v.revoking)
			v.statsLock.Unlock()
			v.revLock.Unlock()

		}
	}
}
  1159  
  1160  // purgeVaultAccessors creates a Raft transaction to remove the passed Vault
  1161  // Accessors
  1162  func (s *Server) purgeVaultAccessors(accessors []*structs.VaultAccessor) error {
  1163  	// Commit this update via Raft
  1164  	req := structs.VaultAccessorsRequest{Accessors: accessors}
  1165  	_, _, err := s.raftApply(structs.VaultAccessorDegisterRequestType, req)
  1166  	return err
  1167  }
  1168  
  1169  // wrapNilError is a helper that returns a wrapped function that returns a nil
  1170  // error
  1171  func wrapNilError(f func()) func() error {
  1172  	return func() error {
  1173  		f()
  1174  		return nil
  1175  	}
  1176  }
  1177  
  1178  // setLimit is used to update the rate limit
  1179  func (v *vaultClient) setLimit(l rate.Limit) {
  1180  	v.l.Lock()
  1181  	defer v.l.Unlock()
  1182  	v.limiter = rate.NewLimiter(l, int(l))
  1183  }
  1184  
  1185  // Stats is used to query the state of the blocked eval tracker.
  1186  func (v *vaultClient) Stats() *VaultStats {
  1187  	// Allocate a new stats struct
  1188  	stats := new(VaultStats)
  1189  
  1190  	v.statsLock.RLock()
  1191  	defer v.statsLock.RUnlock()
  1192  
  1193  	// Copy all the stats
  1194  	stats.TrackedForRevoke = v.stats.TrackedForRevoke
  1195  
  1196  	return stats
  1197  }
  1198  
  1199  // EmitStats is used to export metrics about the blocked eval tracker while enabled
  1200  func (v *vaultClient) EmitStats(period time.Duration, stopCh chan struct{}) {
  1201  	for {
  1202  		select {
  1203  		case <-time.After(period):
  1204  			stats := v.Stats()
  1205  			metrics.SetGauge([]string{"nomad", "vault", "distributed_tokens_revoking"}, float32(stats.TrackedForRevoke))
  1206  		case <-stopCh:
  1207  			return
  1208  		}
  1209  	}
  1210  }