github.com/zoomfoo/nomad@v0.8.5-0.20180907175415-f28fd3a1a056/nomad/vault.go (about)

     1  package nomad
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"log"
     8  	"math/rand"
     9  	"sync"
    10  	"sync/atomic"
    11  	"time"
    12  
    13  	"gopkg.in/tomb.v2"
    14  
    15  	metrics "github.com/armon/go-metrics"
    16  	multierror "github.com/hashicorp/go-multierror"
    17  	"github.com/hashicorp/nomad/nomad/structs"
    18  	"github.com/hashicorp/nomad/nomad/structs/config"
    19  	vapi "github.com/hashicorp/vault/api"
    20  	"github.com/mitchellh/mapstructure"
    21  
    22  	"golang.org/x/sync/errgroup"
    23  	"golang.org/x/time/rate"
    24  )
    25  
const (
	// vaultTokenCreateTTL is the duration the wrapped token for the client is
	// valid for. It is a duration string expressed in seconds and is the value
	// returned by the wrapping lookup function for token create requests.
	vaultTokenCreateTTL = "60s"

	// minimumTokenTTL is the minimum Token TTL allowed for child tokens. A
	// configured TaskTokenTTL below this value is rejected when the client is
	// built.
	minimumTokenTTL = 5 * time.Minute

	// defaultTokenTTL is the default Token TTL used for child tokens when no
	// TaskTokenTTL has been configured, e.g. when the passed token is a root
	// token such that child tokens aren't being created against a role that
	// has defined a TTL.
	defaultTokenTTL = "72h"

	// requestRateLimit is the maximum number of requests per second Nomad will
	// make against Vault. The same value is used as the limiter's burst size.
	requestRateLimit rate.Limit = 500.0

	// maxParallelRevokes is the maximum number of parallel Vault
	// token revocation requests
	maxParallelRevokes = 64

	// vaultRevocationIntv is the interval at which Vault tokens that failed
	// initial revocation are retried
	vaultRevocationIntv = 5 * time.Minute

	// vaultCapabilitiesLookupPath is the path to lookup the capabilities of
	// one's own token.
	vaultCapabilitiesLookupPath = "sys/capabilities-self"

	// vaultTokenRenewPath is the path used to renew our token
	vaultTokenRenewPath = "auth/token/renew-self"

	// vaultTokenLookupPath is the path used to lookup a token
	vaultTokenLookupPath = "auth/token/lookup"

	// vaultTokenRevokePath is the path used to revoke a token by its accessor
	vaultTokenRevokePath = "auth/token/revoke-accessor"

	// vaultRoleLookupPath is the format string for the path used to lookup a
	// role; the single %s verb is the role name.
	vaultRoleLookupPath = "auth/token/roles/%s"

	// vaultTokenRoleCreatePath is the format string for the path used to
	// create a token from a role; the single %s verb is the role name.
	vaultTokenRoleCreatePath = "auth/token/create/%s"
)
    70  
var (
	// vaultCapabilitiesCapability is the expected capability of Nomad's Vault
	// token on the capabilities lookup path. The token must have at least one
	// of the capabilities.
	vaultCapabilitiesCapability = []string{"update", "root"}

	// vaultTokenRenewCapability is the expected capability Nomad's
	// Vault token should have on the token renew path. The token must have at
	// least one of the capabilities.
	vaultTokenRenewCapability = []string{"update", "root"}

	// vaultTokenLookupCapability is the expected capability Nomad's
	// Vault token should have on the token lookup path. The token must have at
	// least one of the capabilities.
	vaultTokenLookupCapability = []string{"update", "root"}

	// vaultTokenRevokeCapability is the expected capability Nomad's
	// Vault token should have on the token revoke path. The token must have at
	// least one of the capabilities.
	vaultTokenRevokeCapability = []string{"update", "root"}

	// vaultRoleLookupCapability is the expected capability Nomad's Vault
	// token should have on the role lookup path. The token must have at least
	// one of the capabilities.
	vaultRoleLookupCapability = []string{"read", "root"}

	// vaultTokenRoleCreateCapability is the expected capability Nomad's Vault
	// token should have on the role token create path. The token must have at
	// least one of the capabilities.
	vaultTokenRoleCreateCapability = []string{"update", "root"}
)
   102  
// VaultClient is the Server's interface for interfacing with Vault
type VaultClient interface {
	// SetActive activates or de-activates the Vault client. When active, token
	// creation/lookup/revocation operations are allowed.
	SetActive(active bool)

	// SetConfig updates the config used by the Vault client
	SetConfig(config *config.VaultConfig) error

	// CreateToken takes an allocation and task and returns an appropriate Vault
	// Secret
	CreateToken(ctx context.Context, a *structs.Allocation, task string) (*vapi.Secret, error)

	// LookupToken takes a token string and returns the Vault Secret
	// describing it.
	LookupToken(ctx context.Context, token string) (*vapi.Secret, error)

	// RevokeTokens takes a set of token accessors and revokes the tokens.
	// NOTE(review): the meaning of committed is not visible from this
	// declaration; confirm against the implementation.
	RevokeTokens(ctx context.Context, accessors []*structs.VaultAccessor, committed bool) error

	// Stop is used to stop token renewal
	Stop()

	// Running returns whether the Vault client is running
	Running() bool

	// Stats returns the Vault client's statistics
	Stats() *VaultStats

	// EmitStats emits the client's statistics at the given period until stopCh
	// is signalled (presumably closed; confirm against the implementation).
	EmitStats(period time.Duration, stopCh chan struct{})
}
   135  
// VaultStats holds the stats about Vault tokens created and managed by
// Nomad.
type VaultStats struct {
	// TrackedForRevoke is the count of tokens that are being tracked to be
	// revoked since they could not be immediately revoked.
	TrackedForRevoke int
}
   143  
// PurgeVaultAccessorFn is called to remove VaultAccessors from the system. If
// the function returns an error, the token will still be tracked and revocation
// will retry until there is a success.
type PurgeVaultAccessorFn func(accessors []*structs.VaultAccessor) error
   148  
// tokenData holds the relevant information about the Vault token passed to the
// client. All fields except Root are decoded from the token lookup response
// using the mapstructure tags below.
type tokenData struct {
	// CreationTTL is the TTL the token was created with, in seconds.
	CreationTTL int      `mapstructure:"creation_ttl"`
	// TTL is the token's TTL as reported by the lookup; zero is treated as
	// an invalid/expired token during validation.
	TTL         int      `mapstructure:"ttl"`
	// Renewable reports whether the token may be renewed.
	Renewable   bool     `mapstructure:"renewable"`
	// Policies is the list of policies attached to the token.
	Policies    []string `mapstructure:"policies"`
	// Role is the role recorded on the token, used when no role is
	// configured explicitly.
	Role        string   `mapstructure:"role"`
	// Root is not decoded; it is set after decoding when Policies contains
	// "root".
	Root        bool
}
   159  
// vaultClient is the Server's implementation of the VaultClient interface. The
// client renews the PeriodicToken given in the Vault configuration and provides
// the Server with the ability to create child tokens and lookup the permissions
// of tokens.
type vaultClient struct {
	// limiter is used to rate limit requests to Vault
	limiter *rate.Limiter

	// client is the Vault API client
	client *vapi.Client

	// auth is the Vault token auth API client
	auth *vapi.TokenAuth

	// config is the user passed Vault config
	config *config.VaultConfig

	// connEstablished marks whether we have an established connection to Vault.
	// It is read and written while holding l.
	connEstablished bool

	// connEstablishedErr marks an error that can occur when establishing a
	// connection. It is read and written while holding l.
	connEstablishedErr error

	// token is the raw token used by the client
	token string

	// tokenData is the data of the passed Vault token. It is populated once
	// the self token has been looked up and parsed.
	tokenData *tokenData

	// revoking tracks the VaultAccessors that must be revoked. purgeFn is the
	// callback supplied at construction for purging accessors, and revLock is
	// taken when the revoking map is replaced in SetActive.
	revoking map[*structs.VaultAccessor]time.Time
	purgeFn  PurgeVaultAccessorFn
	revLock  sync.Mutex

	// active indicates whether the vaultClient is active. It should be
	// accessed using a helper and updated atomically
	active int32

	// running indicates whether the vault client is started. It is read and
	// written while holding l.
	running bool

	// childTTL is the TTL for child tokens.
	childTTL string

	// lastRenewed is the time the token was last renewed
	lastRenewed time.Time

	// tomb tracks the client's background goroutines so they can be killed
	// and waited on; logger is the destination for the client's log output.
	tomb   *tomb.Tomb
	logger *log.Logger

	// stats stores the stats; statsLock presumably guards it (its use is not
	// visible in this part of the file).
	stats     *VaultStats
	statsLock sync.RWMutex

	// l is used to lock the configuration aspects of the client such that
	// multiple callers can't cause conflicting config updates
	l sync.Mutex
}
   219  
   220  // NewVaultClient returns a Vault client from the given config. If the client
   221  // couldn't be made an error is returned.
   222  func NewVaultClient(c *config.VaultConfig, logger *log.Logger, purgeFn PurgeVaultAccessorFn) (*vaultClient, error) {
   223  	if c == nil {
   224  		return nil, fmt.Errorf("must pass valid VaultConfig")
   225  	}
   226  
   227  	if logger == nil {
   228  		return nil, fmt.Errorf("must pass valid logger")
   229  	}
   230  
   231  	v := &vaultClient{
   232  		config:   c,
   233  		logger:   logger,
   234  		limiter:  rate.NewLimiter(requestRateLimit, int(requestRateLimit)),
   235  		revoking: make(map[*structs.VaultAccessor]time.Time),
   236  		purgeFn:  purgeFn,
   237  		tomb:     &tomb.Tomb{},
   238  		stats:    new(VaultStats),
   239  	}
   240  
   241  	if v.config.IsEnabled() {
   242  		if err := v.buildClient(); err != nil {
   243  			return nil, err
   244  		}
   245  
   246  		// Launch the required goroutines
   247  		v.tomb.Go(wrapNilError(v.establishConnection))
   248  		v.tomb.Go(wrapNilError(v.revokeDaemon))
   249  
   250  		v.running = true
   251  	}
   252  
   253  	return v, nil
   254  }
   255  
   256  func (v *vaultClient) Stop() {
   257  	v.l.Lock()
   258  	running := v.running
   259  	v.running = false
   260  	v.l.Unlock()
   261  
   262  	if running {
   263  		v.tomb.Kill(nil)
   264  		v.tomb.Wait()
   265  		v.flush()
   266  	}
   267  }
   268  
   269  func (v *vaultClient) Running() bool {
   270  	v.l.Lock()
   271  	defer v.l.Unlock()
   272  	return v.running
   273  }
   274  
   275  // SetActive activates or de-activates the Vault client. When active, token
   276  // creation/lookup/revocation operation are allowed. All queued revocations are
   277  // cancelled if set un-active as it is assumed another instances is taking over
   278  func (v *vaultClient) SetActive(active bool) {
   279  	if active {
   280  		atomic.StoreInt32(&v.active, 1)
   281  	} else {
   282  		atomic.StoreInt32(&v.active, 0)
   283  	}
   284  
   285  	// Clear out the revoking tokens
   286  	v.revLock.Lock()
   287  	v.revoking = make(map[*structs.VaultAccessor]time.Time)
   288  	v.revLock.Unlock()
   289  
   290  	return
   291  }
   292  
   293  // flush is used to reset the state of the vault client
   294  func (v *vaultClient) flush() {
   295  	v.l.Lock()
   296  	defer v.l.Unlock()
   297  
   298  	v.client = nil
   299  	v.auth = nil
   300  	v.connEstablished = false
   301  	v.connEstablishedErr = nil
   302  	v.token = ""
   303  	v.tokenData = nil
   304  	v.revoking = make(map[*structs.VaultAccessor]time.Time)
   305  	v.childTTL = ""
   306  	v.tomb = &tomb.Tomb{}
   307  }
   308  
   309  // SetConfig is used to update the Vault config being used. A temporary outage
   310  // may occur after calling as it re-establishes a connection to Vault
   311  func (v *vaultClient) SetConfig(config *config.VaultConfig) error {
   312  	if config == nil {
   313  		return fmt.Errorf("must pass valid VaultConfig")
   314  	}
   315  
   316  	v.l.Lock()
   317  	defer v.l.Unlock()
   318  
   319  	// If reloading the same config, no-op
   320  	if v.config.IsEqual(config) {
   321  		return nil
   322  	}
   323  
   324  	// Kill any background routines
   325  	if v.running {
   326  		// Stop accepting any new request
   327  		v.connEstablished = false
   328  
   329  		// Kill any background routine and create a new tomb
   330  		v.tomb.Kill(nil)
   331  		v.tomb.Wait()
   332  		v.tomb = &tomb.Tomb{}
   333  		v.running = false
   334  	}
   335  
   336  	// Store the new config
   337  	v.config = config
   338  
   339  	// Check if we should relaunch
   340  	if v.config.IsEnabled() {
   341  		// Rebuild the client
   342  		if err := v.buildClient(); err != nil {
   343  			return err
   344  		}
   345  
   346  		// Launch the required goroutines
   347  		v.tomb.Go(wrapNilError(v.establishConnection))
   348  		v.tomb.Go(wrapNilError(v.revokeDaemon))
   349  		v.running = true
   350  	}
   351  
   352  	return nil
   353  }
   354  
   355  // buildClient is used to build a Vault client based on the stored Vault config
   356  func (v *vaultClient) buildClient() error {
   357  	// Validate we have the required fields.
   358  	if v.config.Token == "" {
   359  		return errors.New("Vault token must be set")
   360  	} else if v.config.Addr == "" {
   361  		return errors.New("Vault address must be set")
   362  	}
   363  
   364  	// Parse the TTL if it is set
   365  	if v.config.TaskTokenTTL != "" {
   366  		d, err := time.ParseDuration(v.config.TaskTokenTTL)
   367  		if err != nil {
   368  			return fmt.Errorf("failed to parse TaskTokenTTL %q: %v", v.config.TaskTokenTTL, err)
   369  		}
   370  
   371  		if d.Nanoseconds() < minimumTokenTTL.Nanoseconds() {
   372  			return fmt.Errorf("ChildTokenTTL is less than minimum allowed of %v", minimumTokenTTL)
   373  		}
   374  
   375  		v.childTTL = v.config.TaskTokenTTL
   376  	} else {
   377  		// Default the TaskTokenTTL
   378  		v.childTTL = defaultTokenTTL
   379  	}
   380  
   381  	// Get the Vault API configuration
   382  	apiConf, err := v.config.ApiConfig()
   383  	if err != nil {
   384  		return fmt.Errorf("Failed to create Vault API config: %v", err)
   385  	}
   386  
   387  	// Create the Vault API client
   388  	client, err := vapi.NewClient(apiConf)
   389  	if err != nil {
   390  		v.logger.Printf("[ERR] vault: failed to create Vault client. Not retrying: %v", err)
   391  		return err
   392  	}
   393  
   394  	// Set the token and store the client
   395  	v.token = v.config.Token
   396  	client.SetToken(v.token)
   397  	v.client = client
   398  	v.auth = client.Auth().Token()
   399  	return nil
   400  }
   401  
   402  // establishConnection is used to make first contact with Vault. This should be
   403  // called in a go-routine since the connection is retried until the Vault Client
   404  // is stopped or the connection is successfully made at which point the renew
   405  // loop is started.
   406  func (v *vaultClient) establishConnection() {
   407  	// Create the retry timer and set initial duration to zero so it fires
   408  	// immediately
   409  	retryTimer := time.NewTimer(0)
   410  	initStatus := false
   411  OUTER:
   412  	for {
   413  		select {
   414  		case <-v.tomb.Dying():
   415  			return
   416  		case <-retryTimer.C:
   417  			// Ensure the API is reachable
   418  			if !initStatus {
   419  				if _, err := v.client.Sys().InitStatus(); err != nil {
   420  					v.logger.Printf("[WARN] vault: failed to contact Vault API. Retrying in %v: %v",
   421  						v.config.ConnectionRetryIntv, err)
   422  					retryTimer.Reset(v.config.ConnectionRetryIntv)
   423  					continue OUTER
   424  				}
   425  				initStatus = true
   426  			}
   427  			// Retry validating the token till success
   428  			if err := v.parseSelfToken(); err != nil {
   429  				v.logger.Printf("[ERR] vault: failed to validate self token/role. Retrying in %v: %v", v.config.ConnectionRetryIntv, err)
   430  				retryTimer.Reset(v.config.ConnectionRetryIntv)
   431  				v.l.Lock()
   432  				v.connEstablished = true
   433  				v.connEstablishedErr = fmt.Errorf("Nomad Server failed to establish connections to Vault: %v", err)
   434  				v.l.Unlock()
   435  				continue OUTER
   436  			}
   437  			break OUTER
   438  		}
   439  	}
   440  
   441  	// Set the wrapping function such that token creation is wrapped now
   442  	// that we know our role
   443  	v.client.SetWrappingLookupFunc(v.getWrappingFn())
   444  
   445  	// If we are given a non-root token, start renewing it
   446  	if v.tokenData.Root && v.tokenData.CreationTTL == 0 {
   447  		v.logger.Printf("[DEBUG] vault: not renewing token as it is root")
   448  	} else {
   449  		v.logger.Printf("[DEBUG] vault: token lease duration is %v",
   450  			time.Duration(v.tokenData.CreationTTL)*time.Second)
   451  		v.tomb.Go(wrapNilError(v.renewalLoop))
   452  	}
   453  
   454  	v.l.Lock()
   455  	v.connEstablished = true
   456  	v.connEstablishedErr = nil
   457  	v.l.Unlock()
   458  }
   459  
   460  // renewalLoop runs the renew loop. This should only be called if we are given a
   461  // non-root token.
   462  func (v *vaultClient) renewalLoop() {
   463  	// Create the renewal timer and set initial duration to zero so it fires
   464  	// immediately
   465  	authRenewTimer := time.NewTimer(0)
   466  
   467  	// Backoff is to reduce the rate we try to renew with Vault under error
   468  	// situations
   469  	backoff := 0.0
   470  
   471  	for {
   472  		select {
   473  		case <-v.tomb.Dying():
   474  			return
   475  		case <-authRenewTimer.C:
   476  			// Renew the token and determine the new expiration
   477  			err := v.renew()
   478  			currentExpiration := v.lastRenewed.Add(time.Duration(v.tokenData.CreationTTL) * time.Second)
   479  
   480  			// Successfully renewed
   481  			if err == nil {
   482  				// If we take the expiration (lastRenewed + auth duration) and
   483  				// subtract the current time, we get a duration until expiry.
   484  				// Set the timer to poke us after half of that time is up.
   485  				durationUntilRenew := currentExpiration.Sub(time.Now()) / 2
   486  
   487  				v.logger.Printf("[INFO] vault: renewing token in %v", durationUntilRenew)
   488  				authRenewTimer.Reset(durationUntilRenew)
   489  
   490  				// Reset any backoff
   491  				backoff = 0
   492  				break
   493  			}
   494  
   495  			// Back off, increasing the amount of backoff each time. There are some rules:
   496  			//
   497  			// * If we have an existing authentication that is going to expire,
   498  			// never back off more than half of the amount of time remaining
   499  			// until expiration
   500  			// * Never back off more than 30 seconds multiplied by a random
   501  			// value between 1 and 2
   502  			// * Use randomness so that many clients won't keep hitting Vault
   503  			// at the same time
   504  
   505  			// Set base values and add some backoff
   506  
   507  			v.logger.Printf("[WARN] vault: got error or bad auth, so backing off: %v", err)
   508  			switch {
   509  			case backoff < 5:
   510  				backoff = 5
   511  			case backoff >= 24:
   512  				backoff = 30
   513  			default:
   514  				backoff = backoff * 1.25
   515  			}
   516  
   517  			// Add randomness
   518  			backoff = backoff * (1.0 + rand.Float64())
   519  
   520  			maxBackoff := currentExpiration.Sub(time.Now()) / 2
   521  			if maxBackoff < 0 {
   522  				// We have failed to renew the token past its expiration. Stop
   523  				// renewing with Vault.
   524  				v.logger.Printf("[ERR] vault: failed to renew Vault token before lease expiration. Shutting down Vault client")
   525  				v.l.Lock()
   526  				v.connEstablished = false
   527  				v.connEstablishedErr = err
   528  				v.l.Unlock()
   529  				return
   530  
   531  			} else if backoff > maxBackoff.Seconds() {
   532  				backoff = maxBackoff.Seconds()
   533  			}
   534  
   535  			durationUntilRetry := time.Duration(backoff) * time.Second
   536  			v.logger.Printf("[INFO] vault: backing off for %v", durationUntilRetry)
   537  
   538  			authRenewTimer.Reset(durationUntilRetry)
   539  		}
   540  	}
   541  }
   542  
   543  // renew attempts to renew our Vault token. If the renewal fails, an error is
   544  // returned. This method updates the lastRenewed time
   545  func (v *vaultClient) renew() error {
   546  	// Attempt to renew the token
   547  	secret, err := v.auth.RenewSelf(v.tokenData.CreationTTL)
   548  	if err != nil {
   549  		return err
   550  	}
   551  
   552  	auth := secret.Auth
   553  	if auth == nil {
   554  		return fmt.Errorf("renewal successful but not auth information returned")
   555  	} else if auth.LeaseDuration == 0 {
   556  		return fmt.Errorf("renewal successful but no lease duration returned")
   557  	}
   558  
   559  	v.lastRenewed = time.Now()
   560  	v.logger.Printf("[DEBUG] vault: successfully renewed server token")
   561  	return nil
   562  }
   563  
   564  // getWrappingFn returns an appropriate wrapping function for Nomad Servers
   565  func (v *vaultClient) getWrappingFn() func(operation, path string) string {
   566  	createPath := "auth/token/create"
   567  	role := v.getRole()
   568  	if role != "" {
   569  		createPath = fmt.Sprintf("auth/token/create/%s", role)
   570  	}
   571  
   572  	return func(operation, path string) string {
   573  		// Only wrap the token create operation
   574  		if operation != "POST" || path != createPath {
   575  			return ""
   576  		}
   577  
   578  		return vaultTokenCreateTTL
   579  	}
   580  }
   581  
   582  // parseSelfToken looks up the Vault token in Vault and parses its data storing
   583  // it in the client. If the token is not valid for Nomads purposes an error is
   584  // returned.
   585  func (v *vaultClient) parseSelfToken() error {
   586  	// Get the initial lease duration
   587  	auth := v.client.Auth().Token()
   588  	var self *vapi.Secret
   589  
   590  	// Try looking up the token using the self endpoint
   591  	secret, err := auth.LookupSelf()
   592  	if err != nil {
   593  		// Try looking up our token directly
   594  		self, err = auth.Lookup(v.client.Token())
   595  		if err != nil {
   596  			return fmt.Errorf("failed to lookup Vault periodic token: %v", err)
   597  		}
   598  	}
   599  	self = secret
   600  
   601  	// Read and parse the fields
   602  	var data tokenData
   603  	if err := mapstructure.WeakDecode(self.Data, &data); err != nil {
   604  		return fmt.Errorf("failed to parse Vault token's data block: %v", err)
   605  	}
   606  
   607  	root := false
   608  	for _, p := range data.Policies {
   609  		if p == "root" {
   610  			root = true
   611  			break
   612  		}
   613  	}
   614  
   615  	// Store the token data
   616  	data.Root = root
   617  	v.tokenData = &data
   618  
   619  	// The criteria that must be met for the token to be valid are as follows:
   620  	// 1) If token is non-root or is but has a creation ttl
   621  	//   a) The token must be renewable
   622  	//   b) Token must have a non-zero TTL
   623  	// 2) Must have update capability for "auth/token/lookup/" (used to verify incoming tokens)
   624  	// 3) Must have update capability for "/auth/token/revoke-accessor/" (used to revoke unneeded tokens)
   625  	// 4) If configured to create tokens against a role:
   626  	//   a) Must have read capability for "auth/token/roles/<role_name" (Can just attempt a read)
   627  	//   b) Must have update capability for path "auth/token/create/<role_name>"
   628  	//   c) Role must:
   629  	//     1) Must allow tokens to be renewed
   630  	//     2) Must not have an explicit max TTL
   631  	//     3) Must have non-zero period
   632  	// 5) If not configured against a role, the token must be root
   633  
   634  	var mErr multierror.Error
   635  	role := v.getRole()
   636  	if !root {
   637  		// All non-root tokens must be renewable
   638  		if !data.Renewable {
   639  			multierror.Append(&mErr, fmt.Errorf("Vault token is not renewable or root"))
   640  		}
   641  
   642  		// All non-root tokens must have a lease duration
   643  		if data.CreationTTL == 0 {
   644  			multierror.Append(&mErr, fmt.Errorf("invalid lease duration of zero"))
   645  		}
   646  
   647  		// The lease duration can not be expired
   648  		if data.TTL == 0 {
   649  			multierror.Append(&mErr, fmt.Errorf("token TTL is zero"))
   650  		}
   651  
   652  		// There must be a valid role since we aren't root
   653  		if role == "" {
   654  			multierror.Append(&mErr, fmt.Errorf("token role name must be set when not using a root token"))
   655  		}
   656  
   657  	} else if data.CreationTTL != 0 {
   658  		// If the root token has a TTL it must be renewable
   659  		if !data.Renewable {
   660  			multierror.Append(&mErr, fmt.Errorf("Vault token has a TTL but is not renewable"))
   661  		} else if data.TTL == 0 {
   662  			// If the token has a TTL make sure it has not expired
   663  			multierror.Append(&mErr, fmt.Errorf("token TTL is zero"))
   664  		}
   665  	}
   666  
   667  	// Check we have the correct capabilities
   668  	if err := v.validateCapabilities(role, root); err != nil {
   669  		multierror.Append(&mErr, err)
   670  	}
   671  
   672  	// If given a role validate it
   673  	if role != "" {
   674  		if err := v.validateRole(role); err != nil {
   675  			multierror.Append(&mErr, err)
   676  		}
   677  	}
   678  
   679  	return mErr.ErrorOrNil()
   680  }
   681  
   682  // getRole returns the role name to be used when creating tokens
   683  func (v *vaultClient) getRole() string {
   684  	if v.config.Role != "" {
   685  		return v.config.Role
   686  	}
   687  
   688  	return v.tokenData.Role
   689  }
   690  
   691  // validateCapabilities checks that Nomad's Vault token has the correct
   692  // capabilities.
   693  func (v *vaultClient) validateCapabilities(role string, root bool) error {
   694  	// Check if the token can lookup capabilities.
   695  	var mErr multierror.Error
   696  	_, _, err := v.hasCapability(vaultCapabilitiesLookupPath, vaultCapabilitiesCapability)
   697  	if err != nil {
   698  		// Check if there is a permission denied
   699  		if structs.VaultUnrecoverableError.MatchString(err.Error()) {
   700  			// Since we can't read permissions, we just log a warning that we
   701  			// can't tell if the Vault token will work
   702  			msg := fmt.Sprintf("Can not lookup token capabilities. "+
   703  				"As such certain operations may fail in the future. "+
   704  				"Please give Nomad a Vault token with one of the following "+
   705  				"capabilities %q on %q so that the required capabilities can be verified",
   706  				vaultCapabilitiesCapability, vaultCapabilitiesLookupPath)
   707  			v.logger.Printf("[WARN] vault: %s", msg)
   708  			return nil
   709  		} else {
   710  			multierror.Append(&mErr, err)
   711  		}
   712  	}
   713  
   714  	// verify is a helper function that verifies the token has one of the
   715  	// capabilities on the given path and adds an issue to the error
   716  	verify := func(path string, requiredCaps []string) {
   717  		ok, caps, err := v.hasCapability(path, requiredCaps)
   718  		if err != nil {
   719  			multierror.Append(&mErr, err)
   720  		} else if !ok {
   721  			multierror.Append(&mErr,
   722  				fmt.Errorf("token must have one of the following capabilities %q on %q; has %v", requiredCaps, path, caps))
   723  		}
   724  	}
   725  
   726  	// Check if we are verifying incoming tokens
   727  	if !v.config.AllowsUnauthenticated() {
   728  		verify(vaultTokenLookupPath, vaultTokenLookupCapability)
   729  	}
   730  
   731  	// Verify we can renew our selves tokens
   732  	verify(vaultTokenRenewPath, vaultTokenRenewCapability)
   733  
   734  	// Verify we can revoke tokens
   735  	verify(vaultTokenRevokePath, vaultTokenRevokeCapability)
   736  
   737  	// If we are using a role verify the capability
   738  	if role != "" {
   739  		// Verify we can read the role
   740  		verify(fmt.Sprintf(vaultRoleLookupPath, role), vaultRoleLookupCapability)
   741  
   742  		// Verify we can create from the role
   743  		verify(fmt.Sprintf(vaultTokenRoleCreatePath, role), vaultTokenRoleCreateCapability)
   744  	}
   745  
   746  	return mErr.ErrorOrNil()
   747  }
   748  
   749  // hasCapability takes a path and returns whether the token has at least one of
   750  // the required capabilities on the given path. It also returns the set of
   751  // capabilities the token does have as well as any error that occurred.
   752  func (v *vaultClient) hasCapability(path string, required []string) (bool, []string, error) {
   753  	caps, err := v.client.Sys().CapabilitiesSelf(path)
   754  	if err != nil {
   755  		return false, nil, err
   756  	}
   757  	for _, c := range caps {
   758  		for _, r := range required {
   759  			if c == r {
   760  				return true, caps, nil
   761  			}
   762  		}
   763  	}
   764  	return false, caps, nil
   765  }
   766  
   767  // validateRole contacts Vault and checks that the given Vault role is valid for
   768  // the purposes of being used by Nomad
   769  func (v *vaultClient) validateRole(role string) error {
   770  	if role == "" {
   771  		return fmt.Errorf("Invalid empty role name")
   772  	}
   773  
   774  	// Validate the role
   775  	rsecret, err := v.client.Logical().Read(fmt.Sprintf("auth/token/roles/%s", role))
   776  	if err != nil {
   777  		return fmt.Errorf("failed to lookup role %q: %v", role, err)
   778  	}
   779  	if rsecret == nil {
   780  		return fmt.Errorf("Role %q does not exist", role)
   781  	}
   782  
   783  	// Read and parse the fields
   784  	var data struct {
   785  		ExplicitMaxTtl int `mapstructure:"explicit_max_ttl"`
   786  		Orphan         bool
   787  		Period         int
   788  		Renewable      bool
   789  	}
   790  	if err := mapstructure.WeakDecode(rsecret.Data, &data); err != nil {
   791  		return fmt.Errorf("failed to parse Vault role's data block: %v", err)
   792  	}
   793  
   794  	// Validate the role is acceptable
   795  	var mErr multierror.Error
   796  	if !data.Renewable {
   797  		multierror.Append(&mErr, fmt.Errorf("Role must allow tokens to be renewed"))
   798  	}
   799  
   800  	if data.ExplicitMaxTtl != 0 {
   801  		multierror.Append(&mErr, fmt.Errorf("Role can not use an explicit max ttl. Token must be periodic."))
   802  	}
   803  
   804  	if data.Period == 0 {
   805  		multierror.Append(&mErr, fmt.Errorf("Role must have a non-zero period to make tokens periodic."))
   806  	}
   807  
   808  	return mErr.ErrorOrNil()
   809  }
   810  
   811  // ConnectionEstablished returns whether a connection to Vault has been
   812  // established and any error that potentially caused it to be false
   813  func (v *vaultClient) ConnectionEstablished() (bool, error) {
   814  	v.l.Lock()
   815  	defer v.l.Unlock()
   816  	return v.connEstablished, v.connEstablishedErr
   817  }
   818  
   819  // Enabled returns whether the client is active
   820  func (v *vaultClient) Enabled() bool {
   821  	v.l.Lock()
   822  	defer v.l.Unlock()
   823  	return v.config.IsEnabled()
   824  }
   825  
   826  // Active returns whether the client is active
   827  func (v *vaultClient) Active() bool {
   828  	return atomic.LoadInt32(&v.active) == 1
   829  }
   830  
   831  // CreateToken takes the allocation and task and returns an appropriate Vault
   832  // token. The call is rate limited and may be canceled with the passed policy.
   833  // When the error is recoverable, it will be of type RecoverableError
   834  func (v *vaultClient) CreateToken(ctx context.Context, a *structs.Allocation, task string) (*vapi.Secret, error) {
   835  	if !v.Enabled() {
   836  		return nil, fmt.Errorf("Vault integration disabled")
   837  	}
   838  	if !v.Active() {
   839  		return nil, structs.NewRecoverableError(fmt.Errorf("Vault client not active"), true)
   840  	}
   841  
   842  	// Check if we have established a connection with Vault
   843  	if established, err := v.ConnectionEstablished(); !established && err == nil {
   844  		return nil, structs.NewRecoverableError(fmt.Errorf("Connection to Vault has not been established"), true)
   845  	} else if err != nil {
   846  		return nil, err
   847  	}
   848  
   849  	// Track how long the request takes
   850  	defer metrics.MeasureSince([]string{"nomad", "vault", "create_token"}, time.Now())
   851  
   852  	// Retrieve the Vault block for the task
   853  	policies := a.Job.VaultPolicies()
   854  	if policies == nil {
   855  		return nil, fmt.Errorf("Job doesn't require Vault policies")
   856  	}
   857  	tg, ok := policies[a.TaskGroup]
   858  	if !ok {
   859  		return nil, fmt.Errorf("Task group does not require Vault policies")
   860  	}
   861  	taskVault, ok := tg[task]
   862  	if !ok {
   863  		return nil, fmt.Errorf("Task does not require Vault policies")
   864  	}
   865  
   866  	// Build the creation request
   867  	req := &vapi.TokenCreateRequest{
   868  		Policies: taskVault.Policies,
   869  		Metadata: map[string]string{
   870  			"AllocationID": a.ID,
   871  			"Task":         task,
   872  			"NodeID":       a.NodeID,
   873  		},
   874  		TTL:         v.childTTL,
   875  		DisplayName: fmt.Sprintf("%s-%s", a.ID, task),
   876  	}
   877  
   878  	// Ensure we are under our rate limit
   879  	if err := v.limiter.Wait(ctx); err != nil {
   880  		return nil, err
   881  	}
   882  
   883  	// Make the request and switch depending on whether we are using a root
   884  	// token or a role based token
   885  	var secret *vapi.Secret
   886  	var err error
   887  	role := v.getRole()
   888  	if v.tokenData.Root && role == "" {
   889  		req.Period = v.childTTL
   890  		secret, err = v.auth.Create(req)
   891  	} else {
   892  		// Make the token using the role
   893  		secret, err = v.auth.CreateWithRole(req, v.getRole())
   894  	}
   895  
   896  	// Determine whether it is unrecoverable
   897  	if err != nil {
   898  		if structs.VaultUnrecoverableError.MatchString(err.Error()) {
   899  			return secret, err
   900  		}
   901  
   902  		// The error is recoverable
   903  		return nil, structs.NewRecoverableError(err, true)
   904  	}
   905  
   906  	// Validate the response
   907  	var validationErr error
   908  	if secret == nil {
   909  		validationErr = fmt.Errorf("Vault returned nil Secret")
   910  	} else if secret.WrapInfo == nil {
   911  		validationErr = fmt.Errorf("Vault returned Secret with nil WrapInfo. Secret warnings: %v", secret.Warnings)
   912  	} else if secret.WrapInfo.WrappedAccessor == "" {
   913  		validationErr = fmt.Errorf("Vault returned WrapInfo without WrappedAccessor. Secret warnings: %v", secret.Warnings)
   914  	}
   915  	if validationErr != nil {
   916  		v.logger.Printf("[WARN] vault: failed to CreateToken: %v", err)
   917  		return nil, structs.NewRecoverableError(validationErr, true)
   918  	}
   919  
   920  	// Got a valid response
   921  	return secret, nil
   922  }
   923  
   924  // LookupToken takes a Vault token and does a lookup against Vault. The call is
   925  // rate limited and may be canceled with passed context.
   926  func (v *vaultClient) LookupToken(ctx context.Context, token string) (*vapi.Secret, error) {
   927  	if !v.Enabled() {
   928  		return nil, fmt.Errorf("Vault integration disabled")
   929  	}
   930  
   931  	if !v.Active() {
   932  		return nil, fmt.Errorf("Vault client not active")
   933  	}
   934  
   935  	// Check if we have established a connection with Vault
   936  	if established, err := v.ConnectionEstablished(); !established && err == nil {
   937  		return nil, structs.NewRecoverableError(fmt.Errorf("Connection to Vault has not been established"), true)
   938  	} else if err != nil {
   939  		return nil, err
   940  	}
   941  
   942  	// Track how long the request takes
   943  	defer metrics.MeasureSince([]string{"nomad", "vault", "lookup_token"}, time.Now())
   944  
   945  	// Ensure we are under our rate limit
   946  	if err := v.limiter.Wait(ctx); err != nil {
   947  		return nil, err
   948  	}
   949  
   950  	// Lookup the token
   951  	return v.auth.Lookup(token)
   952  }
   953  
   954  // PoliciesFrom parses the set of policies returned by a token lookup.
   955  func PoliciesFrom(s *vapi.Secret) ([]string, error) {
   956  	if s == nil {
   957  		return nil, fmt.Errorf("cannot parse nil Vault secret")
   958  	}
   959  	var data tokenData
   960  	if err := mapstructure.WeakDecode(s.Data, &data); err != nil {
   961  		return nil, fmt.Errorf("failed to parse Vault token's data block: %v", err)
   962  	}
   963  
   964  	return data.Policies, nil
   965  }
   966  
   967  // RevokeTokens revokes the passed set of accessors. If committed is set, the
   968  // purge function passed to the client is called. If there is an error purging
   969  // either because of Vault failures or because of the purge function, the
   970  // revocation is retried until the tokens TTL.
   971  func (v *vaultClient) RevokeTokens(ctx context.Context, accessors []*structs.VaultAccessor, committed bool) error {
   972  	if !v.Enabled() {
   973  		return nil
   974  	}
   975  
   976  	if !v.Active() {
   977  		return fmt.Errorf("Vault client not active")
   978  	}
   979  
   980  	// Track how long the request takes
   981  	defer metrics.MeasureSince([]string{"nomad", "vault", "revoke_tokens"}, time.Now())
   982  
   983  	// Check if we have established a connection with Vault. If not just add it
   984  	// to the queue
   985  	if established, err := v.ConnectionEstablished(); !established && err == nil {
   986  		// Only bother tracking it for later revocation if the accessor was
   987  		// committed
   988  		if committed {
   989  			v.storeForRevocation(accessors)
   990  		}
   991  
   992  		// Track that we are abandoning these accessors.
   993  		metrics.IncrCounter([]string{"nomad", "vault", "undistributed_tokens_abandoned"}, float32(len(accessors)))
   994  		return nil
   995  	}
   996  
   997  	// Attempt to revoke immediately and if it fails, add it to the revoke queue
   998  	err := v.parallelRevoke(ctx, accessors)
   999  	if err != nil {
  1000  		// If it is uncommitted, it is a best effort revoke as it will shortly
  1001  		// TTL within the cubbyhole and has not been leaked to any outside
  1002  		// system
  1003  		if !committed {
  1004  			metrics.IncrCounter([]string{"nomad", "vault", "undistributed_tokens_abandoned"}, float32(len(accessors)))
  1005  			return nil
  1006  		}
  1007  
  1008  		v.logger.Printf("[WARN] vault: failed to revoke tokens. Will reattempt until TTL: %v", err)
  1009  		v.storeForRevocation(accessors)
  1010  		return nil
  1011  	} else if !committed {
  1012  		// Mark that it was revoked but there is nothing to purge so exit
  1013  		metrics.IncrCounter([]string{"nomad", "vault", "undistributed_tokens_revoked"}, float32(len(accessors)))
  1014  		return nil
  1015  	}
  1016  
  1017  	if err := v.purgeFn(accessors); err != nil {
  1018  		v.logger.Printf("[ERR] vault: failed to purge Vault accessors: %v", err)
  1019  		v.storeForRevocation(accessors)
  1020  		return nil
  1021  	}
  1022  
  1023  	// Track that it was revoked successfully
  1024  	metrics.IncrCounter([]string{"nomad", "vault", "distributed_tokens_revoked"}, float32(len(accessors)))
  1025  
  1026  	return nil
  1027  }
  1028  
  1029  // storeForRevocation stores the passed set of accessors for revocation. It
  1030  // captures their effective TTL by storing their create TTL plus the current
  1031  // time.
  1032  func (v *vaultClient) storeForRevocation(accessors []*structs.VaultAccessor) {
  1033  	v.revLock.Lock()
  1034  	v.statsLock.Lock()
  1035  	now := time.Now()
  1036  	for _, a := range accessors {
  1037  		v.revoking[a] = now.Add(time.Duration(a.CreationTTL) * time.Second)
  1038  	}
  1039  	v.stats.TrackedForRevoke = len(v.revoking)
  1040  	v.statsLock.Unlock()
  1041  	v.revLock.Unlock()
  1042  }
  1043  
  1044  // parallelRevoke revokes the passed VaultAccessors in parallel.
  1045  func (v *vaultClient) parallelRevoke(ctx context.Context, accessors []*structs.VaultAccessor) error {
  1046  	if !v.Enabled() {
  1047  		return fmt.Errorf("Vault integration disabled")
  1048  	}
  1049  
  1050  	if !v.Active() {
  1051  		return fmt.Errorf("Vault client not active")
  1052  	}
  1053  
  1054  	// Check if we have established a connection with Vault
  1055  	if established, err := v.ConnectionEstablished(); !established && err == nil {
  1056  		return structs.NewRecoverableError(fmt.Errorf("Connection to Vault has not been established"), true)
  1057  	} else if err != nil {
  1058  		return err
  1059  	}
  1060  
  1061  	g, pCtx := errgroup.WithContext(ctx)
  1062  
  1063  	// Cap the handlers
  1064  	handlers := len(accessors)
  1065  	if handlers > maxParallelRevokes {
  1066  		handlers = maxParallelRevokes
  1067  	}
  1068  
  1069  	// Create the Vault Tokens
  1070  	input := make(chan *structs.VaultAccessor, handlers)
  1071  	for i := 0; i < handlers; i++ {
  1072  		g.Go(func() error {
  1073  			for {
  1074  				select {
  1075  				case va, ok := <-input:
  1076  					if !ok {
  1077  						return nil
  1078  					}
  1079  
  1080  					if err := v.auth.RevokeAccessor(va.Accessor); err != nil {
  1081  						return fmt.Errorf("failed to revoke token (alloc: %q, node: %q, task: %q): %v", va.AllocID, va.NodeID, va.Task, err)
  1082  					}
  1083  				case <-pCtx.Done():
  1084  					return nil
  1085  				}
  1086  			}
  1087  		})
  1088  	}
  1089  
  1090  	// Send the input
  1091  	go func() {
  1092  		defer close(input)
  1093  		for _, va := range accessors {
  1094  			select {
  1095  			case <-pCtx.Done():
  1096  				return
  1097  			case input <- va:
  1098  			}
  1099  		}
  1100  
  1101  	}()
  1102  
  1103  	// Wait for everything to complete
  1104  	return g.Wait()
  1105  }
  1106  
  1107  // revokeDaemon should be called in a goroutine and is used to periodically
  1108  // revoke Vault accessors that failed the original revocation
  1109  func (v *vaultClient) revokeDaemon() {
  1110  	ticker := time.NewTicker(vaultRevocationIntv)
  1111  	defer ticker.Stop()
  1112  
  1113  	for {
  1114  		select {
  1115  		case <-v.tomb.Dying():
  1116  			return
  1117  		case now := <-ticker.C:
  1118  			if established, _ := v.ConnectionEstablished(); !established {
  1119  				continue
  1120  			}
  1121  
  1122  			v.revLock.Lock()
  1123  
  1124  			// Fast path
  1125  			if len(v.revoking) == 0 {
  1126  				v.revLock.Unlock()
  1127  				continue
  1128  			}
  1129  
  1130  			// Build the list of allocations that need to revoked while pruning any TTL'd checks
  1131  			revoking := make([]*structs.VaultAccessor, 0, len(v.revoking))
  1132  			for va, ttl := range v.revoking {
  1133  				if now.After(ttl) {
  1134  					delete(v.revoking, va)
  1135  				} else {
  1136  					revoking = append(revoking, va)
  1137  				}
  1138  			}
  1139  
  1140  			if err := v.parallelRevoke(context.Background(), revoking); err != nil {
  1141  				v.logger.Printf("[WARN] vault: background token revocation errored: %v", err)
  1142  				v.revLock.Unlock()
  1143  				continue
  1144  			}
  1145  
  1146  			// Unlock before a potentially expensive operation
  1147  			v.revLock.Unlock()
  1148  
  1149  			// Call the passed in token revocation function
  1150  			if err := v.purgeFn(revoking); err != nil {
  1151  				// Can continue since revocation is idempotent
  1152  				v.logger.Printf("[ERR] vault: token revocation errored: %v", err)
  1153  				continue
  1154  			}
  1155  
  1156  			// Track that tokens were revoked successfully
  1157  			metrics.IncrCounter([]string{"nomad", "vault", "distributed_tokens_revoked"}, float32(len(revoking)))
  1158  
  1159  			// Can delete from the tracked list now that we have purged
  1160  			v.revLock.Lock()
  1161  			v.statsLock.Lock()
  1162  			for _, va := range revoking {
  1163  				delete(v.revoking, va)
  1164  			}
  1165  			v.stats.TrackedForRevoke = len(v.revoking)
  1166  			v.statsLock.Unlock()
  1167  			v.revLock.Unlock()
  1168  
  1169  		}
  1170  	}
  1171  }
  1172  
  1173  // purgeVaultAccessors creates a Raft transaction to remove the passed Vault
  1174  // Accessors
  1175  func (s *Server) purgeVaultAccessors(accessors []*structs.VaultAccessor) error {
  1176  	// Commit this update via Raft
  1177  	req := structs.VaultAccessorsRequest{Accessors: accessors}
  1178  	_, _, err := s.raftApply(structs.VaultAccessorDeregisterRequestType, req)
  1179  	return err
  1180  }
  1181  
  1182  // wrapNilError is a helper that returns a wrapped function that returns a nil
  1183  // error
  1184  func wrapNilError(f func()) func() error {
  1185  	return func() error {
  1186  		f()
  1187  		return nil
  1188  	}
  1189  }
  1190  
  1191  // setLimit is used to update the rate limit
  1192  func (v *vaultClient) setLimit(l rate.Limit) {
  1193  	v.l.Lock()
  1194  	defer v.l.Unlock()
  1195  	v.limiter = rate.NewLimiter(l, int(l))
  1196  }
  1197  
  1198  // Stats is used to query the state of the blocked eval tracker.
  1199  func (v *vaultClient) Stats() *VaultStats {
  1200  	// Allocate a new stats struct
  1201  	stats := new(VaultStats)
  1202  
  1203  	v.statsLock.RLock()
  1204  	defer v.statsLock.RUnlock()
  1205  
  1206  	// Copy all the stats
  1207  	stats.TrackedForRevoke = v.stats.TrackedForRevoke
  1208  
  1209  	return stats
  1210  }
  1211  
  1212  // EmitStats is used to export metrics about the blocked eval tracker while enabled
  1213  func (v *vaultClient) EmitStats(period time.Duration, stopCh chan struct{}) {
  1214  	for {
  1215  		select {
  1216  		case <-time.After(period):
  1217  			stats := v.Stats()
  1218  			metrics.SetGauge([]string{"nomad", "vault", "distributed_tokens_revoking"}, float32(stats.TrackedForRevoke))
  1219  		case <-stopCh:
  1220  			return
  1221  		}
  1222  	}
  1223  }