github.com/jrxfive/nomad@v0.6.1-0.20170802162750-1fef470e89bf/nomad/vault.go (about)

     1  package nomad
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"log"
     8  	"math/rand"
     9  	"regexp"
    10  	"sync"
    11  	"sync/atomic"
    12  	"time"
    13  
    14  	"gopkg.in/tomb.v2"
    15  
    16  	metrics "github.com/armon/go-metrics"
    17  	multierror "github.com/hashicorp/go-multierror"
    18  	"github.com/hashicorp/nomad/nomad/structs"
    19  	"github.com/hashicorp/nomad/nomad/structs/config"
    20  	vapi "github.com/hashicorp/vault/api"
    21  	"github.com/mitchellh/mapstructure"
    22  
    23  	"golang.org/x/sync/errgroup"
    24  	"golang.org/x/time/rate"
    25  )
    26  
    27  const (
    28  	// vaultTokenCreateTTL is the duration the wrapped token for the client is
    29  	// valid for. The units are in seconds.
    30  	vaultTokenCreateTTL = "60s"
    31  
    32  	// minimumTokenTTL is the minimum Token TTL allowed for child tokens.
    33  	minimumTokenTTL = 5 * time.Minute
    34  
    35  	// defaultTokenTTL is the default Token TTL used when the passed token is a
    36  	// root token such that child tokens aren't being created against a role
    37  	// that has defined a TTL
    38  	defaultTokenTTL = "72h"
    39  
    40  	// requestRateLimit is the maximum number of requests per second Nomad will
    41  	// make against Vault
    42  	requestRateLimit rate.Limit = 500.0
    43  
    44  	// maxParallelRevokes is the maximum number of parallel Vault
    45  	// token revocation requests
    46  	maxParallelRevokes = 64
    47  
    48  	// vaultRevocationIntv is the interval at which Vault tokens that failed
    49  	// initial revocation are retried
    50  	vaultRevocationIntv = 5 * time.Minute
    51  
    52  	// vaultCapabilitiesLookupPath is the path to lookup the capabilities of
    53  	// one's token.
    54  	vaultCapabilitiesLookupPath = "sys/capabilities-self"
    55  
    56  	// vaultTokenRenewPath is the path used to renew our token
    57  	vaultTokenRenewPath = "auth/token/renew-self"
    58  
    59  	// vaultTokenLookupPath is the path used to lookup a token
    60  	vaultTokenLookupPath = "auth/token/lookup"
    61  
    62  	// vaultTokenLookupSelfPath is the path used to lookup self token
    63  	vaultTokenLookupSelfPath = "auth/token/lookup-self"
    64  
    65  	// vaultTokenRevokePath is the path used to revoke a token
    66  	vaultTokenRevokePath = "auth/token/revoke-accessor"
    67  
    68  	// vaultRoleLookupPath is the path to lookup a role
    69  	vaultRoleLookupPath = "auth/token/roles/%s"
    70  
    71  	// vaultTokenRoleCreatePath is the path to create a token from a role
    72  	vaultTokenRoleCreatePath = "auth/token/create/%s"
    73  )
    74  
    75  var (
    76  	// vaultUnrecoverableError matches error text containing "Code: 400", "Code: 403" or "Code: 404"
    77  	vaultUnrecoverableError = regexp.MustCompile(`Code:\s+40(0|3|4)`)
    78  
    79  	// vaultCapabilitiesCapability is the expected capability of Nomad's Vault
    80  	// token on the path. The token must have at least one of the
    81  	// capabilities.
    82  	vaultCapabilitiesCapability = []string{"update", "root"}
    83  
    84  	// vaultTokenRenewCapability is the expected capability Nomad's
    85  	// Vault token should have on the path. The token must have at least one of
    86  	// the capabilities.
    87  	vaultTokenRenewCapability = []string{"update", "root"}
    88  
    89  	// vaultTokenLookupCapability is the expected capability Nomad's
    90  	// Vault token should have on the path. The token must have at least one of
    91  	// the capabilities.
    92  	vaultTokenLookupCapability = []string{"update", "root"}
    93  
    94  	// vaultTokenLookupSelfCapability is the expected capability Nomad's
    95  	// Vault token should have on the path. The token must have at least one of
    96  	// the capabilities.
    97  	vaultTokenLookupSelfCapability = []string{"update", "root"}
    98  
    99  	// vaultTokenRevokeCapability is the expected capability Nomad's
   100  	// Vault token should have on the path. The token must have at least one of
   101  	// the capabilities.
   102  	vaultTokenRevokeCapability = []string{"update", "root"}
   103  
   104  	// vaultRoleLookupCapability is the expected capability Nomad's Vault
   105  	// token should have on the path. The token must have at least one of the
   106  	// capabilities.
   107  	vaultRoleLookupCapability = []string{"read", "root"}
   108  
   109  	// vaultTokenRoleCreateCapability is the expected capability Nomad's Vault
   110  	// token should have on the path. The token must have at least one of the
   111  	// capabilities.
   112  	vaultTokenRoleCreateCapability = []string{"update", "root"}
   113  )
   114  
   115  // VaultClient is the Server's interface for interfacing with Vault
   116  type VaultClient interface {
   117  	// SetActive activates or de-activates the Vault client. When active, token
   118  	// creation/lookup/revocation operations are allowed.
   119  	SetActive(active bool)
   120  
   121  	// SetConfig updates the config used by the Vault client
   122  	SetConfig(config *config.VaultConfig) error
   123  
   124  	// CreateToken takes an allocation and task and returns an appropriate Vault
   125  	// Secret
   126  	CreateToken(ctx context.Context, a *structs.Allocation, task string) (*vapi.Secret, error)
   127  
   128  	// LookupToken takes a token string and returns its capabilities.
   129  	LookupToken(ctx context.Context, token string) (*vapi.Secret, error)
   130  
   131  	// RevokeTokens takes a set of token accessors and revokes the tokens
   132  	RevokeTokens(ctx context.Context, accessors []*structs.VaultAccessor, committed bool) error
   133  
   134  	// Stop is used to stop token renewal
   135  	Stop()
   136  
   137  	// Running returns whether the Vault client is running
   138  	Running() bool
   139  
   140  	// Stats returns the Vault client's statistics
   141  	Stats() *VaultStats
   142  
   143  	// EmitStats emits the client's statistics at the given period until stopCh
   144  	// is signalled.
   145  	EmitStats(period time.Duration, stopCh chan struct{})
   146  }
   147  
   148  // VaultStats holds all the stats about Vault tokens created and managed by
   149  // Nomad.
   150  type VaultStats struct {
   151  	// TrackedForRevoke is the count of tokens that are being tracked to be
   152  	// revoked since they could not be immediately revoked.
   153  	TrackedForRevoke int
   154  }
   155  
   156  // PurgeVaultAccessorFn is called to remove VaultAccessors from the system. If
   157  // the function returns an error, the token will still be tracked and revocation
   158  // will retry until there is a success
   159  type PurgeVaultAccessorFn func(accessors []*structs.VaultAccessor) error
   160  
   161  // tokenData holds the relevant information about the Vault token passed to the
   162  // client.
   163  type tokenData struct {
   164  	CreationTTL int      `mapstructure:"creation_ttl"`
   165  	TTL         int      `mapstructure:"ttl"`
   166  	Renewable   bool     `mapstructure:"renewable"`
   167  	Policies    []string `mapstructure:"policies"`
   168  	Role        string   `mapstructure:"role"`
   169  	Root        bool // set by parseSelfToken when Policies contains "root"
   170  }
   171  
   172  // vaultClient is the Server's implementation of the VaultClient interface. The
   173  // client renews the PeriodicToken given in the Vault configuration and provides
   174  // the Server with the ability to create child tokens and lookup the permissions
   175  // of tokens.
   176  type vaultClient struct {
   177  	// limiter is used to rate limit requests to Vault
   178  	limiter *rate.Limiter
   179  
   180  	// client is the Vault API client
   181  	client *vapi.Client
   182  
   183  	// auth is the Vault token auth API client
   184  	auth *vapi.TokenAuth
   185  
   186  	// config is the user passed Vault config
   187  	config *config.VaultConfig
   188  
   189  	// connEstablished marks whether we have an established connection to Vault.
   190  	connEstablished bool
   191  
   192  	// connEstablishedErr marks an error that can occur when establishing a
   193  	// connection
   194  	connEstablishedErr error
   195  
   196  	// token is the raw token used by the client
   197  	token string
   198  
   199  	// tokenData is the data of the passed Vault token
   200  	tokenData *tokenData
   201  
   202  	// revoking tracks the VaultAccessors that must be revoked
   203  	revoking map[*structs.VaultAccessor]time.Time
   204  	purgeFn  PurgeVaultAccessorFn
   205  	revLock  sync.Mutex // held by SetActive while it replaces revoking
   206  
   207  	// active indicates whether the vaultClient is active. It should be
   208  	// accessed using a helper and updated atomically
   209  	active int32
   210  
   211  	// running indicates whether the vault client is started.
   212  	running bool
   213  
   214  	// childTTL is the TTL for child tokens.
   215  	childTTL string
   216  
   217  	// lastRenewed is the time the token was last renewed
   218  	lastRenewed time.Time
   219  
   220  	tomb   *tomb.Tomb
   221  	logger *log.Logger
   222  
   223  	// stats stores the stats
   224  	stats     *VaultStats
   225  	statsLock sync.RWMutex
   226  
   227  	// l is used to lock the configuration aspects of the client such that
   228  	// multiple callers can't cause conflicting config updates
   229  	l sync.Mutex
   230  }
   231  
   232  // NewVaultClient returns a Vault client from the given config. If the client
   233  // couldn't be made an error is returned.
   234  func NewVaultClient(c *config.VaultConfig, logger *log.Logger, purgeFn PurgeVaultAccessorFn) (*vaultClient, error) {
   235  	if c == nil {
   236  		return nil, fmt.Errorf("must pass valid VaultConfig")
   237  	}
   238  
   239  	if logger == nil {
   240  		return nil, fmt.Errorf("must pass valid logger")
   241  	}
   242  
   243  	v := &vaultClient{
   244  		config:   c,
   245  		logger:   logger,
   246  		limiter:  rate.NewLimiter(requestRateLimit, int(requestRateLimit)),
   247  		revoking: make(map[*structs.VaultAccessor]time.Time),
   248  		purgeFn:  purgeFn,
   249  		tomb:     &tomb.Tomb{},
   250  		stats:    new(VaultStats),
   251  	}
   252  
   253  	if v.config.IsEnabled() {
   254  		if err := v.buildClient(); err != nil {
   255  			return nil, err
   256  		}
   257  
   258  		// Launch the required goroutines
   259  		v.tomb.Go(wrapNilError(v.establishConnection))
   260  		v.tomb.Go(wrapNilError(v.revokeDaemon))
   261  
   262  		v.running = true
   263  	}
   264  
   265  	return v, nil
   266  }
   267  
   268  func (v *vaultClient) Stop() {
   269  	v.l.Lock()
   270  	running := v.running
   271  	v.running = false
   272  	v.l.Unlock()
   273  
   274  	if running {
   275  		v.tomb.Kill(nil)
   276  		v.tomb.Wait()
   277  		v.flush()
   278  	}
   279  }
   280  
   281  func (v *vaultClient) Running() bool {
   282  	v.l.Lock()
   283  	defer v.l.Unlock()
   284  	return v.running
   285  }
   286  
   287  // SetActive activates or de-activates the Vault client. When active, token
   288  // creation/lookup/revocation operation are allowed. All queued revocations are
   289  // cancelled if set un-active as it is assumed another instances is taking over
   290  func (v *vaultClient) SetActive(active bool) {
   291  	if active {
   292  		atomic.StoreInt32(&v.active, 1)
   293  	} else {
   294  		atomic.StoreInt32(&v.active, 0)
   295  	}
   296  
   297  	// Clear out the revoking tokens
   298  	v.revLock.Lock()
   299  	v.revoking = make(map[*structs.VaultAccessor]time.Time)
   300  	v.revLock.Unlock()
   301  
   302  	return
   303  }
   304  
   305  // flush is used to reset the state of the vault client
   306  func (v *vaultClient) flush() {
   307  	v.l.Lock()
   308  	defer v.l.Unlock()
   309  
   310  	v.client = nil
   311  	v.auth = nil
   312  	v.connEstablished = false
   313  	v.connEstablishedErr = nil
   314  	v.token = ""
   315  	v.tokenData = nil
   316  	v.revoking = make(map[*structs.VaultAccessor]time.Time)
   317  	v.childTTL = ""
   318  	v.tomb = &tomb.Tomb{}
   319  }
   320  
   321  // SetConfig is used to update the Vault config being used. A temporary outage
   322  // may occur after calling as it re-establishes a connection to Vault
   323  func (v *vaultClient) SetConfig(config *config.VaultConfig) error {
   324  	if config == nil {
   325  		return fmt.Errorf("must pass valid VaultConfig")
   326  	}
   327  
   328  	v.l.Lock()
   329  	defer v.l.Unlock()
   330  
   331  	// Kill any background routintes
   332  	if v.running {
   333  		// Stop accepting any new request
   334  		v.connEstablished = false
   335  
   336  		// Kill any background routine and create a new tomb
   337  		v.tomb.Kill(nil)
   338  		v.tomb.Wait()
   339  		v.tomb = &tomb.Tomb{}
   340  		v.running = false
   341  	}
   342  
   343  	// Store the new config
   344  	v.config = config
   345  
   346  	// Check if we should relaunch
   347  	if v.config.IsEnabled() {
   348  		// Rebuild the client
   349  		if err := v.buildClient(); err != nil {
   350  			return err
   351  		}
   352  
   353  		// Launch the required goroutines
   354  		v.tomb.Go(wrapNilError(v.establishConnection))
   355  		v.tomb.Go(wrapNilError(v.revokeDaemon))
   356  		v.running = true
   357  	}
   358  
   359  	return nil
   360  }
   361  
   362  // buildClient is used to build a Vault client based on the stored Vault config
   363  func (v *vaultClient) buildClient() error {
   364  	// Validate we have the required fields.
   365  	if v.config.Token == "" {
   366  		return errors.New("Vault token must be set")
   367  	} else if v.config.Addr == "" {
   368  		return errors.New("Vault address must be set")
   369  	}
   370  
   371  	// Parse the TTL if it is set
   372  	if v.config.TaskTokenTTL != "" {
   373  		d, err := time.ParseDuration(v.config.TaskTokenTTL)
   374  		if err != nil {
   375  			return fmt.Errorf("failed to parse TaskTokenTTL %q: %v", v.config.TaskTokenTTL, err)
   376  		}
   377  
   378  		if d.Nanoseconds() < minimumTokenTTL.Nanoseconds() {
   379  			return fmt.Errorf("ChildTokenTTL is less than minimum allowed of %v", minimumTokenTTL)
   380  		}
   381  
   382  		v.childTTL = v.config.TaskTokenTTL
   383  	} else {
   384  		// Default the TaskTokenTTL
   385  		v.childTTL = defaultTokenTTL
   386  	}
   387  
   388  	// Get the Vault API configuration
   389  	apiConf, err := v.config.ApiConfig()
   390  	if err != nil {
   391  		return fmt.Errorf("Failed to create Vault API config: %v", err)
   392  	}
   393  
   394  	// Create the Vault API client
   395  	client, err := vapi.NewClient(apiConf)
   396  	if err != nil {
   397  		v.logger.Printf("[ERR] vault: failed to create Vault client. Not retrying: %v", err)
   398  		return err
   399  	}
   400  
   401  	// Set the token and store the client
   402  	v.token = v.config.Token
   403  	client.SetToken(v.token)
   404  	v.client = client
   405  	v.auth = client.Auth().Token()
   406  	return nil
   407  }
   408  
   409  // establishConnection is used to make first contact with Vault. This should be
   410  // called in a go-routine since the connection is retried til the Vault Client
   411  // is stopped or the connection is successfully made at which point the renew
   412  // loop is started.
   413  func (v *vaultClient) establishConnection() {
   414  	// Create the retry timer and set initial duration to zero so it fires
   415  	// immediately
   416  	retryTimer := time.NewTimer(0)
   417  
   418  OUTER:
   419  	for {
   420  		select {
   421  		case <-v.tomb.Dying():
   422  			return
   423  		case <-retryTimer.C:
   424  			// Ensure the API is reachable
   425  			if _, err := v.client.Sys().InitStatus(); err != nil {
   426  				v.logger.Printf("[WARN] vault: failed to contact Vault API. Retrying in %v: %v",
   427  					v.config.ConnectionRetryIntv, err)
   428  				retryTimer.Reset(v.config.ConnectionRetryIntv)
   429  				continue OUTER
   430  			}
   431  
   432  			break OUTER
   433  		}
   434  	}
   435  
   436  	// Retrieve our token, validate it and parse the lease duration
   437  	if err := v.parseSelfToken(); err != nil {
   438  		v.logger.Printf("[ERR] vault: failed to validate self token/role and not retrying: %v", err)
   439  		v.l.Lock()
   440  		v.connEstablished = false
   441  		v.connEstablishedErr = err
   442  		v.l.Unlock()
   443  		return
   444  	}
   445  
   446  	// Set the wrapping function such that token creation is wrapped now
   447  	// that we know our role
   448  	v.client.SetWrappingLookupFunc(v.getWrappingFn())
   449  
   450  	// If we are given a non-root token, start renewing it
   451  	if v.tokenData.Root && v.tokenData.CreationTTL == 0 {
   452  		v.logger.Printf("[DEBUG] vault: not renewing token as it is root")
   453  	} else {
   454  		v.logger.Printf("[DEBUG] vault: token lease duration is %v",
   455  			time.Duration(v.tokenData.CreationTTL)*time.Second)
   456  		v.tomb.Go(wrapNilError(v.renewalLoop))
   457  	}
   458  
   459  	v.l.Lock()
   460  	v.connEstablished = true
   461  	v.connEstablishedErr = nil
   462  	v.l.Unlock()
   463  }
   464  
   465  // renewalLoop runs the renew loop. This should only be called if we are given a
   466  // non-root token.
   467  func (v *vaultClient) renewalLoop() {
   468  	// Create the renewal timer and set initial duration to zero so it fires
   469  	// immediately
   470  	authRenewTimer := time.NewTimer(0)
   471  
   472  	// Backoff is to reduce the rate we try to renew with Vault under error
   473  	// situations
   474  	backoff := 0.0
   475  
   476  	for {
   477  		select {
   478  		case <-v.tomb.Dying():
   479  			return
   480  		case <-authRenewTimer.C:
   481  			// Renew the token and determine the new expiration
   482  			err := v.renew()
   483  			currentExpiration := v.lastRenewed.Add(time.Duration(v.tokenData.CreationTTL) * time.Second)
   484  
   485  			// Successfully renewed
   486  			if err == nil {
   487  				// If we take the expiration (lastRenewed + auth duration) and
   488  				// subtract the current time, we get a duration until expiry.
   489  				// Set the timer to poke us after half of that time is up.
   490  				durationUntilRenew := currentExpiration.Sub(time.Now()) / 2
   491  
   492  				v.logger.Printf("[INFO] vault: renewing token in %v", durationUntilRenew)
   493  				authRenewTimer.Reset(durationUntilRenew)
   494  
   495  				// Reset any backoff
   496  				backoff = 0
   497  				break
   498  			}
   499  
   500  			// Back off, increasing the amount of backoff each time. There are some rules:
   501  			//
   502  			// * If we have an existing authentication that is going to expire,
   503  			// never back off more than half of the amount of time remaining
   504  			// until expiration
   505  			// * Never back off more than 30 seconds multiplied by a random
   506  			// value between 1 and 2
   507  			// * Use randomness so that many clients won't keep hitting Vault
   508  			// at the same time
   509  
   510  			// Set base values and add some backoff
   511  
   512  			v.logger.Printf("[WARN] vault: got error or bad auth, so backing off: %v", err)
   513  			switch {
   514  			case backoff < 5:
   515  				backoff = 5
   516  			case backoff >= 24:
   517  				backoff = 30
   518  			default:
   519  				backoff = backoff * 1.25
   520  			}
   521  
   522  			// Add randomness
   523  			backoff = backoff * (1.0 + rand.Float64())
   524  
   525  			maxBackoff := currentExpiration.Sub(time.Now()) / 2
   526  			if maxBackoff < 0 {
   527  				// We have failed to renew the token past its expiration. Stop
   528  				// renewing with Vault.
   529  				v.logger.Printf("[ERR] vault: failed to renew Vault token before lease expiration. Shutting down Vault client")
   530  				v.l.Lock()
   531  				v.connEstablished = false
   532  				v.connEstablishedErr = err
   533  				v.l.Unlock()
   534  				return
   535  
   536  			} else if backoff > maxBackoff.Seconds() {
   537  				backoff = maxBackoff.Seconds()
   538  			}
   539  
   540  			durationUntilRetry := time.Duration(backoff) * time.Second
   541  			v.logger.Printf("[INFO] vault: backing off for %v", durationUntilRetry)
   542  
   543  			authRenewTimer.Reset(durationUntilRetry)
   544  		}
   545  	}
   546  }
   547  
   548  // renew attempts to renew our Vault token. If the renewal fails, an error is
   549  // returned. This method updates the lastRenewed time
   550  func (v *vaultClient) renew() error {
   551  	// Attempt to renew the token
   552  	secret, err := v.auth.RenewSelf(v.tokenData.CreationTTL)
   553  	if err != nil {
   554  		return err
   555  	}
   556  
   557  	auth := secret.Auth
   558  	if auth == nil {
   559  		return fmt.Errorf("renewal successful but not auth information returned")
   560  	} else if auth.LeaseDuration == 0 {
   561  		return fmt.Errorf("renewal successful but no lease duration returned")
   562  	}
   563  
   564  	v.lastRenewed = time.Now()
   565  	v.logger.Printf("[DEBUG] vault: succesfully renewed server token")
   566  	return nil
   567  }
   568  
   569  // getWrappingFn returns an appropriate wrapping function for Nomad Servers
   570  func (v *vaultClient) getWrappingFn() func(operation, path string) string {
   571  	createPath := "auth/token/create"
   572  	role := v.getRole()
   573  	if role != "" {
   574  		createPath = fmt.Sprintf("auth/token/create/%s", role)
   575  	}
   576  
   577  	return func(operation, path string) string {
   578  		// Only wrap the token create operation
   579  		if operation != "POST" || path != createPath {
   580  			return ""
   581  		}
   582  
   583  		return vaultTokenCreateTTL
   584  	}
   585  }
   586  
   587  // parseSelfToken looks up the Vault token in Vault and parses its data storing
   588  // it in the client. If the token is not valid for Nomads purposes an error is
   589  // returned.
   590  func (v *vaultClient) parseSelfToken() error {
   591  	// Get the initial lease duration
   592  	auth := v.client.Auth().Token()
   593  	var self *vapi.Secret
   594  
   595  	// Try looking up the token using the self endpoint
   596  	secret, err := auth.LookupSelf()
   597  	if err != nil {
   598  		// Try looking up our token directly
   599  		self, err = auth.Lookup(v.client.Token())
   600  		if err != nil {
   601  			return fmt.Errorf("failed to lookup Vault periodic token: %v", err)
   602  		}
   603  	}
   604  	self = secret
   605  
   606  	// Read and parse the fields
   607  	var data tokenData
   608  	if err := mapstructure.WeakDecode(self.Data, &data); err != nil {
   609  		return fmt.Errorf("failed to parse Vault token's data block: %v", err)
   610  	}
   611  
   612  	root := false
   613  	for _, p := range data.Policies {
   614  		if p == "root" {
   615  			root = true
   616  			break
   617  		}
   618  	}
   619  
   620  	// Store the token data
   621  	data.Root = root
   622  	v.tokenData = &data
   623  
   624  	// The criteria that must be met for the token to be valid are as follows:
   625  	// 1) If token is non-root or is but has a creation ttl
   626  	//   a) The token must be renewable
   627  	//   b) Token must have a non-zero TTL
   628  	// 2) Must have update capability for "auth/token/lookup/" (used to verify incoming tokens)
   629  	// 3) Must have update capability for "/auth/token/revoke-accessor/" (used to revoke unneeded tokens)
   630  	// 4) If configured to create tokens against a role:
   631  	//   a) Must have read capability for "auth/token/roles/<role_name" (Can just attemp a read)
   632  	//   b) Must have update capability for path "auth/token/create/<role_name>"
   633  	//   c) Role must:
   634  	//     1) Not allow orphans
   635  	//     2) Must allow tokens to be renewed
   636  	//     3) Must not have an explicit max TTL
   637  	//     4) Must have non-zero period
   638  	// 5) If not configured against a role, the token must be root
   639  
   640  	var mErr multierror.Error
   641  	role := v.getRole()
   642  	if !root {
   643  		// All non-root tokens must be renewable
   644  		if !data.Renewable {
   645  			multierror.Append(&mErr, fmt.Errorf("Vault token is not renewable or root"))
   646  		}
   647  
   648  		// All non-root tokens must have a lease duration
   649  		if data.CreationTTL == 0 {
   650  			multierror.Append(&mErr, fmt.Errorf("invalid lease duration of zero"))
   651  		}
   652  
   653  		// The lease duration can not be expired
   654  		if data.TTL == 0 {
   655  			multierror.Append(&mErr, fmt.Errorf("token TTL is zero"))
   656  		}
   657  
   658  		// There must be a valid role since we aren't root
   659  		if role == "" {
   660  			multierror.Append(&mErr, fmt.Errorf("token role name must be set when not using a root token"))
   661  		}
   662  
   663  	} else if data.CreationTTL != 0 {
   664  		// If the root token has a TTL it must be renewable
   665  		if !data.Renewable {
   666  			multierror.Append(&mErr, fmt.Errorf("Vault token has a TTL but is not renewable"))
   667  		} else if data.TTL == 0 {
   668  			// If the token has a TTL make sure it has not expired
   669  			multierror.Append(&mErr, fmt.Errorf("token TTL is zero"))
   670  		}
   671  	}
   672  
   673  	// Check we have the correct capabilities
   674  	if err := v.validateCapabilities(role, root); err != nil {
   675  		multierror.Append(&mErr, err)
   676  	}
   677  
   678  	// If given a role validate it
   679  	if role != "" {
   680  		if err := v.validateRole(role); err != nil {
   681  			multierror.Append(&mErr, err)
   682  		}
   683  	}
   684  
   685  	return mErr.ErrorOrNil()
   686  }
   687  
   688  // getRole returns the role name to be used when creating tokens
   689  func (v *vaultClient) getRole() string {
   690  	if v.config.Role != "" {
   691  		return v.config.Role
   692  	}
   693  
   694  	return v.tokenData.Role
   695  }
   696  
   697  // validateCapabilities checks that Nomad's Vault token has the correct
   698  // capabilities.
   699  func (v *vaultClient) validateCapabilities(role string, root bool) error {
   700  	// Check if the token can lookup capabilities.
   701  	var mErr multierror.Error
   702  	_, _, err := v.hasCapability(vaultCapabilitiesLookupPath, vaultCapabilitiesCapability)
   703  	if err != nil {
   704  		// Check if there is a permission denied
   705  		if vaultUnrecoverableError.MatchString(err.Error()) {
   706  			// Since we can't read permissions, we just log a warning that we
   707  			// can't tell if the Vault token will work
   708  			msg := fmt.Sprintf("Can not lookup token capabilities. "+
   709  				"As such certain operations may fail in the future. "+
   710  				"Please give Nomad a Vault token with one of the following "+
   711  				"capabilities %q on %q so that the required capabilities can be verified",
   712  				vaultCapabilitiesCapability, vaultCapabilitiesLookupPath)
   713  			v.logger.Printf("[WARN] vault: %s", msg)
   714  			return nil
   715  		} else {
   716  			multierror.Append(&mErr, err)
   717  		}
   718  	}
   719  
   720  	// verify is a helper function that verifies the token has one of the
   721  	// capabilities on the given path and adds an issue to the error
   722  	verify := func(path string, requiredCaps []string) {
   723  		ok, caps, err := v.hasCapability(path, requiredCaps)
   724  		if err != nil {
   725  			multierror.Append(&mErr, err)
   726  		} else if !ok {
   727  			multierror.Append(&mErr,
   728  				fmt.Errorf("token must have one of the following capabilities %q on %q; has %v", requiredCaps, path, caps))
   729  		}
   730  	}
   731  
   732  	// Check if we are verifying incoming tokens
   733  	if !v.config.AllowsUnauthenticated() {
   734  		verify(vaultTokenLookupPath, vaultTokenLookupCapability)
   735  	}
   736  
   737  	// Verify we can renew our selves tokens
   738  	verify(vaultTokenRenewPath, vaultTokenRenewCapability)
   739  
   740  	// Verify we can revoke tokens
   741  	verify(vaultTokenRevokePath, vaultTokenRevokeCapability)
   742  
   743  	// If we are using a role verify the capability
   744  	if role != "" {
   745  		// Verify we can read the role
   746  		verify(fmt.Sprintf(vaultRoleLookupPath, role), vaultRoleLookupCapability)
   747  
   748  		// Verify we can create from the role
   749  		verify(fmt.Sprintf(vaultTokenRoleCreatePath, role), vaultTokenRoleCreateCapability)
   750  	}
   751  
   752  	return mErr.ErrorOrNil()
   753  }
   754  
   755  // hasCapability takes a path and returns whether the token has at least one of
   756  // the required capabilities on the given path. It also returns the set of
   757  // capabilities the token does have as well as any error that occured.
   758  func (v *vaultClient) hasCapability(path string, required []string) (bool, []string, error) {
   759  	caps, err := v.client.Sys().CapabilitiesSelf(path)
   760  	if err != nil {
   761  		return false, nil, err
   762  	}
   763  	for _, c := range caps {
   764  		for _, r := range required {
   765  			if c == r {
   766  				return true, caps, nil
   767  			}
   768  		}
   769  	}
   770  	return false, caps, nil
   771  }
   772  
   773  // validateRole contacts Vault and checks that the given Vault role is valid for
   774  // the purposes of being used by Nomad
   775  func (v *vaultClient) validateRole(role string) error {
   776  	if role == "" {
   777  		return fmt.Errorf("Invalid empty role name")
   778  	}
   779  
   780  	// Validate the role
   781  	rsecret, err := v.client.Logical().Read(fmt.Sprintf("auth/token/roles/%s", role))
   782  	if err != nil {
   783  		return fmt.Errorf("failed to lookup role %q: %v", role, err)
   784  	}
   785  	if rsecret == nil {
   786  		return fmt.Errorf("Role %q does not exist", role)
   787  	}
   788  
   789  	// Read and parse the fields
   790  	var data struct {
   791  		ExplicitMaxTtl int `mapstructure:"explicit_max_ttl"`
   792  		Orphan         bool
   793  		Period         int
   794  		Renewable      bool
   795  	}
   796  	if err := mapstructure.WeakDecode(rsecret.Data, &data); err != nil {
   797  		return fmt.Errorf("failed to parse Vault role's data block: %v", err)
   798  	}
   799  
   800  	// Validate the role is acceptable
   801  	var mErr multierror.Error
   802  	if data.Orphan {
   803  		multierror.Append(&mErr, fmt.Errorf("Role must not allow orphans"))
   804  	}
   805  
   806  	if !data.Renewable {
   807  		multierror.Append(&mErr, fmt.Errorf("Role must allow tokens to be renewed"))
   808  	}
   809  
   810  	if data.ExplicitMaxTtl != 0 {
   811  		multierror.Append(&mErr, fmt.Errorf("Role can not use an explicit max ttl. Token must be periodic."))
   812  	}
   813  
   814  	if data.Period == 0 {
   815  		multierror.Append(&mErr, fmt.Errorf("Role must have a non-zero period to make tokens periodic."))
   816  	}
   817  
   818  	return mErr.ErrorOrNil()
   819  }
   820  
   821  // ConnectionEstablished returns whether a connection to Vault has been
   822  // established and any error that potentially caused it to be false
   823  func (v *vaultClient) ConnectionEstablished() (bool, error) {
   824  	v.l.Lock()
   825  	defer v.l.Unlock()
   826  	return v.connEstablished, v.connEstablishedErr
   827  }
   828  
   829  // Enabled returns whether the client is active
   830  func (v *vaultClient) Enabled() bool {
   831  	v.l.Lock()
   832  	defer v.l.Unlock()
   833  	return v.config.IsEnabled()
   834  }
   835  
   836  // Active returns whether the client is active
   837  func (v *vaultClient) Active() bool {
   838  	return atomic.LoadInt32(&v.active) == 1
   839  }
   840  
   841  // CreateToken takes the allocation and task and returns an appropriate Vault
   842  // token. The call is rate limited and may be canceled with the passed policy.
   843  // When the error is recoverable, it will be of type RecoverableError
   844  func (v *vaultClient) CreateToken(ctx context.Context, a *structs.Allocation, task string) (*vapi.Secret, error) {
   845  	if !v.Enabled() {
   846  		return nil, fmt.Errorf("Vault integration disabled")
   847  	}
   848  	if !v.Active() {
   849  		return nil, structs.NewRecoverableError(fmt.Errorf("Vault client not active"), true)
   850  	}
   851  
   852  	// Check if we have established a connection with Vault
   853  	if established, err := v.ConnectionEstablished(); !established && err == nil {
   854  		return nil, structs.NewRecoverableError(fmt.Errorf("Connection to Vault has not been established"), true)
   855  	} else if !established {
   856  		return nil, fmt.Errorf("Connection to Vault failed: %v", err)
   857  	}
   858  
   859  	// Track how long the request takes
   860  	defer metrics.MeasureSince([]string{"nomad", "vault", "create_token"}, time.Now())
   861  
   862  	// Retrieve the Vault block for the task
   863  	policies := a.Job.VaultPolicies()
   864  	if policies == nil {
   865  		return nil, fmt.Errorf("Job doesn't require Vault policies")
   866  	}
   867  	tg, ok := policies[a.TaskGroup]
   868  	if !ok {
   869  		return nil, fmt.Errorf("Task group does not require Vault policies")
   870  	}
   871  	taskVault, ok := tg[task]
   872  	if !ok {
   873  		return nil, fmt.Errorf("Task does not require Vault policies")
   874  	}
   875  
   876  	// Build the creation request
   877  	req := &vapi.TokenCreateRequest{
   878  		Policies: taskVault.Policies,
   879  		Metadata: map[string]string{
   880  			"AllocationID": a.ID,
   881  			"Task":         task,
   882  			"NodeID":       a.NodeID,
   883  		},
   884  		TTL:         v.childTTL,
   885  		DisplayName: fmt.Sprintf("%s-%s", a.ID, task),
   886  	}
   887  
   888  	// Ensure we are under our rate limit
   889  	if err := v.limiter.Wait(ctx); err != nil {
   890  		return nil, err
   891  	}
   892  
   893  	// Make the request and switch depending on whether we are using a root
   894  	// token or a role based token
   895  	var secret *vapi.Secret
   896  	var err error
   897  	role := v.getRole()
   898  	if v.tokenData.Root && role == "" {
   899  		req.Period = v.childTTL
   900  		secret, err = v.auth.Create(req)
   901  	} else {
   902  		// Make the token using the role
   903  		secret, err = v.auth.CreateWithRole(req, v.getRole())
   904  	}
   905  
   906  	// Determine whether it is unrecoverable
   907  	if err != nil {
   908  		if vaultUnrecoverableError.MatchString(err.Error()) {
   909  			return secret, err
   910  		}
   911  
   912  		// The error is recoverable
   913  		return nil, structs.NewRecoverableError(err, true)
   914  	}
   915  
   916  	return secret, nil
   917  }
   918  
   919  // LookupToken takes a Vault token and does a lookup against Vault. The call is
   920  // rate limited and may be canceled with passed context.
   921  func (v *vaultClient) LookupToken(ctx context.Context, token string) (*vapi.Secret, error) {
   922  	if !v.Enabled() {
   923  		return nil, fmt.Errorf("Vault integration disabled")
   924  	}
   925  
   926  	if !v.Active() {
   927  		return nil, fmt.Errorf("Vault client not active")
   928  	}
   929  
   930  	// Check if we have established a connection with Vault
   931  	if established, err := v.ConnectionEstablished(); !established && err == nil {
   932  		return nil, structs.NewRecoverableError(fmt.Errorf("Connection to Vault has not been established"), true)
   933  	} else if !established {
   934  		return nil, fmt.Errorf("Connection to Vault failed: %v", err)
   935  	}
   936  
   937  	// Track how long the request takes
   938  	defer metrics.MeasureSince([]string{"nomad", "vault", "lookup_token"}, time.Now())
   939  
   940  	// Ensure we are under our rate limit
   941  	if err := v.limiter.Wait(ctx); err != nil {
   942  		return nil, err
   943  	}
   944  
   945  	// Lookup the token
   946  	return v.auth.Lookup(token)
   947  }
   948  
   949  // PoliciesFrom parses the set of policies returned by a token lookup.
   950  func PoliciesFrom(s *vapi.Secret) ([]string, error) {
   951  	if s == nil {
   952  		return nil, fmt.Errorf("cannot parse nil Vault secret")
   953  	}
   954  	var data tokenData
   955  	if err := mapstructure.WeakDecode(s.Data, &data); err != nil {
   956  		return nil, fmt.Errorf("failed to parse Vault token's data block: %v", err)
   957  	}
   958  
   959  	return data.Policies, nil
   960  }
   961  
   962  // RevokeTokens revokes the passed set of accessors. If committed is set, the
   963  // purge function passed to the client is called. If there is an error purging
   964  // either because of Vault failures or because of the purge function, the
   965  // revocation is retried until the tokens TTL.
   966  func (v *vaultClient) RevokeTokens(ctx context.Context, accessors []*structs.VaultAccessor, committed bool) error {
   967  	if !v.Enabled() {
   968  		return nil
   969  	}
   970  
   971  	if !v.Active() {
   972  		return fmt.Errorf("Vault client not active")
   973  	}
   974  
   975  	// Track how long the request takes
   976  	defer metrics.MeasureSince([]string{"nomad", "vault", "revoke_tokens"}, time.Now())
   977  
   978  	// Check if we have established a connection with Vault. If not just add it
   979  	// to the queue
   980  	if established, err := v.ConnectionEstablished(); !established && err == nil {
   981  		// Only bother tracking it for later revocation if the accessor was
   982  		// committed
   983  		if committed {
   984  			v.storeForRevocation(accessors)
   985  		}
   986  
   987  		// Track that we are abandoning these accessors.
   988  		metrics.IncrCounter([]string{"nomad", "vault", "undistributed_tokens_abandoned"}, float32(len(accessors)))
   989  		return nil
   990  	}
   991  
   992  	// Attempt to revoke immediately and if it fails, add it to the revoke queue
   993  	err := v.parallelRevoke(ctx, accessors)
   994  	if err != nil {
   995  		// If it is uncommitted, it is a best effort revoke as it will shortly
   996  		// TTL within the cubbyhole and has not been leaked to any outside
   997  		// system
   998  		if !committed {
   999  			metrics.IncrCounter([]string{"nomad", "vault", "undistributed_tokens_abandoned"}, float32(len(accessors)))
  1000  			return nil
  1001  		}
  1002  
  1003  		v.logger.Printf("[WARN] vault: failed to revoke tokens. Will reattempt til TTL: %v", err)
  1004  		v.storeForRevocation(accessors)
  1005  		return nil
  1006  	} else if !committed {
  1007  		// Mark that it was revoked but there is nothing to purge so exit
  1008  		metrics.IncrCounter([]string{"nomad", "vault", "undistributed_tokens_revoked"}, float32(len(accessors)))
  1009  		return nil
  1010  	}
  1011  
  1012  	if err := v.purgeFn(accessors); err != nil {
  1013  		v.logger.Printf("[ERR] vault: failed to purge Vault accessors: %v", err)
  1014  		v.storeForRevocation(accessors)
  1015  		return nil
  1016  	}
  1017  
  1018  	// Track that it was revoked successfully
  1019  	metrics.IncrCounter([]string{"nomad", "vault", "distributed_tokens_revoked"}, float32(len(accessors)))
  1020  
  1021  	return nil
  1022  }
  1023  
  1024  // storeForRevocation stores the passed set of accessors for revocation. It
  1025  // captrues their effective TTL by storing their create TTL plus the current
  1026  // time.
  1027  func (v *vaultClient) storeForRevocation(accessors []*structs.VaultAccessor) {
  1028  	v.revLock.Lock()
  1029  	v.statsLock.Lock()
  1030  	now := time.Now()
  1031  	for _, a := range accessors {
  1032  		v.revoking[a] = now.Add(time.Duration(a.CreationTTL) * time.Second)
  1033  	}
  1034  	v.stats.TrackedForRevoke = len(v.revoking)
  1035  	v.statsLock.Unlock()
  1036  	v.revLock.Unlock()
  1037  }
  1038  
  1039  // parallelRevoke revokes the passed VaultAccessors in parallel.
  1040  func (v *vaultClient) parallelRevoke(ctx context.Context, accessors []*structs.VaultAccessor) error {
  1041  	if !v.Enabled() {
  1042  		return fmt.Errorf("Vault integration disabled")
  1043  	}
  1044  
  1045  	if !v.Active() {
  1046  		return fmt.Errorf("Vault client not active")
  1047  	}
  1048  
  1049  	// Check if we have established a connection with Vault
  1050  	if established, err := v.ConnectionEstablished(); !established && err == nil {
  1051  		return structs.NewRecoverableError(fmt.Errorf("Connection to Vault has not been established"), true)
  1052  	} else if !established {
  1053  		return fmt.Errorf("Connection to Vault failed: %v", err)
  1054  	}
  1055  
  1056  	g, pCtx := errgroup.WithContext(ctx)
  1057  
  1058  	// Cap the handlers
  1059  	handlers := len(accessors)
  1060  	if handlers > maxParallelRevokes {
  1061  		handlers = maxParallelRevokes
  1062  	}
  1063  
  1064  	// Create the Vault Tokens
  1065  	input := make(chan *structs.VaultAccessor, handlers)
  1066  	for i := 0; i < handlers; i++ {
  1067  		g.Go(func() error {
  1068  			for {
  1069  				select {
  1070  				case va, ok := <-input:
  1071  					if !ok {
  1072  						return nil
  1073  					}
  1074  
  1075  					if err := v.auth.RevokeAccessor(va.Accessor); err != nil {
  1076  						return fmt.Errorf("failed to revoke token (alloc: %q, node: %q, task: %q): %v", va.AllocID, va.NodeID, va.Task, err)
  1077  					}
  1078  				case <-pCtx.Done():
  1079  					return nil
  1080  				}
  1081  			}
  1082  		})
  1083  	}
  1084  
  1085  	// Send the input
  1086  	go func() {
  1087  		defer close(input)
  1088  		for _, va := range accessors {
  1089  			select {
  1090  			case <-pCtx.Done():
  1091  				return
  1092  			case input <- va:
  1093  			}
  1094  		}
  1095  
  1096  	}()
  1097  
  1098  	// Wait for everything to complete
  1099  	return g.Wait()
  1100  }
  1101  
  1102  // revokeDaemon should be called in a goroutine and is used to periodically
  1103  // revoke Vault accessors that failed the original revocation
  1104  func (v *vaultClient) revokeDaemon() {
  1105  	ticker := time.NewTicker(vaultRevocationIntv)
  1106  	defer ticker.Stop()
  1107  
  1108  	for {
  1109  		select {
  1110  		case <-v.tomb.Dying():
  1111  			return
  1112  		case now := <-ticker.C:
  1113  			if established, _ := v.ConnectionEstablished(); !established {
  1114  				continue
  1115  			}
  1116  
  1117  			v.revLock.Lock()
  1118  
  1119  			// Fast path
  1120  			if len(v.revoking) == 0 {
  1121  				v.revLock.Unlock()
  1122  				continue
  1123  			}
  1124  
  1125  			// Build the list of allocations that need to revoked while pruning any TTL'd checks
  1126  			revoking := make([]*structs.VaultAccessor, 0, len(v.revoking))
  1127  			for va, ttl := range v.revoking {
  1128  				if now.After(ttl) {
  1129  					delete(v.revoking, va)
  1130  				} else {
  1131  					revoking = append(revoking, va)
  1132  				}
  1133  			}
  1134  
  1135  			if err := v.parallelRevoke(context.Background(), revoking); err != nil {
  1136  				v.logger.Printf("[WARN] vault: background token revocation errored: %v", err)
  1137  				v.revLock.Unlock()
  1138  				continue
  1139  			}
  1140  
  1141  			// Unlock before a potentially expensive operation
  1142  			v.revLock.Unlock()
  1143  
  1144  			// Call the passed in token revocation function
  1145  			if err := v.purgeFn(revoking); err != nil {
  1146  				// Can continue since revocation is idempotent
  1147  				v.logger.Printf("[ERR] vault: token revocation errored: %v", err)
  1148  				continue
  1149  			}
  1150  
  1151  			// Track that tokens were revoked successfully
  1152  			metrics.IncrCounter([]string{"nomad", "vault", "distributed_tokens_revoked"}, float32(len(revoking)))
  1153  
  1154  			// Can delete from the tracked list now that we have purged
  1155  			v.revLock.Lock()
  1156  			v.statsLock.Lock()
  1157  			for _, va := range revoking {
  1158  				delete(v.revoking, va)
  1159  			}
  1160  			v.stats.TrackedForRevoke = len(v.revoking)
  1161  			v.statsLock.Unlock()
  1162  			v.revLock.Unlock()
  1163  
  1164  		}
  1165  	}
  1166  }
  1167  
  1168  // purgeVaultAccessors creates a Raft transaction to remove the passed Vault
  1169  // Accessors
  1170  func (s *Server) purgeVaultAccessors(accessors []*structs.VaultAccessor) error {
  1171  	// Commit this update via Raft
  1172  	req := structs.VaultAccessorsRequest{Accessors: accessors}
  1173  	_, _, err := s.raftApply(structs.VaultAccessorDegisterRequestType, req)
  1174  	return err
  1175  }
  1176  
  1177  // wrapNilError is a helper that returns a wrapped function that returns a nil
  1178  // error
  1179  func wrapNilError(f func()) func() error {
  1180  	return func() error {
  1181  		f()
  1182  		return nil
  1183  	}
  1184  }
  1185  
  1186  // setLimit is used to update the rate limit
  1187  func (v *vaultClient) setLimit(l rate.Limit) {
  1188  	v.l.Lock()
  1189  	defer v.l.Unlock()
  1190  	v.limiter = rate.NewLimiter(l, int(l))
  1191  }
  1192  
  1193  // Stats is used to query the state of the blocked eval tracker.
  1194  func (v *vaultClient) Stats() *VaultStats {
  1195  	// Allocate a new stats struct
  1196  	stats := new(VaultStats)
  1197  
  1198  	v.statsLock.RLock()
  1199  	defer v.statsLock.RUnlock()
  1200  
  1201  	// Copy all the stats
  1202  	stats.TrackedForRevoke = v.stats.TrackedForRevoke
  1203  
  1204  	return stats
  1205  }
  1206  
  1207  // EmitStats is used to export metrics about the blocked eval tracker while enabled
  1208  func (v *vaultClient) EmitStats(period time.Duration, stopCh chan struct{}) {
  1209  	for {
  1210  		select {
  1211  		case <-time.After(period):
  1212  			stats := v.Stats()
  1213  			metrics.SetGauge([]string{"nomad", "vault", "distributed_tokens_revoking"}, float32(stats.TrackedForRevoke))
  1214  		case <-stopCh:
  1215  			return
  1216  		}
  1217  	}
  1218  }