github.com/emate/nomad@v0.8.2-wo-binpacking/nomad/vault.go (about)

     1  package nomad
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"log"
     8  	"math/rand"
     9  	"sync"
    10  	"sync/atomic"
    11  	"time"
    12  
    13  	"gopkg.in/tomb.v2"
    14  
    15  	metrics "github.com/armon/go-metrics"
    16  	multierror "github.com/hashicorp/go-multierror"
    17  	"github.com/hashicorp/nomad/nomad/structs"
    18  	"github.com/hashicorp/nomad/nomad/structs/config"
    19  	vapi "github.com/hashicorp/vault/api"
    20  	"github.com/mitchellh/mapstructure"
    21  
    22  	"golang.org/x/sync/errgroup"
    23  	"golang.org/x/time/rate"
    24  )
    25  
const (
	// vaultTokenCreateTTL is the duration the wrapped token for the client is
	// valid for. It is expressed as a duration string in seconds.
	vaultTokenCreateTTL = "60s"

	// minimumTokenTTL is the minimum Token TTL allowed for child tokens. A
	// configured TaskTokenTTL below this value is rejected by buildClient.
	minimumTokenTTL = 5 * time.Minute

	// defaultTokenTTL is the default Token TTL used when the passed token is a
	// root token such that child tokens aren't being created against a role
	// that has defined a TTL. It is also the value used when no TaskTokenTTL
	// is configured.
	defaultTokenTTL = "72h"

	// requestRateLimit is the maximum number of requests per second Nomad will
	// make against Vault. The same value is used as the limiter's burst size.
	requestRateLimit rate.Limit = 500.0

	// maxParallelRevokes is the maximum number of parallel Vault
	// token revocation requests
	maxParallelRevokes = 64

	// vaultRevocationIntv is the interval at which Vault tokens that failed
	// initial revocation are retried
	vaultRevocationIntv = 5 * time.Minute

	// vaultCapabilitiesLookupPath is the path to lookup the capabilities of
	// one's own token.
	vaultCapabilitiesLookupPath = "sys/capabilities-self"

	// vaultTokenRenewPath is the path used to renew our token
	vaultTokenRenewPath = "auth/token/renew-self"

	// vaultTokenLookupPath is the path used to lookup a token
	vaultTokenLookupPath = "auth/token/lookup"

	// vaultTokenRevokePath is the path used to revoke a token by its accessor
	vaultTokenRevokePath = "auth/token/revoke-accessor"

	// vaultRoleLookupPath is the format string for the path used to lookup a
	// role; the role name is substituted for %s.
	vaultRoleLookupPath = "auth/token/roles/%s"

	// vaultTokenRoleCreatePath is the format string for the path used to
	// create a token from a role; the role name is substituted for %s.
	vaultTokenRoleCreatePath = "auth/token/create/%s"
)
    70  
var (
	// vaultCapabilitiesCapability is the expected capability of Nomad's Vault
	// token on the capabilities lookup path. The token must have at least one
	// of the capabilities.
	vaultCapabilitiesCapability = []string{"update", "root"}

	// vaultTokenRenewCapability is the expected capability Nomad's
	// Vault token should have on the token renewal path. The token must have
	// at least one of the capabilities.
	vaultTokenRenewCapability = []string{"update", "root"}

	// vaultTokenLookupCapability is the expected capability Nomad's
	// Vault token should have on the token lookup path. The token must have at
	// least one of the capabilities.
	vaultTokenLookupCapability = []string{"update", "root"}

	// vaultTokenRevokeCapability is the expected capability Nomad's
	// Vault token should have on the token revocation path. The token must
	// have at least one of the capabilities.
	vaultTokenRevokeCapability = []string{"update", "root"}

	// vaultRoleLookupCapability is the expected capability Nomad's Vault
	// token should have on the role lookup path. The token must have at least
	// one of the capabilities.
	vaultRoleLookupCapability = []string{"read", "root"}

	// vaultTokenRoleCreateCapability is the expected capability Nomad's Vault
	// token should have on the role based token creation path. The token must
	// have at least one of the capabilities.
	vaultTokenRoleCreateCapability = []string{"update", "root"}
)
   102  
// VaultClient is the Server's interface for interacting with Vault.
type VaultClient interface {
	// SetActive activates or de-activates the Vault client. When active, token
	// creation/lookup/revocation operations are allowed.
	SetActive(active bool)

	// SetConfig updates the config used by the Vault client. An error is
	// returned if the new config can not be applied.
	SetConfig(config *config.VaultConfig) error

	// CreateToken takes an allocation and task and returns an appropriate Vault
	// Secret
	CreateToken(ctx context.Context, a *structs.Allocation, task string) (*vapi.Secret, error)

	// LookupToken takes a token string and returns the Vault Secret that
	// results from looking it up.
	LookupToken(ctx context.Context, token string) (*vapi.Secret, error)

	// RevokeTokens takes a set of token accessors and revokes the tokens.
	// NOTE(review): the meaning of committed is not visible in this part of
	// the file; confirm against the implementation.
	RevokeTokens(ctx context.Context, accessors []*structs.VaultAccessor, committed bool) error

	// Stop is used to stop token renewal
	Stop()

	// Running returns whether the Vault client is running
	Running() bool

	// Stats returns the Vault client's statistics
	Stats() *VaultStats

	// EmitStats emits the client's statistics at the given period until
	// stopCh is signaled.
	EmitStats(period time.Duration, stopCh chan struct{})
}
   135  
// VaultStats holds the statistics about Vault tokens created and managed by
// Nomad.
type VaultStats struct {
	// TrackedForRevoke is the count of tokens that are being tracked to be
	// revoked since they could not be immediately revoked.
	TrackedForRevoke int
}
   143  
// PurgeVaultAccessorFn is called to remove VaultAccessors from the system. If
// the function returns an error, the token will still be tracked and revocation
// will retry till there is a success
type PurgeVaultAccessorFn func(accessors []*structs.VaultAccessor) error
   148  
// tokenData holds the relevant information about the Vault token passed to the
// client. The tagged fields are decoded from the data block of the token
// lookup response.
type tokenData struct {
	// CreationTTL is the TTL the token was created with; it is interpreted as
	// a number of seconds when computing renewal times.
	CreationTTL int `mapstructure:"creation_ttl"`
	// TTL is the token's "ttl" value; zero is treated as expired.
	TTL int `mapstructure:"ttl"`
	// Renewable is whether the token can be renewed.
	Renewable bool `mapstructure:"renewable"`
	// Policies is the list of policies attached to the token.
	Policies []string `mapstructure:"policies"`
	// Role is the role name reported for the token, if any.
	Role string `mapstructure:"role"`
	// Root is not decoded from Vault; parseSelfToken sets it when the
	// "root" policy is among Policies.
	Root bool
}
   159  
// vaultClient is the Server's implementation of the VaultClient interface. The
// client renews the PeriodicToken given in the Vault configuration and provides
// the Server with the ability to create child tokens and lookup the permissions
// of tokens.
type vaultClient struct {
	// limiter is used to rate limit requests to Vault
	limiter *rate.Limiter

	// client is the Vault API client
	client *vapi.Client

	// auth is the Vault token auth API client
	auth *vapi.TokenAuth

	// config is the user passed Vault config
	config *config.VaultConfig

	// connEstablished marks whether we have an established connection to Vault.
	connEstablished bool

	// connEstablishedErr marks an error that can occur when establishing a
	// connection
	connEstablishedErr error

	// token is the raw token used by the client
	token string

	// tokenData is the data of the passed Vault token
	tokenData *tokenData

	// revoking tracks the VaultAccessors that must be revoked. purgeFn is the
	// callback supplied at construction for removing accessors, and revLock
	// is the mutex SetActive holds while replacing the revoking map.
	revoking map[*structs.VaultAccessor]time.Time
	purgeFn  PurgeVaultAccessorFn
	revLock  sync.Mutex

	// active indicates whether the vaultClient is active. It should be
	// accessed using a helper and updated atomically
	active int32

	// running indicates whether the vault client is started.
	running bool

	// childTTL is the TTL for child tokens, kept as a duration string.
	childTTL string

	// lastRenewed is the time the token was last renewed
	lastRenewed time.Time

	// tomb tracks the background goroutines so they can be killed and waited
	// on; logger is the destination for the client's log output.
	tomb   *tomb.Tomb
	logger *log.Logger

	// stats stores the stats
	stats     *VaultStats
	statsLock sync.RWMutex

	// l is used to lock the configuration aspects of the client such that
	// multiple callers can't cause conflicting config updates
	l sync.Mutex
}
   219  
   220  // NewVaultClient returns a Vault client from the given config. If the client
   221  // couldn't be made an error is returned.
   222  func NewVaultClient(c *config.VaultConfig, logger *log.Logger, purgeFn PurgeVaultAccessorFn) (*vaultClient, error) {
   223  	if c == nil {
   224  		return nil, fmt.Errorf("must pass valid VaultConfig")
   225  	}
   226  
   227  	if logger == nil {
   228  		return nil, fmt.Errorf("must pass valid logger")
   229  	}
   230  
   231  	v := &vaultClient{
   232  		config:   c,
   233  		logger:   logger,
   234  		limiter:  rate.NewLimiter(requestRateLimit, int(requestRateLimit)),
   235  		revoking: make(map[*structs.VaultAccessor]time.Time),
   236  		purgeFn:  purgeFn,
   237  		tomb:     &tomb.Tomb{},
   238  		stats:    new(VaultStats),
   239  	}
   240  
   241  	if v.config.IsEnabled() {
   242  		if err := v.buildClient(); err != nil {
   243  			return nil, err
   244  		}
   245  
   246  		// Launch the required goroutines
   247  		v.tomb.Go(wrapNilError(v.establishConnection))
   248  		v.tomb.Go(wrapNilError(v.revokeDaemon))
   249  
   250  		v.running = true
   251  	}
   252  
   253  	return v, nil
   254  }
   255  
   256  func (v *vaultClient) Stop() {
   257  	v.l.Lock()
   258  	running := v.running
   259  	v.running = false
   260  	v.l.Unlock()
   261  
   262  	if running {
   263  		v.tomb.Kill(nil)
   264  		v.tomb.Wait()
   265  		v.flush()
   266  	}
   267  }
   268  
   269  func (v *vaultClient) Running() bool {
   270  	v.l.Lock()
   271  	defer v.l.Unlock()
   272  	return v.running
   273  }
   274  
   275  // SetActive activates or de-activates the Vault client. When active, token
   276  // creation/lookup/revocation operation are allowed. All queued revocations are
   277  // cancelled if set un-active as it is assumed another instances is taking over
   278  func (v *vaultClient) SetActive(active bool) {
   279  	if active {
   280  		atomic.StoreInt32(&v.active, 1)
   281  	} else {
   282  		atomic.StoreInt32(&v.active, 0)
   283  	}
   284  
   285  	// Clear out the revoking tokens
   286  	v.revLock.Lock()
   287  	v.revoking = make(map[*structs.VaultAccessor]time.Time)
   288  	v.revLock.Unlock()
   289  
   290  	return
   291  }
   292  
   293  // flush is used to reset the state of the vault client
   294  func (v *vaultClient) flush() {
   295  	v.l.Lock()
   296  	defer v.l.Unlock()
   297  
   298  	v.client = nil
   299  	v.auth = nil
   300  	v.connEstablished = false
   301  	v.connEstablishedErr = nil
   302  	v.token = ""
   303  	v.tokenData = nil
   304  	v.revoking = make(map[*structs.VaultAccessor]time.Time)
   305  	v.childTTL = ""
   306  	v.tomb = &tomb.Tomb{}
   307  }
   308  
   309  // SetConfig is used to update the Vault config being used. A temporary outage
   310  // may occur after calling as it re-establishes a connection to Vault
   311  func (v *vaultClient) SetConfig(config *config.VaultConfig) error {
   312  	if config == nil {
   313  		return fmt.Errorf("must pass valid VaultConfig")
   314  	}
   315  
   316  	v.l.Lock()
   317  	defer v.l.Unlock()
   318  
   319  	// Kill any background routines
   320  	if v.running {
   321  		// Stop accepting any new request
   322  		v.connEstablished = false
   323  
   324  		// Kill any background routine and create a new tomb
   325  		v.tomb.Kill(nil)
   326  		v.tomb.Wait()
   327  		v.tomb = &tomb.Tomb{}
   328  		v.running = false
   329  	}
   330  
   331  	// Store the new config
   332  	v.config = config
   333  
   334  	// Check if we should relaunch
   335  	if v.config.IsEnabled() {
   336  		// Rebuild the client
   337  		if err := v.buildClient(); err != nil {
   338  			return err
   339  		}
   340  
   341  		// Launch the required goroutines
   342  		v.tomb.Go(wrapNilError(v.establishConnection))
   343  		v.tomb.Go(wrapNilError(v.revokeDaemon))
   344  		v.running = true
   345  	}
   346  
   347  	return nil
   348  }
   349  
   350  // buildClient is used to build a Vault client based on the stored Vault config
   351  func (v *vaultClient) buildClient() error {
   352  	// Validate we have the required fields.
   353  	if v.config.Token == "" {
   354  		return errors.New("Vault token must be set")
   355  	} else if v.config.Addr == "" {
   356  		return errors.New("Vault address must be set")
   357  	}
   358  
   359  	// Parse the TTL if it is set
   360  	if v.config.TaskTokenTTL != "" {
   361  		d, err := time.ParseDuration(v.config.TaskTokenTTL)
   362  		if err != nil {
   363  			return fmt.Errorf("failed to parse TaskTokenTTL %q: %v", v.config.TaskTokenTTL, err)
   364  		}
   365  
   366  		if d.Nanoseconds() < minimumTokenTTL.Nanoseconds() {
   367  			return fmt.Errorf("ChildTokenTTL is less than minimum allowed of %v", minimumTokenTTL)
   368  		}
   369  
   370  		v.childTTL = v.config.TaskTokenTTL
   371  	} else {
   372  		// Default the TaskTokenTTL
   373  		v.childTTL = defaultTokenTTL
   374  	}
   375  
   376  	// Get the Vault API configuration
   377  	apiConf, err := v.config.ApiConfig()
   378  	if err != nil {
   379  		return fmt.Errorf("Failed to create Vault API config: %v", err)
   380  	}
   381  
   382  	// Create the Vault API client
   383  	client, err := vapi.NewClient(apiConf)
   384  	if err != nil {
   385  		v.logger.Printf("[ERR] vault: failed to create Vault client. Not retrying: %v", err)
   386  		return err
   387  	}
   388  
   389  	// Set the token and store the client
   390  	v.token = v.config.Token
   391  	client.SetToken(v.token)
   392  	v.client = client
   393  	v.auth = client.Auth().Token()
   394  	return nil
   395  }
   396  
   397  // establishConnection is used to make first contact with Vault. This should be
   398  // called in a go-routine since the connection is retried until the Vault Client
   399  // is stopped or the connection is successfully made at which point the renew
   400  // loop is started.
   401  func (v *vaultClient) establishConnection() {
   402  	// Create the retry timer and set initial duration to zero so it fires
   403  	// immediately
   404  	retryTimer := time.NewTimer(0)
   405  	initStatus := false
   406  OUTER:
   407  	for {
   408  		select {
   409  		case <-v.tomb.Dying():
   410  			return
   411  		case <-retryTimer.C:
   412  			// Ensure the API is reachable
   413  			if !initStatus {
   414  				if _, err := v.client.Sys().InitStatus(); err != nil {
   415  					v.logger.Printf("[WARN] vault: failed to contact Vault API. Retrying in %v: %v",
   416  						v.config.ConnectionRetryIntv, err)
   417  					retryTimer.Reset(v.config.ConnectionRetryIntv)
   418  					continue OUTER
   419  				}
   420  				initStatus = true
   421  			}
   422  			// Retry validating the token till success
   423  			if err := v.parseSelfToken(); err != nil {
   424  				v.logger.Printf("[ERR] vault: failed to validate self token/role. Retrying in %v: %v", v.config.ConnectionRetryIntv, err)
   425  				retryTimer.Reset(v.config.ConnectionRetryIntv)
   426  				v.l.Lock()
   427  				v.connEstablished = true
   428  				v.connEstablishedErr = fmt.Errorf("Nomad Server failed to establish connections to Vault: %v", err)
   429  				v.l.Unlock()
   430  				continue OUTER
   431  			}
   432  			break OUTER
   433  		}
   434  	}
   435  
   436  	// Set the wrapping function such that token creation is wrapped now
   437  	// that we know our role
   438  	v.client.SetWrappingLookupFunc(v.getWrappingFn())
   439  
   440  	// If we are given a non-root token, start renewing it
   441  	if v.tokenData.Root && v.tokenData.CreationTTL == 0 {
   442  		v.logger.Printf("[DEBUG] vault: not renewing token as it is root")
   443  	} else {
   444  		v.logger.Printf("[DEBUG] vault: token lease duration is %v",
   445  			time.Duration(v.tokenData.CreationTTL)*time.Second)
   446  		v.tomb.Go(wrapNilError(v.renewalLoop))
   447  	}
   448  
   449  	v.l.Lock()
   450  	v.connEstablished = true
   451  	v.connEstablishedErr = nil
   452  	v.l.Unlock()
   453  }
   454  
   455  // renewalLoop runs the renew loop. This should only be called if we are given a
   456  // non-root token.
   457  func (v *vaultClient) renewalLoop() {
   458  	// Create the renewal timer and set initial duration to zero so it fires
   459  	// immediately
   460  	authRenewTimer := time.NewTimer(0)
   461  
   462  	// Backoff is to reduce the rate we try to renew with Vault under error
   463  	// situations
   464  	backoff := 0.0
   465  
   466  	for {
   467  		select {
   468  		case <-v.tomb.Dying():
   469  			return
   470  		case <-authRenewTimer.C:
   471  			// Renew the token and determine the new expiration
   472  			err := v.renew()
   473  			currentExpiration := v.lastRenewed.Add(time.Duration(v.tokenData.CreationTTL) * time.Second)
   474  
   475  			// Successfully renewed
   476  			if err == nil {
   477  				// If we take the expiration (lastRenewed + auth duration) and
   478  				// subtract the current time, we get a duration until expiry.
   479  				// Set the timer to poke us after half of that time is up.
   480  				durationUntilRenew := currentExpiration.Sub(time.Now()) / 2
   481  
   482  				v.logger.Printf("[INFO] vault: renewing token in %v", durationUntilRenew)
   483  				authRenewTimer.Reset(durationUntilRenew)
   484  
   485  				// Reset any backoff
   486  				backoff = 0
   487  				break
   488  			}
   489  
   490  			// Back off, increasing the amount of backoff each time. There are some rules:
   491  			//
   492  			// * If we have an existing authentication that is going to expire,
   493  			// never back off more than half of the amount of time remaining
   494  			// until expiration
   495  			// * Never back off more than 30 seconds multiplied by a random
   496  			// value between 1 and 2
   497  			// * Use randomness so that many clients won't keep hitting Vault
   498  			// at the same time
   499  
   500  			// Set base values and add some backoff
   501  
   502  			v.logger.Printf("[WARN] vault: got error or bad auth, so backing off: %v", err)
   503  			switch {
   504  			case backoff < 5:
   505  				backoff = 5
   506  			case backoff >= 24:
   507  				backoff = 30
   508  			default:
   509  				backoff = backoff * 1.25
   510  			}
   511  
   512  			// Add randomness
   513  			backoff = backoff * (1.0 + rand.Float64())
   514  
   515  			maxBackoff := currentExpiration.Sub(time.Now()) / 2
   516  			if maxBackoff < 0 {
   517  				// We have failed to renew the token past its expiration. Stop
   518  				// renewing with Vault.
   519  				v.logger.Printf("[ERR] vault: failed to renew Vault token before lease expiration. Shutting down Vault client")
   520  				v.l.Lock()
   521  				v.connEstablished = false
   522  				v.connEstablishedErr = err
   523  				v.l.Unlock()
   524  				return
   525  
   526  			} else if backoff > maxBackoff.Seconds() {
   527  				backoff = maxBackoff.Seconds()
   528  			}
   529  
   530  			durationUntilRetry := time.Duration(backoff) * time.Second
   531  			v.logger.Printf("[INFO] vault: backing off for %v", durationUntilRetry)
   532  
   533  			authRenewTimer.Reset(durationUntilRetry)
   534  		}
   535  	}
   536  }
   537  
   538  // renew attempts to renew our Vault token. If the renewal fails, an error is
   539  // returned. This method updates the lastRenewed time
   540  func (v *vaultClient) renew() error {
   541  	// Attempt to renew the token
   542  	secret, err := v.auth.RenewSelf(v.tokenData.CreationTTL)
   543  	if err != nil {
   544  		return err
   545  	}
   546  
   547  	auth := secret.Auth
   548  	if auth == nil {
   549  		return fmt.Errorf("renewal successful but not auth information returned")
   550  	} else if auth.LeaseDuration == 0 {
   551  		return fmt.Errorf("renewal successful but no lease duration returned")
   552  	}
   553  
   554  	v.lastRenewed = time.Now()
   555  	v.logger.Printf("[DEBUG] vault: successfully renewed server token")
   556  	return nil
   557  }
   558  
   559  // getWrappingFn returns an appropriate wrapping function for Nomad Servers
   560  func (v *vaultClient) getWrappingFn() func(operation, path string) string {
   561  	createPath := "auth/token/create"
   562  	role := v.getRole()
   563  	if role != "" {
   564  		createPath = fmt.Sprintf("auth/token/create/%s", role)
   565  	}
   566  
   567  	return func(operation, path string) string {
   568  		// Only wrap the token create operation
   569  		if operation != "POST" || path != createPath {
   570  			return ""
   571  		}
   572  
   573  		return vaultTokenCreateTTL
   574  	}
   575  }
   576  
   577  // parseSelfToken looks up the Vault token in Vault and parses its data storing
   578  // it in the client. If the token is not valid for Nomads purposes an error is
   579  // returned.
   580  func (v *vaultClient) parseSelfToken() error {
   581  	// Get the initial lease duration
   582  	auth := v.client.Auth().Token()
   583  	var self *vapi.Secret
   584  
   585  	// Try looking up the token using the self endpoint
   586  	secret, err := auth.LookupSelf()
   587  	if err != nil {
   588  		// Try looking up our token directly
   589  		self, err = auth.Lookup(v.client.Token())
   590  		if err != nil {
   591  			return fmt.Errorf("failed to lookup Vault periodic token: %v", err)
   592  		}
   593  	}
   594  	self = secret
   595  
   596  	// Read and parse the fields
   597  	var data tokenData
   598  	if err := mapstructure.WeakDecode(self.Data, &data); err != nil {
   599  		return fmt.Errorf("failed to parse Vault token's data block: %v", err)
   600  	}
   601  
   602  	root := false
   603  	for _, p := range data.Policies {
   604  		if p == "root" {
   605  			root = true
   606  			break
   607  		}
   608  	}
   609  
   610  	// Store the token data
   611  	data.Root = root
   612  	v.tokenData = &data
   613  
   614  	// The criteria that must be met for the token to be valid are as follows:
   615  	// 1) If token is non-root or is but has a creation ttl
   616  	//   a) The token must be renewable
   617  	//   b) Token must have a non-zero TTL
   618  	// 2) Must have update capability for "auth/token/lookup/" (used to verify incoming tokens)
   619  	// 3) Must have update capability for "/auth/token/revoke-accessor/" (used to revoke unneeded tokens)
   620  	// 4) If configured to create tokens against a role:
   621  	//   a) Must have read capability for "auth/token/roles/<role_name" (Can just attempt a read)
   622  	//   b) Must have update capability for path "auth/token/create/<role_name>"
   623  	//   c) Role must:
   624  	//     1) Must allow tokens to be renewed
   625  	//     2) Must not have an explicit max TTL
   626  	//     3) Must have non-zero period
   627  	// 5) If not configured against a role, the token must be root
   628  
   629  	var mErr multierror.Error
   630  	role := v.getRole()
   631  	if !root {
   632  		// All non-root tokens must be renewable
   633  		if !data.Renewable {
   634  			multierror.Append(&mErr, fmt.Errorf("Vault token is not renewable or root"))
   635  		}
   636  
   637  		// All non-root tokens must have a lease duration
   638  		if data.CreationTTL == 0 {
   639  			multierror.Append(&mErr, fmt.Errorf("invalid lease duration of zero"))
   640  		}
   641  
   642  		// The lease duration can not be expired
   643  		if data.TTL == 0 {
   644  			multierror.Append(&mErr, fmt.Errorf("token TTL is zero"))
   645  		}
   646  
   647  		// There must be a valid role since we aren't root
   648  		if role == "" {
   649  			multierror.Append(&mErr, fmt.Errorf("token role name must be set when not using a root token"))
   650  		}
   651  
   652  	} else if data.CreationTTL != 0 {
   653  		// If the root token has a TTL it must be renewable
   654  		if !data.Renewable {
   655  			multierror.Append(&mErr, fmt.Errorf("Vault token has a TTL but is not renewable"))
   656  		} else if data.TTL == 0 {
   657  			// If the token has a TTL make sure it has not expired
   658  			multierror.Append(&mErr, fmt.Errorf("token TTL is zero"))
   659  		}
   660  	}
   661  
   662  	// Check we have the correct capabilities
   663  	if err := v.validateCapabilities(role, root); err != nil {
   664  		multierror.Append(&mErr, err)
   665  	}
   666  
   667  	// If given a role validate it
   668  	if role != "" {
   669  		if err := v.validateRole(role); err != nil {
   670  			multierror.Append(&mErr, err)
   671  		}
   672  	}
   673  
   674  	return mErr.ErrorOrNil()
   675  }
   676  
   677  // getRole returns the role name to be used when creating tokens
   678  func (v *vaultClient) getRole() string {
   679  	if v.config.Role != "" {
   680  		return v.config.Role
   681  	}
   682  
   683  	return v.tokenData.Role
   684  }
   685  
   686  // validateCapabilities checks that Nomad's Vault token has the correct
   687  // capabilities.
   688  func (v *vaultClient) validateCapabilities(role string, root bool) error {
   689  	// Check if the token can lookup capabilities.
   690  	var mErr multierror.Error
   691  	_, _, err := v.hasCapability(vaultCapabilitiesLookupPath, vaultCapabilitiesCapability)
   692  	if err != nil {
   693  		// Check if there is a permission denied
   694  		if structs.VaultUnrecoverableError.MatchString(err.Error()) {
   695  			// Since we can't read permissions, we just log a warning that we
   696  			// can't tell if the Vault token will work
   697  			msg := fmt.Sprintf("Can not lookup token capabilities. "+
   698  				"As such certain operations may fail in the future. "+
   699  				"Please give Nomad a Vault token with one of the following "+
   700  				"capabilities %q on %q so that the required capabilities can be verified",
   701  				vaultCapabilitiesCapability, vaultCapabilitiesLookupPath)
   702  			v.logger.Printf("[WARN] vault: %s", msg)
   703  			return nil
   704  		} else {
   705  			multierror.Append(&mErr, err)
   706  		}
   707  	}
   708  
   709  	// verify is a helper function that verifies the token has one of the
   710  	// capabilities on the given path and adds an issue to the error
   711  	verify := func(path string, requiredCaps []string) {
   712  		ok, caps, err := v.hasCapability(path, requiredCaps)
   713  		if err != nil {
   714  			multierror.Append(&mErr, err)
   715  		} else if !ok {
   716  			multierror.Append(&mErr,
   717  				fmt.Errorf("token must have one of the following capabilities %q on %q; has %v", requiredCaps, path, caps))
   718  		}
   719  	}
   720  
   721  	// Check if we are verifying incoming tokens
   722  	if !v.config.AllowsUnauthenticated() {
   723  		verify(vaultTokenLookupPath, vaultTokenLookupCapability)
   724  	}
   725  
   726  	// Verify we can renew our selves tokens
   727  	verify(vaultTokenRenewPath, vaultTokenRenewCapability)
   728  
   729  	// Verify we can revoke tokens
   730  	verify(vaultTokenRevokePath, vaultTokenRevokeCapability)
   731  
   732  	// If we are using a role verify the capability
   733  	if role != "" {
   734  		// Verify we can read the role
   735  		verify(fmt.Sprintf(vaultRoleLookupPath, role), vaultRoleLookupCapability)
   736  
   737  		// Verify we can create from the role
   738  		verify(fmt.Sprintf(vaultTokenRoleCreatePath, role), vaultTokenRoleCreateCapability)
   739  	}
   740  
   741  	return mErr.ErrorOrNil()
   742  }
   743  
   744  // hasCapability takes a path and returns whether the token has at least one of
   745  // the required capabilities on the given path. It also returns the set of
   746  // capabilities the token does have as well as any error that occurred.
   747  func (v *vaultClient) hasCapability(path string, required []string) (bool, []string, error) {
   748  	caps, err := v.client.Sys().CapabilitiesSelf(path)
   749  	if err != nil {
   750  		return false, nil, err
   751  	}
   752  	for _, c := range caps {
   753  		for _, r := range required {
   754  			if c == r {
   755  				return true, caps, nil
   756  			}
   757  		}
   758  	}
   759  	return false, caps, nil
   760  }
   761  
   762  // validateRole contacts Vault and checks that the given Vault role is valid for
   763  // the purposes of being used by Nomad
   764  func (v *vaultClient) validateRole(role string) error {
   765  	if role == "" {
   766  		return fmt.Errorf("Invalid empty role name")
   767  	}
   768  
   769  	// Validate the role
   770  	rsecret, err := v.client.Logical().Read(fmt.Sprintf("auth/token/roles/%s", role))
   771  	if err != nil {
   772  		return fmt.Errorf("failed to lookup role %q: %v", role, err)
   773  	}
   774  	if rsecret == nil {
   775  		return fmt.Errorf("Role %q does not exist", role)
   776  	}
   777  
   778  	// Read and parse the fields
   779  	var data struct {
   780  		ExplicitMaxTtl int `mapstructure:"explicit_max_ttl"`
   781  		Orphan         bool
   782  		Period         int
   783  		Renewable      bool
   784  	}
   785  	if err := mapstructure.WeakDecode(rsecret.Data, &data); err != nil {
   786  		return fmt.Errorf("failed to parse Vault role's data block: %v", err)
   787  	}
   788  
   789  	// Validate the role is acceptable
   790  	var mErr multierror.Error
   791  	if !data.Renewable {
   792  		multierror.Append(&mErr, fmt.Errorf("Role must allow tokens to be renewed"))
   793  	}
   794  
   795  	if data.ExplicitMaxTtl != 0 {
   796  		multierror.Append(&mErr, fmt.Errorf("Role can not use an explicit max ttl. Token must be periodic."))
   797  	}
   798  
   799  	if data.Period == 0 {
   800  		multierror.Append(&mErr, fmt.Errorf("Role must have a non-zero period to make tokens periodic."))
   801  	}
   802  
   803  	return mErr.ErrorOrNil()
   804  }
   805  
   806  // ConnectionEstablished returns whether a connection to Vault has been
   807  // established and any error that potentially caused it to be false
   808  func (v *vaultClient) ConnectionEstablished() (bool, error) {
   809  	v.l.Lock()
   810  	defer v.l.Unlock()
   811  	return v.connEstablished, v.connEstablishedErr
   812  }
   813  
   814  // Enabled returns whether the client is active
   815  func (v *vaultClient) Enabled() bool {
   816  	v.l.Lock()
   817  	defer v.l.Unlock()
   818  	return v.config.IsEnabled()
   819  }
   820  
   821  // Active returns whether the client is active
   822  func (v *vaultClient) Active() bool {
   823  	return atomic.LoadInt32(&v.active) == 1
   824  }
   825  
   826  // CreateToken takes the allocation and task and returns an appropriate Vault
   827  // token. The call is rate limited and may be canceled with the passed policy.
   828  // When the error is recoverable, it will be of type RecoverableError
   829  func (v *vaultClient) CreateToken(ctx context.Context, a *structs.Allocation, task string) (*vapi.Secret, error) {
   830  	if !v.Enabled() {
   831  		return nil, fmt.Errorf("Vault integration disabled")
   832  	}
   833  	if !v.Active() {
   834  		return nil, structs.NewRecoverableError(fmt.Errorf("Vault client not active"), true)
   835  	}
   836  
   837  	// Check if we have established a connection with Vault
   838  	if established, err := v.ConnectionEstablished(); !established && err == nil {
   839  		return nil, structs.NewRecoverableError(fmt.Errorf("Connection to Vault has not been established"), true)
   840  	} else if err != nil {
   841  		return nil, err
   842  	}
   843  
   844  	// Track how long the request takes
   845  	defer metrics.MeasureSince([]string{"nomad", "vault", "create_token"}, time.Now())
   846  
   847  	// Retrieve the Vault block for the task
   848  	policies := a.Job.VaultPolicies()
   849  	if policies == nil {
   850  		return nil, fmt.Errorf("Job doesn't require Vault policies")
   851  	}
   852  	tg, ok := policies[a.TaskGroup]
   853  	if !ok {
   854  		return nil, fmt.Errorf("Task group does not require Vault policies")
   855  	}
   856  	taskVault, ok := tg[task]
   857  	if !ok {
   858  		return nil, fmt.Errorf("Task does not require Vault policies")
   859  	}
   860  
   861  	// Build the creation request
   862  	req := &vapi.TokenCreateRequest{
   863  		Policies: taskVault.Policies,
   864  		Metadata: map[string]string{
   865  			"AllocationID": a.ID,
   866  			"Task":         task,
   867  			"NodeID":       a.NodeID,
   868  		},
   869  		TTL:         v.childTTL,
   870  		DisplayName: fmt.Sprintf("%s-%s", a.ID, task),
   871  	}
   872  
   873  	// Ensure we are under our rate limit
   874  	if err := v.limiter.Wait(ctx); err != nil {
   875  		return nil, err
   876  	}
   877  
   878  	// Make the request and switch depending on whether we are using a root
   879  	// token or a role based token
   880  	var secret *vapi.Secret
   881  	var err error
   882  	role := v.getRole()
   883  	if v.tokenData.Root && role == "" {
   884  		req.Period = v.childTTL
   885  		secret, err = v.auth.Create(req)
   886  	} else {
   887  		// Make the token using the role
   888  		secret, err = v.auth.CreateWithRole(req, v.getRole())
   889  	}
   890  
   891  	// Determine whether it is unrecoverable
   892  	if err != nil {
   893  		if structs.VaultUnrecoverableError.MatchString(err.Error()) {
   894  			return secret, err
   895  		}
   896  
   897  		// The error is recoverable
   898  		return nil, structs.NewRecoverableError(err, true)
   899  	}
   900  
   901  	// Validate the response
   902  	var validationErr error
   903  	if secret == nil {
   904  		validationErr = fmt.Errorf("Vault returned nil Secret")
   905  	} else if secret.WrapInfo == nil {
   906  		validationErr = fmt.Errorf("Vault returned Secret with nil WrapInfo. Secret warnings: %v", secret.Warnings)
   907  	} else if secret.WrapInfo.WrappedAccessor == "" {
   908  		validationErr = fmt.Errorf("Vault returned WrapInfo without WrappedAccessor. Secret warnings: %v", secret.Warnings)
   909  	}
   910  	if validationErr != nil {
   911  		v.logger.Printf("[WARN] vault: failed to CreateToken: %v", err)
   912  		return nil, structs.NewRecoverableError(validationErr, true)
   913  	}
   914  
   915  	// Got a valid response
   916  	return secret, nil
   917  }
   918  
   919  // LookupToken takes a Vault token and does a lookup against Vault. The call is
   920  // rate limited and may be canceled with passed context.
   921  func (v *vaultClient) LookupToken(ctx context.Context, token string) (*vapi.Secret, error) {
   922  	if !v.Enabled() {
   923  		return nil, fmt.Errorf("Vault integration disabled")
   924  	}
   925  
   926  	if !v.Active() {
   927  		return nil, fmt.Errorf("Vault client not active")
   928  	}
   929  
   930  	// Check if we have established a connection with Vault
   931  	if established, err := v.ConnectionEstablished(); !established && err == nil {
   932  		return nil, structs.NewRecoverableError(fmt.Errorf("Connection to Vault has not been established"), true)
   933  	} else if err != nil {
   934  		return nil, err
   935  	}
   936  
   937  	// Track how long the request takes
   938  	defer metrics.MeasureSince([]string{"nomad", "vault", "lookup_token"}, time.Now())
   939  
   940  	// Ensure we are under our rate limit
   941  	if err := v.limiter.Wait(ctx); err != nil {
   942  		return nil, err
   943  	}
   944  
   945  	// Lookup the token
   946  	return v.auth.Lookup(token)
   947  }
   948  
   949  // PoliciesFrom parses the set of policies returned by a token lookup.
   950  func PoliciesFrom(s *vapi.Secret) ([]string, error) {
   951  	if s == nil {
   952  		return nil, fmt.Errorf("cannot parse nil Vault secret")
   953  	}
   954  	var data tokenData
   955  	if err := mapstructure.WeakDecode(s.Data, &data); err != nil {
   956  		return nil, fmt.Errorf("failed to parse Vault token's data block: %v", err)
   957  	}
   958  
   959  	return data.Policies, nil
   960  }
   961  
   962  // RevokeTokens revokes the passed set of accessors. If committed is set, the
   963  // purge function passed to the client is called. If there is an error purging
   964  // either because of Vault failures or because of the purge function, the
   965  // revocation is retried until the tokens TTL.
   966  func (v *vaultClient) RevokeTokens(ctx context.Context, accessors []*structs.VaultAccessor, committed bool) error {
   967  	if !v.Enabled() {
   968  		return nil
   969  	}
   970  
   971  	if !v.Active() {
   972  		return fmt.Errorf("Vault client not active")
   973  	}
   974  
   975  	// Track how long the request takes
   976  	defer metrics.MeasureSince([]string{"nomad", "vault", "revoke_tokens"}, time.Now())
   977  
   978  	// Check if we have established a connection with Vault. If not just add it
   979  	// to the queue
   980  	if established, err := v.ConnectionEstablished(); !established && err == nil {
   981  		// Only bother tracking it for later revocation if the accessor was
   982  		// committed
   983  		if committed {
   984  			v.storeForRevocation(accessors)
   985  		}
   986  
   987  		// Track that we are abandoning these accessors.
   988  		metrics.IncrCounter([]string{"nomad", "vault", "undistributed_tokens_abandoned"}, float32(len(accessors)))
   989  		return nil
   990  	}
   991  
   992  	// Attempt to revoke immediately and if it fails, add it to the revoke queue
   993  	err := v.parallelRevoke(ctx, accessors)
   994  	if err != nil {
   995  		// If it is uncommitted, it is a best effort revoke as it will shortly
   996  		// TTL within the cubbyhole and has not been leaked to any outside
   997  		// system
   998  		if !committed {
   999  			metrics.IncrCounter([]string{"nomad", "vault", "undistributed_tokens_abandoned"}, float32(len(accessors)))
  1000  			return nil
  1001  		}
  1002  
  1003  		v.logger.Printf("[WARN] vault: failed to revoke tokens. Will reattempt until TTL: %v", err)
  1004  		v.storeForRevocation(accessors)
  1005  		return nil
  1006  	} else if !committed {
  1007  		// Mark that it was revoked but there is nothing to purge so exit
  1008  		metrics.IncrCounter([]string{"nomad", "vault", "undistributed_tokens_revoked"}, float32(len(accessors)))
  1009  		return nil
  1010  	}
  1011  
  1012  	if err := v.purgeFn(accessors); err != nil {
  1013  		v.logger.Printf("[ERR] vault: failed to purge Vault accessors: %v", err)
  1014  		v.storeForRevocation(accessors)
  1015  		return nil
  1016  	}
  1017  
  1018  	// Track that it was revoked successfully
  1019  	metrics.IncrCounter([]string{"nomad", "vault", "distributed_tokens_revoked"}, float32(len(accessors)))
  1020  
  1021  	return nil
  1022  }
  1023  
  1024  // storeForRevocation stores the passed set of accessors for revocation. It
  1025  // captures their effective TTL by storing their create TTL plus the current
  1026  // time.
  1027  func (v *vaultClient) storeForRevocation(accessors []*structs.VaultAccessor) {
  1028  	v.revLock.Lock()
  1029  	v.statsLock.Lock()
  1030  	now := time.Now()
  1031  	for _, a := range accessors {
  1032  		v.revoking[a] = now.Add(time.Duration(a.CreationTTL) * time.Second)
  1033  	}
  1034  	v.stats.TrackedForRevoke = len(v.revoking)
  1035  	v.statsLock.Unlock()
  1036  	v.revLock.Unlock()
  1037  }
  1038  
  1039  // parallelRevoke revokes the passed VaultAccessors in parallel.
  1040  func (v *vaultClient) parallelRevoke(ctx context.Context, accessors []*structs.VaultAccessor) error {
  1041  	if !v.Enabled() {
  1042  		return fmt.Errorf("Vault integration disabled")
  1043  	}
  1044  
  1045  	if !v.Active() {
  1046  		return fmt.Errorf("Vault client not active")
  1047  	}
  1048  
  1049  	// Check if we have established a connection with Vault
  1050  	if established, err := v.ConnectionEstablished(); !established && err == nil {
  1051  		return structs.NewRecoverableError(fmt.Errorf("Connection to Vault has not been established"), true)
  1052  	} else if err != nil {
  1053  		return err
  1054  	}
  1055  
  1056  	g, pCtx := errgroup.WithContext(ctx)
  1057  
  1058  	// Cap the handlers
  1059  	handlers := len(accessors)
  1060  	if handlers > maxParallelRevokes {
  1061  		handlers = maxParallelRevokes
  1062  	}
  1063  
  1064  	// Create the Vault Tokens
  1065  	input := make(chan *structs.VaultAccessor, handlers)
  1066  	for i := 0; i < handlers; i++ {
  1067  		g.Go(func() error {
  1068  			for {
  1069  				select {
  1070  				case va, ok := <-input:
  1071  					if !ok {
  1072  						return nil
  1073  					}
  1074  
  1075  					if err := v.auth.RevokeAccessor(va.Accessor); err != nil {
  1076  						return fmt.Errorf("failed to revoke token (alloc: %q, node: %q, task: %q): %v", va.AllocID, va.NodeID, va.Task, err)
  1077  					}
  1078  				case <-pCtx.Done():
  1079  					return nil
  1080  				}
  1081  			}
  1082  		})
  1083  	}
  1084  
  1085  	// Send the input
  1086  	go func() {
  1087  		defer close(input)
  1088  		for _, va := range accessors {
  1089  			select {
  1090  			case <-pCtx.Done():
  1091  				return
  1092  			case input <- va:
  1093  			}
  1094  		}
  1095  
  1096  	}()
  1097  
  1098  	// Wait for everything to complete
  1099  	return g.Wait()
  1100  }
  1101  
  1102  // revokeDaemon should be called in a goroutine and is used to periodically
  1103  // revoke Vault accessors that failed the original revocation
  1104  func (v *vaultClient) revokeDaemon() {
  1105  	ticker := time.NewTicker(vaultRevocationIntv)
  1106  	defer ticker.Stop()
  1107  
  1108  	for {
  1109  		select {
  1110  		case <-v.tomb.Dying():
  1111  			return
  1112  		case now := <-ticker.C:
  1113  			if established, _ := v.ConnectionEstablished(); !established {
  1114  				continue
  1115  			}
  1116  
  1117  			v.revLock.Lock()
  1118  
  1119  			// Fast path
  1120  			if len(v.revoking) == 0 {
  1121  				v.revLock.Unlock()
  1122  				continue
  1123  			}
  1124  
  1125  			// Build the list of allocations that need to revoked while pruning any TTL'd checks
  1126  			revoking := make([]*structs.VaultAccessor, 0, len(v.revoking))
  1127  			for va, ttl := range v.revoking {
  1128  				if now.After(ttl) {
  1129  					delete(v.revoking, va)
  1130  				} else {
  1131  					revoking = append(revoking, va)
  1132  				}
  1133  			}
  1134  
  1135  			if err := v.parallelRevoke(context.Background(), revoking); err != nil {
  1136  				v.logger.Printf("[WARN] vault: background token revocation errored: %v", err)
  1137  				v.revLock.Unlock()
  1138  				continue
  1139  			}
  1140  
  1141  			// Unlock before a potentially expensive operation
  1142  			v.revLock.Unlock()
  1143  
  1144  			// Call the passed in token revocation function
  1145  			if err := v.purgeFn(revoking); err != nil {
  1146  				// Can continue since revocation is idempotent
  1147  				v.logger.Printf("[ERR] vault: token revocation errored: %v", err)
  1148  				continue
  1149  			}
  1150  
  1151  			// Track that tokens were revoked successfully
  1152  			metrics.IncrCounter([]string{"nomad", "vault", "distributed_tokens_revoked"}, float32(len(revoking)))
  1153  
  1154  			// Can delete from the tracked list now that we have purged
  1155  			v.revLock.Lock()
  1156  			v.statsLock.Lock()
  1157  			for _, va := range revoking {
  1158  				delete(v.revoking, va)
  1159  			}
  1160  			v.stats.TrackedForRevoke = len(v.revoking)
  1161  			v.statsLock.Unlock()
  1162  			v.revLock.Unlock()
  1163  
  1164  		}
  1165  	}
  1166  }
  1167  
  1168  // purgeVaultAccessors creates a Raft transaction to remove the passed Vault
  1169  // Accessors
  1170  func (s *Server) purgeVaultAccessors(accessors []*structs.VaultAccessor) error {
  1171  	// Commit this update via Raft
  1172  	req := structs.VaultAccessorsRequest{Accessors: accessors}
  1173  	_, _, err := s.raftApply(structs.VaultAccessorDeregisterRequestType, req)
  1174  	return err
  1175  }
  1176  
  1177  // wrapNilError is a helper that returns a wrapped function that returns a nil
  1178  // error
  1179  func wrapNilError(f func()) func() error {
  1180  	return func() error {
  1181  		f()
  1182  		return nil
  1183  	}
  1184  }
  1185  
  1186  // setLimit is used to update the rate limit
  1187  func (v *vaultClient) setLimit(l rate.Limit) {
  1188  	v.l.Lock()
  1189  	defer v.l.Unlock()
  1190  	v.limiter = rate.NewLimiter(l, int(l))
  1191  }
  1192  
  1193  // Stats is used to query the state of the blocked eval tracker.
  1194  func (v *vaultClient) Stats() *VaultStats {
  1195  	// Allocate a new stats struct
  1196  	stats := new(VaultStats)
  1197  
  1198  	v.statsLock.RLock()
  1199  	defer v.statsLock.RUnlock()
  1200  
  1201  	// Copy all the stats
  1202  	stats.TrackedForRevoke = v.stats.TrackedForRevoke
  1203  
  1204  	return stats
  1205  }
  1206  
  1207  // EmitStats is used to export metrics about the blocked eval tracker while enabled
  1208  func (v *vaultClient) EmitStats(period time.Duration, stopCh chan struct{}) {
  1209  	for {
  1210  		select {
  1211  		case <-time.After(period):
  1212  			stats := v.Stats()
  1213  			metrics.SetGauge([]string{"nomad", "vault", "distributed_tokens_revoking"}, float32(stats.TrackedForRevoke))
  1214  		case <-stopCh:
  1215  			return
  1216  		}
  1217  	}
  1218  }