// Source: github.com/zoomfoo/nomad@v0.8.5-0.20180907175415-f28fd3a1a056/nomad/vault.go

package nomad

import (
	"context"
	"errors"
	"fmt"
	"log"
	"math/rand"
	"sync"
	"sync/atomic"
	"time"

	"gopkg.in/tomb.v2"

	metrics "github.com/armon/go-metrics"
	multierror "github.com/hashicorp/go-multierror"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/nomad/structs/config"
	vapi "github.com/hashicorp/vault/api"
	"github.com/mitchellh/mapstructure"

	"golang.org/x/sync/errgroup"
	"golang.org/x/time/rate"
)

const (
	// vaultTokenCreateTTL is the duration the wrapped token for the client is
	// valid for. The units are in seconds.
	vaultTokenCreateTTL = "60s"

	// minimumTokenTTL is the minimum Token TTL allowed for child tokens.
	minimumTokenTTL = 5 * time.Minute

	// defaultTokenTTL is the default Token TTL used when the passed token is a
	// root token such that child tokens aren't being created against a role
	// that has defined a TTL
	defaultTokenTTL = "72h"

	// requestRateLimit is the maximum number of requests per second Nomad will
	// make against Vault
	requestRateLimit rate.Limit = 500.0

	// maxParallelRevokes is the maximum number of parallel Vault
	// token revocation requests
	maxParallelRevokes = 64

	// vaultRevocationIntv is the interval at which Vault tokens that failed
	// initial revocation are retried
	vaultRevocationIntv = 5 * time.Minute

	// vaultCapabilitiesLookupPath is the path to lookup the capabilities of
	// one's token.
	vaultCapabilitiesLookupPath = "sys/capabilities-self"

	// vaultTokenRenewPath is the path used to renew our token
	vaultTokenRenewPath = "auth/token/renew-self"

	// vaultTokenLookupPath is the path used to lookup a token
	vaultTokenLookupPath = "auth/token/lookup"

	// vaultTokenRevokePath is the path used to revoke a token
	vaultTokenRevokePath = "auth/token/revoke-accessor"

	// vaultRoleLookupPath is the path to lookup a role. The %s verb is
	// replaced with the role name.
	vaultRoleLookupPath = "auth/token/roles/%s"

	// vaultTokenRoleCreatePath is the path to create a token from a role. The
	// %s verb is replaced with the role name.
	vaultTokenRoleCreatePath = "auth/token/create/%s"
)

var (
	// vaultCapabilitiesCapability is the expected capability of Nomad's Vault
	// token on the path. The token must have at least one of the
	// capabilities.
	vaultCapabilitiesCapability = []string{"update", "root"}

	// vaultTokenRenewCapability is the expected capability Nomad's
	// Vault token should have on the path. The token must have at least one of
	// the capabilities.
	vaultTokenRenewCapability = []string{"update", "root"}

	// vaultTokenLookupCapability is the expected capability Nomad's
	// Vault token should have on the path. The token must have at least one of
	// the capabilities.
	vaultTokenLookupCapability = []string{"update", "root"}

	// vaultTokenRevokeCapability is the expected capability Nomad's
	// Vault token should have on the path. The token must have at least one of
	// the capabilities.
	vaultTokenRevokeCapability = []string{"update", "root"}

	// vaultRoleLookupCapability is the expected capability Nomad's Vault
	// token should have on the path. The token must have at least one of the
	// capabilities.
	vaultRoleLookupCapability = []string{"read", "root"}

	// vaultTokenRoleCreateCapability is the expected capability Nomad's Vault
	// token should have on the path. The token must have at least one of the
	// capabilities.
	vaultTokenRoleCreateCapability = []string{"update", "root"}
)

// VaultClient is the Server's interface for interfacing with Vault
type VaultClient interface {
	// SetActive activates or de-activates the Vault client. When active, token
	// creation/lookup/revocation operations are allowed.
	SetActive(active bool)

	// SetConfig updates the config used by the Vault client
	SetConfig(config *config.VaultConfig) error

	// CreateToken takes an allocation and task and returns an appropriate Vault
	// Secret
	CreateToken(ctx context.Context, a *structs.Allocation, task string) (*vapi.Secret, error)

	// LookupToken takes a token string and returns its capabilities.
	LookupToken(ctx context.Context, token string) (*vapi.Secret, error)

	// RevokeTokens takes a set of token accessors and revokes the tokens
	RevokeTokens(ctx context.Context, accessors []*structs.VaultAccessor, committed bool) error

	// Stop is used to stop token renewal
	Stop()

	// Running returns whether the Vault client is running
	Running() bool

	// Stats returns the Vault client's statistics
	Stats() *VaultStats

	// EmitStats emits the client's statistics at the given period until stopCh
	// is called.
	EmitStats(period time.Duration, stopCh chan struct{})
}

// VaultStats returns all the stats about Vault tokens created and managed by
// Nomad.
type VaultStats struct {
	// TrackedForRevoke is the count of tokens that are being tracked to be
	// revoked since they could not be immediately revoked.
	TrackedForRevoke int
}

// PurgeVaultAccessorFn is called to remove VaultAccessors from the system. If
// the function returns an error, the token will still be tracked and revocation
// will retry till there is a success
type PurgeVaultAccessorFn func(accessors []*structs.VaultAccessor) error

// tokenData holds the relevant information about the Vault token passed to the
// client. All fields except Root are decoded from the token lookup response;
// Root is computed by parseSelfToken from the token's policies.
type tokenData struct {
	CreationTTL int      `mapstructure:"creation_ttl"`
	TTL         int      `mapstructure:"ttl"`
	Renewable   bool     `mapstructure:"renewable"`
	Policies    []string `mapstructure:"policies"`
	Role        string   `mapstructure:"role"`
	Root        bool
}

// vaultClient is the Server's implementation of the VaultClient interface. The
// client renews the PeriodicToken given in the Vault configuration and provides
// the Server with the ability to create child tokens and lookup the permissions
// of tokens.
type vaultClient struct {
	// limiter is used to rate limit requests to Vault
	limiter *rate.Limiter

	// client is the Vault API client
	client *vapi.Client

	// auth is the Vault token auth API client
	auth *vapi.TokenAuth

	// config is the user passed Vault config
	config *config.VaultConfig

	// connEstablished marks whether we have an established connection to Vault.
	connEstablished bool

	// connEstablishedErr marks an error that can occur when establishing a
	// connection
	connEstablishedErr error

	// token is the raw token used by the client
	token string

	// tokenData is the data of the passed Vault token
	tokenData *tokenData

	// revoking tracks the VaultAccessors that must be revoked. The value is
	// the time until which revocation of the accessor is tracked. purgeFn is
	// the callback used to remove accessors once revoked and revLock guards
	// the revoking map.
	revoking map[*structs.VaultAccessor]time.Time
	purgeFn  PurgeVaultAccessorFn
	revLock  sync.Mutex

	// active indicates whether the vaultClient is active. It should be
	// accessed using a helper and updated atomically
	active int32

	// running indicates whether the vault client is started.
	running bool

	// childTTL is the TTL for child tokens.
	childTTL string

	// lastRenewed is the time the token was last renewed
	lastRenewed time.Time

	// tomb tracks the background goroutines so they can be killed and waited
	// on; logger receives the client's log output.
	tomb   *tomb.Tomb
	logger *log.Logger

	// stats stores the stats and statsLock guards it
	stats     *VaultStats
	statsLock sync.RWMutex

	// l is used to lock the configuration aspects of the client such that
	// multiple callers can't cause conflicting config updates
	l sync.Mutex
}

// NewVaultClient returns a Vault client from the given config. If the client
// couldn't be made an error is returned. Both the config and the logger must
// be non-nil. When the config enables Vault, the API client is built and the
// connection and revocation goroutines are started; otherwise an idle,
// non-running client is returned.
func NewVaultClient(c *config.VaultConfig, logger *log.Logger, purgeFn PurgeVaultAccessorFn) (*vaultClient, error) {
	if c == nil {
		return nil, fmt.Errorf("must pass valid VaultConfig")
	}

	if logger == nil {
		return nil, fmt.Errorf("must pass valid logger")
	}

	v := &vaultClient{
		config:   c,
		logger:   logger,
		limiter:  rate.NewLimiter(requestRateLimit, int(requestRateLimit)),
		revoking: make(map[*structs.VaultAccessor]time.Time),
		purgeFn:  purgeFn,
		tomb:     &tomb.Tomb{},
		stats:    new(VaultStats),
	}

	if v.config.IsEnabled() {
		if err := v.buildClient(); err != nil {
			return nil, err
		}

		// Launch the required goroutines
		v.tomb.Go(wrapNilError(v.establishConnection))
		v.tomb.Go(wrapNilError(v.revokeDaemon))

		v.running = true
	}

	return v, nil
}

// Stop marks the client as not running and, if it was running, kills the
// background goroutines, waits for them to exit and resets the client state.
// Calling Stop on a client that is not running is a no-op.
func (v *vaultClient) Stop() {
	// The running flag is swapped under the lock, but the lock is released
	// before waiting on the tomb.
	v.l.Lock()
	running := v.running
	v.running = false
	v.l.Unlock()

	if running {
		v.tomb.Kill(nil)
		v.tomb.Wait()
		v.flush()
	}
}

// Running returns whether the Vault client has been started and not stopped.
func (v *vaultClient) Running() bool {
	v.l.Lock()
	defer v.l.Unlock()
	return v.running
}

// SetActive activates or de-activates the Vault client. When active, token
// creation/lookup/revocation operations are allowed. All queued revocations
// are cancelled when set inactive as it is assumed another instance is taking
// over.
//
// NOTE(review): the revocation queue is cleared on every call, including
// activation, and stats.TrackedForRevoke is not updated here - confirm this
// is intended.
func (v *vaultClient) SetActive(active bool) {
	if active {
		atomic.StoreInt32(&v.active, 1)
	} else {
		atomic.StoreInt32(&v.active, 0)
	}

	// Clear out the revoking tokens
	v.revLock.Lock()
	v.revoking = make(map[*structs.VaultAccessor]time.Time)
	v.revLock.Unlock()

	return
}

// flush is used to reset the state of the vault client. It drops the API
// client, the token data and the revocation queue, and installs a fresh tomb.
//
// NOTE(review): v.revoking is replaced while holding v.l but not v.revLock,
// unlike SetActive and storeForRevocation - confirm no concurrent access is
// possible at this point.
func (v *vaultClient) flush() {
	v.l.Lock()
	defer v.l.Unlock()

	v.client = nil
	v.auth = nil
	v.connEstablished = false
	v.connEstablishedErr = nil
	v.token = ""
	v.tokenData = nil
	v.revoking = make(map[*structs.VaultAccessor]time.Time)
	v.childTTL = ""
	v.tomb = &tomb.Tomb{}
}

// SetConfig is used to update the Vault config being used. A temporary outage
// may occur after calling as it re-establishes a connection to Vault. Passing
// a config equal to the current one is a no-op. An error is returned when the
// config is nil or the client cannot be rebuilt from it.
//
// NOTE(review): the tomb is waited on while v.l is held, and
// establishConnection acquires v.l in its retry path - confirm this cannot
// block.
func (v *vaultClient) SetConfig(config *config.VaultConfig) error {
	if config == nil {
		return fmt.Errorf("must pass valid VaultConfig")
	}

	v.l.Lock()
	defer v.l.Unlock()

	// If reloading the same config, no-op
	if v.config.IsEqual(config) {
		return nil
	}

	// Kill any background routines
	if v.running {
		// Stop accepting any new request
		v.connEstablished = false

		// Kill any background routine and create a new tomb
		v.tomb.Kill(nil)
		v.tomb.Wait()
		v.tomb = &tomb.Tomb{}
		v.running = false
	}

	// Store the new config
	v.config = config

	// Check if we should relaunch
	if v.config.IsEnabled() {
		// Rebuild the client
		if err := v.buildClient(); err != nil {
			return err
		}

		// Launch the required goroutines
		v.tomb.Go(wrapNilError(v.establishConnection))
		v.tomb.Go(wrapNilError(v.revokeDaemon))
		v.running = true
	}

	return nil
}

// buildClient is used to build a Vault client based on the stored Vault config.
// It validates that a token and address are configured, resolves the child
// token TTL (defaulting to defaultTokenTTL), and stores the API client, the
// token auth client and the raw token on v.
func (v *vaultClient) buildClient() error {
	// Validate we have the required fields.
	if v.config.Token == "" {
		return errors.New("Vault token must be set")
	} else if v.config.Addr == "" {
		return errors.New("Vault address must be set")
	}

	// Parse the TTL if it is set
	if v.config.TaskTokenTTL != "" {
		d, err := time.ParseDuration(v.config.TaskTokenTTL)
		if err != nil {
			return fmt.Errorf("failed to parse TaskTokenTTL %q: %v", v.config.TaskTokenTTL, err)
		}

		// NOTE(review): the message names "ChildTokenTTL" although the
		// validated setting is TaskTokenTTL.
		if d.Nanoseconds() < minimumTokenTTL.Nanoseconds() {
			return fmt.Errorf("ChildTokenTTL is less than minimum allowed of %v", minimumTokenTTL)
		}

		v.childTTL = v.config.TaskTokenTTL
	} else {
		// Default the TaskTokenTTL
		v.childTTL = defaultTokenTTL
	}

	// Get the Vault API configuration
	apiConf, err := v.config.ApiConfig()
	if err != nil {
		return fmt.Errorf("Failed to create Vault API config: %v", err)
	}

	// Create the Vault API client
	client, err := vapi.NewClient(apiConf)
	if err != nil {
		v.logger.Printf("[ERR] vault: failed to create Vault client. Not retrying: %v", err)
		return err
	}

	// Set the token and store the client
	v.token = v.config.Token
	client.SetToken(v.token)
	v.client = client
	v.auth = client.Auth().Token()
	return nil
}

// establishConnection is used to make first contact with Vault. This should be
// called in a go-routine since the connection is retried until the Vault Client
// is stopped or the connection is successfully made at which point the renew
// loop is started.
406 func (v *vaultClient) establishConnection() { 407 // Create the retry timer and set initial duration to zero so it fires 408 // immediately 409 retryTimer := time.NewTimer(0) 410 initStatus := false 411 OUTER: 412 for { 413 select { 414 case <-v.tomb.Dying(): 415 return 416 case <-retryTimer.C: 417 // Ensure the API is reachable 418 if !initStatus { 419 if _, err := v.client.Sys().InitStatus(); err != nil { 420 v.logger.Printf("[WARN] vault: failed to contact Vault API. Retrying in %v: %v", 421 v.config.ConnectionRetryIntv, err) 422 retryTimer.Reset(v.config.ConnectionRetryIntv) 423 continue OUTER 424 } 425 initStatus = true 426 } 427 // Retry validating the token till success 428 if err := v.parseSelfToken(); err != nil { 429 v.logger.Printf("[ERR] vault: failed to validate self token/role. Retrying in %v: %v", v.config.ConnectionRetryIntv, err) 430 retryTimer.Reset(v.config.ConnectionRetryIntv) 431 v.l.Lock() 432 v.connEstablished = true 433 v.connEstablishedErr = fmt.Errorf("Nomad Server failed to establish connections to Vault: %v", err) 434 v.l.Unlock() 435 continue OUTER 436 } 437 break OUTER 438 } 439 } 440 441 // Set the wrapping function such that token creation is wrapped now 442 // that we know our role 443 v.client.SetWrappingLookupFunc(v.getWrappingFn()) 444 445 // If we are given a non-root token, start renewing it 446 if v.tokenData.Root && v.tokenData.CreationTTL == 0 { 447 v.logger.Printf("[DEBUG] vault: not renewing token as it is root") 448 } else { 449 v.logger.Printf("[DEBUG] vault: token lease duration is %v", 450 time.Duration(v.tokenData.CreationTTL)*time.Second) 451 v.tomb.Go(wrapNilError(v.renewalLoop)) 452 } 453 454 v.l.Lock() 455 v.connEstablished = true 456 v.connEstablishedErr = nil 457 v.l.Unlock() 458 } 459 460 // renewalLoop runs the renew loop. This should only be called if we are given a 461 // non-root token. 
462 func (v *vaultClient) renewalLoop() { 463 // Create the renewal timer and set initial duration to zero so it fires 464 // immediately 465 authRenewTimer := time.NewTimer(0) 466 467 // Backoff is to reduce the rate we try to renew with Vault under error 468 // situations 469 backoff := 0.0 470 471 for { 472 select { 473 case <-v.tomb.Dying(): 474 return 475 case <-authRenewTimer.C: 476 // Renew the token and determine the new expiration 477 err := v.renew() 478 currentExpiration := v.lastRenewed.Add(time.Duration(v.tokenData.CreationTTL) * time.Second) 479 480 // Successfully renewed 481 if err == nil { 482 // If we take the expiration (lastRenewed + auth duration) and 483 // subtract the current time, we get a duration until expiry. 484 // Set the timer to poke us after half of that time is up. 485 durationUntilRenew := currentExpiration.Sub(time.Now()) / 2 486 487 v.logger.Printf("[INFO] vault: renewing token in %v", durationUntilRenew) 488 authRenewTimer.Reset(durationUntilRenew) 489 490 // Reset any backoff 491 backoff = 0 492 break 493 } 494 495 // Back off, increasing the amount of backoff each time. 
There are some rules: 496 // 497 // * If we have an existing authentication that is going to expire, 498 // never back off more than half of the amount of time remaining 499 // until expiration 500 // * Never back off more than 30 seconds multiplied by a random 501 // value between 1 and 2 502 // * Use randomness so that many clients won't keep hitting Vault 503 // at the same time 504 505 // Set base values and add some backoff 506 507 v.logger.Printf("[WARN] vault: got error or bad auth, so backing off: %v", err) 508 switch { 509 case backoff < 5: 510 backoff = 5 511 case backoff >= 24: 512 backoff = 30 513 default: 514 backoff = backoff * 1.25 515 } 516 517 // Add randomness 518 backoff = backoff * (1.0 + rand.Float64()) 519 520 maxBackoff := currentExpiration.Sub(time.Now()) / 2 521 if maxBackoff < 0 { 522 // We have failed to renew the token past its expiration. Stop 523 // renewing with Vault. 524 v.logger.Printf("[ERR] vault: failed to renew Vault token before lease expiration. Shutting down Vault client") 525 v.l.Lock() 526 v.connEstablished = false 527 v.connEstablishedErr = err 528 v.l.Unlock() 529 return 530 531 } else if backoff > maxBackoff.Seconds() { 532 backoff = maxBackoff.Seconds() 533 } 534 535 durationUntilRetry := time.Duration(backoff) * time.Second 536 v.logger.Printf("[INFO] vault: backing off for %v", durationUntilRetry) 537 538 authRenewTimer.Reset(durationUntilRetry) 539 } 540 } 541 } 542 543 // renew attempts to renew our Vault token. If the renewal fails, an error is 544 // returned. 
This method updates the lastRenewed time 545 func (v *vaultClient) renew() error { 546 // Attempt to renew the token 547 secret, err := v.auth.RenewSelf(v.tokenData.CreationTTL) 548 if err != nil { 549 return err 550 } 551 552 auth := secret.Auth 553 if auth == nil { 554 return fmt.Errorf("renewal successful but not auth information returned") 555 } else if auth.LeaseDuration == 0 { 556 return fmt.Errorf("renewal successful but no lease duration returned") 557 } 558 559 v.lastRenewed = time.Now() 560 v.logger.Printf("[DEBUG] vault: successfully renewed server token") 561 return nil 562 } 563 564 // getWrappingFn returns an appropriate wrapping function for Nomad Servers 565 func (v *vaultClient) getWrappingFn() func(operation, path string) string { 566 createPath := "auth/token/create" 567 role := v.getRole() 568 if role != "" { 569 createPath = fmt.Sprintf("auth/token/create/%s", role) 570 } 571 572 return func(operation, path string) string { 573 // Only wrap the token create operation 574 if operation != "POST" || path != createPath { 575 return "" 576 } 577 578 return vaultTokenCreateTTL 579 } 580 } 581 582 // parseSelfToken looks up the Vault token in Vault and parses its data storing 583 // it in the client. If the token is not valid for Nomads purposes an error is 584 // returned. 
585 func (v *vaultClient) parseSelfToken() error { 586 // Get the initial lease duration 587 auth := v.client.Auth().Token() 588 var self *vapi.Secret 589 590 // Try looking up the token using the self endpoint 591 secret, err := auth.LookupSelf() 592 if err != nil { 593 // Try looking up our token directly 594 self, err = auth.Lookup(v.client.Token()) 595 if err != nil { 596 return fmt.Errorf("failed to lookup Vault periodic token: %v", err) 597 } 598 } 599 self = secret 600 601 // Read and parse the fields 602 var data tokenData 603 if err := mapstructure.WeakDecode(self.Data, &data); err != nil { 604 return fmt.Errorf("failed to parse Vault token's data block: %v", err) 605 } 606 607 root := false 608 for _, p := range data.Policies { 609 if p == "root" { 610 root = true 611 break 612 } 613 } 614 615 // Store the token data 616 data.Root = root 617 v.tokenData = &data 618 619 // The criteria that must be met for the token to be valid are as follows: 620 // 1) If token is non-root or is but has a creation ttl 621 // a) The token must be renewable 622 // b) Token must have a non-zero TTL 623 // 2) Must have update capability for "auth/token/lookup/" (used to verify incoming tokens) 624 // 3) Must have update capability for "/auth/token/revoke-accessor/" (used to revoke unneeded tokens) 625 // 4) If configured to create tokens against a role: 626 // a) Must have read capability for "auth/token/roles/<role_name" (Can just attempt a read) 627 // b) Must have update capability for path "auth/token/create/<role_name>" 628 // c) Role must: 629 // 1) Must allow tokens to be renewed 630 // 2) Must not have an explicit max TTL 631 // 3) Must have non-zero period 632 // 5) If not configured against a role, the token must be root 633 634 var mErr multierror.Error 635 role := v.getRole() 636 if !root { 637 // All non-root tokens must be renewable 638 if !data.Renewable { 639 multierror.Append(&mErr, fmt.Errorf("Vault token is not renewable or root")) 640 } 641 642 // All 
non-root tokens must have a lease duration 643 if data.CreationTTL == 0 { 644 multierror.Append(&mErr, fmt.Errorf("invalid lease duration of zero")) 645 } 646 647 // The lease duration can not be expired 648 if data.TTL == 0 { 649 multierror.Append(&mErr, fmt.Errorf("token TTL is zero")) 650 } 651 652 // There must be a valid role since we aren't root 653 if role == "" { 654 multierror.Append(&mErr, fmt.Errorf("token role name must be set when not using a root token")) 655 } 656 657 } else if data.CreationTTL != 0 { 658 // If the root token has a TTL it must be renewable 659 if !data.Renewable { 660 multierror.Append(&mErr, fmt.Errorf("Vault token has a TTL but is not renewable")) 661 } else if data.TTL == 0 { 662 // If the token has a TTL make sure it has not expired 663 multierror.Append(&mErr, fmt.Errorf("token TTL is zero")) 664 } 665 } 666 667 // Check we have the correct capabilities 668 if err := v.validateCapabilities(role, root); err != nil { 669 multierror.Append(&mErr, err) 670 } 671 672 // If given a role validate it 673 if role != "" { 674 if err := v.validateRole(role); err != nil { 675 multierror.Append(&mErr, err) 676 } 677 } 678 679 return mErr.ErrorOrNil() 680 } 681 682 // getRole returns the role name to be used when creating tokens 683 func (v *vaultClient) getRole() string { 684 if v.config.Role != "" { 685 return v.config.Role 686 } 687 688 return v.tokenData.Role 689 } 690 691 // validateCapabilities checks that Nomad's Vault token has the correct 692 // capabilities. 693 func (v *vaultClient) validateCapabilities(role string, root bool) error { 694 // Check if the token can lookup capabilities. 
695 var mErr multierror.Error 696 _, _, err := v.hasCapability(vaultCapabilitiesLookupPath, vaultCapabilitiesCapability) 697 if err != nil { 698 // Check if there is a permission denied 699 if structs.VaultUnrecoverableError.MatchString(err.Error()) { 700 // Since we can't read permissions, we just log a warning that we 701 // can't tell if the Vault token will work 702 msg := fmt.Sprintf("Can not lookup token capabilities. "+ 703 "As such certain operations may fail in the future. "+ 704 "Please give Nomad a Vault token with one of the following "+ 705 "capabilities %q on %q so that the required capabilities can be verified", 706 vaultCapabilitiesCapability, vaultCapabilitiesLookupPath) 707 v.logger.Printf("[WARN] vault: %s", msg) 708 return nil 709 } else { 710 multierror.Append(&mErr, err) 711 } 712 } 713 714 // verify is a helper function that verifies the token has one of the 715 // capabilities on the given path and adds an issue to the error 716 verify := func(path string, requiredCaps []string) { 717 ok, caps, err := v.hasCapability(path, requiredCaps) 718 if err != nil { 719 multierror.Append(&mErr, err) 720 } else if !ok { 721 multierror.Append(&mErr, 722 fmt.Errorf("token must have one of the following capabilities %q on %q; has %v", requiredCaps, path, caps)) 723 } 724 } 725 726 // Check if we are verifying incoming tokens 727 if !v.config.AllowsUnauthenticated() { 728 verify(vaultTokenLookupPath, vaultTokenLookupCapability) 729 } 730 731 // Verify we can renew our selves tokens 732 verify(vaultTokenRenewPath, vaultTokenRenewCapability) 733 734 // Verify we can revoke tokens 735 verify(vaultTokenRevokePath, vaultTokenRevokeCapability) 736 737 // If we are using a role verify the capability 738 if role != "" { 739 // Verify we can read the role 740 verify(fmt.Sprintf(vaultRoleLookupPath, role), vaultRoleLookupCapability) 741 742 // Verify we can create from the role 743 verify(fmt.Sprintf(vaultTokenRoleCreatePath, role), vaultTokenRoleCreateCapability) 
744 } 745 746 return mErr.ErrorOrNil() 747 } 748 749 // hasCapability takes a path and returns whether the token has at least one of 750 // the required capabilities on the given path. It also returns the set of 751 // capabilities the token does have as well as any error that occurred. 752 func (v *vaultClient) hasCapability(path string, required []string) (bool, []string, error) { 753 caps, err := v.client.Sys().CapabilitiesSelf(path) 754 if err != nil { 755 return false, nil, err 756 } 757 for _, c := range caps { 758 for _, r := range required { 759 if c == r { 760 return true, caps, nil 761 } 762 } 763 } 764 return false, caps, nil 765 } 766 767 // validateRole contacts Vault and checks that the given Vault role is valid for 768 // the purposes of being used by Nomad 769 func (v *vaultClient) validateRole(role string) error { 770 if role == "" { 771 return fmt.Errorf("Invalid empty role name") 772 } 773 774 // Validate the role 775 rsecret, err := v.client.Logical().Read(fmt.Sprintf("auth/token/roles/%s", role)) 776 if err != nil { 777 return fmt.Errorf("failed to lookup role %q: %v", role, err) 778 } 779 if rsecret == nil { 780 return fmt.Errorf("Role %q does not exist", role) 781 } 782 783 // Read and parse the fields 784 var data struct { 785 ExplicitMaxTtl int `mapstructure:"explicit_max_ttl"` 786 Orphan bool 787 Period int 788 Renewable bool 789 } 790 if err := mapstructure.WeakDecode(rsecret.Data, &data); err != nil { 791 return fmt.Errorf("failed to parse Vault role's data block: %v", err) 792 } 793 794 // Validate the role is acceptable 795 var mErr multierror.Error 796 if !data.Renewable { 797 multierror.Append(&mErr, fmt.Errorf("Role must allow tokens to be renewed")) 798 } 799 800 if data.ExplicitMaxTtl != 0 { 801 multierror.Append(&mErr, fmt.Errorf("Role can not use an explicit max ttl. 
Token must be periodic.")) 802 } 803 804 if data.Period == 0 { 805 multierror.Append(&mErr, fmt.Errorf("Role must have a non-zero period to make tokens periodic.")) 806 } 807 808 return mErr.ErrorOrNil() 809 } 810 811 // ConnectionEstablished returns whether a connection to Vault has been 812 // established and any error that potentially caused it to be false 813 func (v *vaultClient) ConnectionEstablished() (bool, error) { 814 v.l.Lock() 815 defer v.l.Unlock() 816 return v.connEstablished, v.connEstablishedErr 817 } 818 819 // Enabled returns whether the client is active 820 func (v *vaultClient) Enabled() bool { 821 v.l.Lock() 822 defer v.l.Unlock() 823 return v.config.IsEnabled() 824 } 825 826 // Active returns whether the client is active 827 func (v *vaultClient) Active() bool { 828 return atomic.LoadInt32(&v.active) == 1 829 } 830 831 // CreateToken takes the allocation and task and returns an appropriate Vault 832 // token. The call is rate limited and may be canceled with the passed policy. 
833 // When the error is recoverable, it will be of type RecoverableError 834 func (v *vaultClient) CreateToken(ctx context.Context, a *structs.Allocation, task string) (*vapi.Secret, error) { 835 if !v.Enabled() { 836 return nil, fmt.Errorf("Vault integration disabled") 837 } 838 if !v.Active() { 839 return nil, structs.NewRecoverableError(fmt.Errorf("Vault client not active"), true) 840 } 841 842 // Check if we have established a connection with Vault 843 if established, err := v.ConnectionEstablished(); !established && err == nil { 844 return nil, structs.NewRecoverableError(fmt.Errorf("Connection to Vault has not been established"), true) 845 } else if err != nil { 846 return nil, err 847 } 848 849 // Track how long the request takes 850 defer metrics.MeasureSince([]string{"nomad", "vault", "create_token"}, time.Now()) 851 852 // Retrieve the Vault block for the task 853 policies := a.Job.VaultPolicies() 854 if policies == nil { 855 return nil, fmt.Errorf("Job doesn't require Vault policies") 856 } 857 tg, ok := policies[a.TaskGroup] 858 if !ok { 859 return nil, fmt.Errorf("Task group does not require Vault policies") 860 } 861 taskVault, ok := tg[task] 862 if !ok { 863 return nil, fmt.Errorf("Task does not require Vault policies") 864 } 865 866 // Build the creation request 867 req := &vapi.TokenCreateRequest{ 868 Policies: taskVault.Policies, 869 Metadata: map[string]string{ 870 "AllocationID": a.ID, 871 "Task": task, 872 "NodeID": a.NodeID, 873 }, 874 TTL: v.childTTL, 875 DisplayName: fmt.Sprintf("%s-%s", a.ID, task), 876 } 877 878 // Ensure we are under our rate limit 879 if err := v.limiter.Wait(ctx); err != nil { 880 return nil, err 881 } 882 883 // Make the request and switch depending on whether we are using a root 884 // token or a role based token 885 var secret *vapi.Secret 886 var err error 887 role := v.getRole() 888 if v.tokenData.Root && role == "" { 889 req.Period = v.childTTL 890 secret, err = v.auth.Create(req) 891 } else { 892 // Make the 
token using the role 893 secret, err = v.auth.CreateWithRole(req, v.getRole()) 894 } 895 896 // Determine whether it is unrecoverable 897 if err != nil { 898 if structs.VaultUnrecoverableError.MatchString(err.Error()) { 899 return secret, err 900 } 901 902 // The error is recoverable 903 return nil, structs.NewRecoverableError(err, true) 904 } 905 906 // Validate the response 907 var validationErr error 908 if secret == nil { 909 validationErr = fmt.Errorf("Vault returned nil Secret") 910 } else if secret.WrapInfo == nil { 911 validationErr = fmt.Errorf("Vault returned Secret with nil WrapInfo. Secret warnings: %v", secret.Warnings) 912 } else if secret.WrapInfo.WrappedAccessor == "" { 913 validationErr = fmt.Errorf("Vault returned WrapInfo without WrappedAccessor. Secret warnings: %v", secret.Warnings) 914 } 915 if validationErr != nil { 916 v.logger.Printf("[WARN] vault: failed to CreateToken: %v", err) 917 return nil, structs.NewRecoverableError(validationErr, true) 918 } 919 920 // Got a valid response 921 return secret, nil 922 } 923 924 // LookupToken takes a Vault token and does a lookup against Vault. The call is 925 // rate limited and may be canceled with passed context. 
926 func (v *vaultClient) LookupToken(ctx context.Context, token string) (*vapi.Secret, error) { 927 if !v.Enabled() { 928 return nil, fmt.Errorf("Vault integration disabled") 929 } 930 931 if !v.Active() { 932 return nil, fmt.Errorf("Vault client not active") 933 } 934 935 // Check if we have established a connection with Vault 936 if established, err := v.ConnectionEstablished(); !established && err == nil { 937 return nil, structs.NewRecoverableError(fmt.Errorf("Connection to Vault has not been established"), true) 938 } else if err != nil { 939 return nil, err 940 } 941 942 // Track how long the request takes 943 defer metrics.MeasureSince([]string{"nomad", "vault", "lookup_token"}, time.Now()) 944 945 // Ensure we are under our rate limit 946 if err := v.limiter.Wait(ctx); err != nil { 947 return nil, err 948 } 949 950 // Lookup the token 951 return v.auth.Lookup(token) 952 } 953 954 // PoliciesFrom parses the set of policies returned by a token lookup. 955 func PoliciesFrom(s *vapi.Secret) ([]string, error) { 956 if s == nil { 957 return nil, fmt.Errorf("cannot parse nil Vault secret") 958 } 959 var data tokenData 960 if err := mapstructure.WeakDecode(s.Data, &data); err != nil { 961 return nil, fmt.Errorf("failed to parse Vault token's data block: %v", err) 962 } 963 964 return data.Policies, nil 965 } 966 967 // RevokeTokens revokes the passed set of accessors. If committed is set, the 968 // purge function passed to the client is called. If there is an error purging 969 // either because of Vault failures or because of the purge function, the 970 // revocation is retried until the tokens TTL. 
971 func (v *vaultClient) RevokeTokens(ctx context.Context, accessors []*structs.VaultAccessor, committed bool) error { 972 if !v.Enabled() { 973 return nil 974 } 975 976 if !v.Active() { 977 return fmt.Errorf("Vault client not active") 978 } 979 980 // Track how long the request takes 981 defer metrics.MeasureSince([]string{"nomad", "vault", "revoke_tokens"}, time.Now()) 982 983 // Check if we have established a connection with Vault. If not just add it 984 // to the queue 985 if established, err := v.ConnectionEstablished(); !established && err == nil { 986 // Only bother tracking it for later revocation if the accessor was 987 // committed 988 if committed { 989 v.storeForRevocation(accessors) 990 } 991 992 // Track that we are abandoning these accessors. 993 metrics.IncrCounter([]string{"nomad", "vault", "undistributed_tokens_abandoned"}, float32(len(accessors))) 994 return nil 995 } 996 997 // Attempt to revoke immediately and if it fails, add it to the revoke queue 998 err := v.parallelRevoke(ctx, accessors) 999 if err != nil { 1000 // If it is uncommitted, it is a best effort revoke as it will shortly 1001 // TTL within the cubbyhole and has not been leaked to any outside 1002 // system 1003 if !committed { 1004 metrics.IncrCounter([]string{"nomad", "vault", "undistributed_tokens_abandoned"}, float32(len(accessors))) 1005 return nil 1006 } 1007 1008 v.logger.Printf("[WARN] vault: failed to revoke tokens. 
Will reattempt until TTL: %v", err) 1009 v.storeForRevocation(accessors) 1010 return nil 1011 } else if !committed { 1012 // Mark that it was revoked but there is nothing to purge so exit 1013 metrics.IncrCounter([]string{"nomad", "vault", "undistributed_tokens_revoked"}, float32(len(accessors))) 1014 return nil 1015 } 1016 1017 if err := v.purgeFn(accessors); err != nil { 1018 v.logger.Printf("[ERR] vault: failed to purge Vault accessors: %v", err) 1019 v.storeForRevocation(accessors) 1020 return nil 1021 } 1022 1023 // Track that it was revoked successfully 1024 metrics.IncrCounter([]string{"nomad", "vault", "distributed_tokens_revoked"}, float32(len(accessors))) 1025 1026 return nil 1027 } 1028 1029 // storeForRevocation stores the passed set of accessors for revocation. It 1030 // captures their effective TTL by storing their create TTL plus the current 1031 // time. 1032 func (v *vaultClient) storeForRevocation(accessors []*structs.VaultAccessor) { 1033 v.revLock.Lock() 1034 v.statsLock.Lock() 1035 now := time.Now() 1036 for _, a := range accessors { 1037 v.revoking[a] = now.Add(time.Duration(a.CreationTTL) * time.Second) 1038 } 1039 v.stats.TrackedForRevoke = len(v.revoking) 1040 v.statsLock.Unlock() 1041 v.revLock.Unlock() 1042 } 1043 1044 // parallelRevoke revokes the passed VaultAccessors in parallel. 
1045 func (v *vaultClient) parallelRevoke(ctx context.Context, accessors []*structs.VaultAccessor) error { 1046 if !v.Enabled() { 1047 return fmt.Errorf("Vault integration disabled") 1048 } 1049 1050 if !v.Active() { 1051 return fmt.Errorf("Vault client not active") 1052 } 1053 1054 // Check if we have established a connection with Vault 1055 if established, err := v.ConnectionEstablished(); !established && err == nil { 1056 return structs.NewRecoverableError(fmt.Errorf("Connection to Vault has not been established"), true) 1057 } else if err != nil { 1058 return err 1059 } 1060 1061 g, pCtx := errgroup.WithContext(ctx) 1062 1063 // Cap the handlers 1064 handlers := len(accessors) 1065 if handlers > maxParallelRevokes { 1066 handlers = maxParallelRevokes 1067 } 1068 1069 // Create the Vault Tokens 1070 input := make(chan *structs.VaultAccessor, handlers) 1071 for i := 0; i < handlers; i++ { 1072 g.Go(func() error { 1073 for { 1074 select { 1075 case va, ok := <-input: 1076 if !ok { 1077 return nil 1078 } 1079 1080 if err := v.auth.RevokeAccessor(va.Accessor); err != nil { 1081 return fmt.Errorf("failed to revoke token (alloc: %q, node: %q, task: %q): %v", va.AllocID, va.NodeID, va.Task, err) 1082 } 1083 case <-pCtx.Done(): 1084 return nil 1085 } 1086 } 1087 }) 1088 } 1089 1090 // Send the input 1091 go func() { 1092 defer close(input) 1093 for _, va := range accessors { 1094 select { 1095 case <-pCtx.Done(): 1096 return 1097 case input <- va: 1098 } 1099 } 1100 1101 }() 1102 1103 // Wait for everything to complete 1104 return g.Wait() 1105 } 1106 1107 // revokeDaemon should be called in a goroutine and is used to periodically 1108 // revoke Vault accessors that failed the original revocation 1109 func (v *vaultClient) revokeDaemon() { 1110 ticker := time.NewTicker(vaultRevocationIntv) 1111 defer ticker.Stop() 1112 1113 for { 1114 select { 1115 case <-v.tomb.Dying(): 1116 return 1117 case now := <-ticker.C: 1118 if established, _ := v.ConnectionEstablished(); 
!established { 1119 continue 1120 } 1121 1122 v.revLock.Lock() 1123 1124 // Fast path 1125 if len(v.revoking) == 0 { 1126 v.revLock.Unlock() 1127 continue 1128 } 1129 1130 // Build the list of allocations that need to revoked while pruning any TTL'd checks 1131 revoking := make([]*structs.VaultAccessor, 0, len(v.revoking)) 1132 for va, ttl := range v.revoking { 1133 if now.After(ttl) { 1134 delete(v.revoking, va) 1135 } else { 1136 revoking = append(revoking, va) 1137 } 1138 } 1139 1140 if err := v.parallelRevoke(context.Background(), revoking); err != nil { 1141 v.logger.Printf("[WARN] vault: background token revocation errored: %v", err) 1142 v.revLock.Unlock() 1143 continue 1144 } 1145 1146 // Unlock before a potentially expensive operation 1147 v.revLock.Unlock() 1148 1149 // Call the passed in token revocation function 1150 if err := v.purgeFn(revoking); err != nil { 1151 // Can continue since revocation is idempotent 1152 v.logger.Printf("[ERR] vault: token revocation errored: %v", err) 1153 continue 1154 } 1155 1156 // Track that tokens were revoked successfully 1157 metrics.IncrCounter([]string{"nomad", "vault", "distributed_tokens_revoked"}, float32(len(revoking))) 1158 1159 // Can delete from the tracked list now that we have purged 1160 v.revLock.Lock() 1161 v.statsLock.Lock() 1162 for _, va := range revoking { 1163 delete(v.revoking, va) 1164 } 1165 v.stats.TrackedForRevoke = len(v.revoking) 1166 v.statsLock.Unlock() 1167 v.revLock.Unlock() 1168 1169 } 1170 } 1171 } 1172 1173 // purgeVaultAccessors creates a Raft transaction to remove the passed Vault 1174 // Accessors 1175 func (s *Server) purgeVaultAccessors(accessors []*structs.VaultAccessor) error { 1176 // Commit this update via Raft 1177 req := structs.VaultAccessorsRequest{Accessors: accessors} 1178 _, _, err := s.raftApply(structs.VaultAccessorDeregisterRequestType, req) 1179 return err 1180 } 1181 1182 // wrapNilError is a helper that returns a wrapped function that returns a nil 1183 // error 
1184 func wrapNilError(f func()) func() error { 1185 return func() error { 1186 f() 1187 return nil 1188 } 1189 } 1190 1191 // setLimit is used to update the rate limit 1192 func (v *vaultClient) setLimit(l rate.Limit) { 1193 v.l.Lock() 1194 defer v.l.Unlock() 1195 v.limiter = rate.NewLimiter(l, int(l)) 1196 } 1197 1198 // Stats is used to query the state of the blocked eval tracker. 1199 func (v *vaultClient) Stats() *VaultStats { 1200 // Allocate a new stats struct 1201 stats := new(VaultStats) 1202 1203 v.statsLock.RLock() 1204 defer v.statsLock.RUnlock() 1205 1206 // Copy all the stats 1207 stats.TrackedForRevoke = v.stats.TrackedForRevoke 1208 1209 return stats 1210 } 1211 1212 // EmitStats is used to export metrics about the blocked eval tracker while enabled 1213 func (v *vaultClient) EmitStats(period time.Duration, stopCh chan struct{}) { 1214 for { 1215 select { 1216 case <-time.After(period): 1217 stats := v.Stats() 1218 metrics.SetGauge([]string{"nomad", "vault", "distributed_tokens_revoking"}, float32(stats.TrackedForRevoke)) 1219 case <-stopCh: 1220 return 1221 } 1222 } 1223 }