github.com/hspak/nomad@v0.7.2-0.20180309000617-bc4ae22a39a5/nomad/vault.go (about) 1 package nomad 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "log" 8 "math/rand" 9 "regexp" 10 "sync" 11 "sync/atomic" 12 "time" 13 14 "gopkg.in/tomb.v2" 15 16 metrics "github.com/armon/go-metrics" 17 multierror "github.com/hashicorp/go-multierror" 18 "github.com/hashicorp/nomad/nomad/structs" 19 "github.com/hashicorp/nomad/nomad/structs/config" 20 vapi "github.com/hashicorp/vault/api" 21 "github.com/mitchellh/mapstructure" 22 23 "golang.org/x/sync/errgroup" 24 "golang.org/x/time/rate" 25 ) 26 27 const ( 28 // vaultTokenCreateTTL is the duration the wrapped token for the client is 29 // valid for. The units are in seconds. 30 vaultTokenCreateTTL = "60s" 31 32 // minimumTokenTTL is the minimum Token TTL allowed for child tokens. 33 minimumTokenTTL = 5 * time.Minute 34 35 // defaultTokenTTL is the default Token TTL used when the passed token is a 36 // root token such that child tokens aren't being created against a role 37 // that has defined a TTL 38 defaultTokenTTL = "72h" 39 40 // requestRateLimit is the maximum number of requests per second Nomad will 41 // make against Vault 42 requestRateLimit rate.Limit = 500.0 43 44 // maxParallelRevokes is the maximum number of parallel Vault 45 // token revocation requests 46 maxParallelRevokes = 64 47 48 // vaultRevocationIntv is the interval at which Vault tokens that failed 49 // initial revocation are retried 50 vaultRevocationIntv = 5 * time.Minute 51 52 // vaultCapabilitiesLookupPath is the path to lookup the capabilities of 53 // ones token. 54 vaultCapabilitiesLookupPath = "sys/capabilities-self" 55 56 // vaultTokenRenewPath is the path used to renew our token 57 vaultTokenRenewPath = "auth/token/renew-self" 58 59 // vaultTokenLookupPath is the path used to lookup a token 60 vaultTokenLookupPath = "auth/token/lookup" 61 62 // vaultTokenRevokePath is the path used to revoke a token 63 vaultTokenRevokePath = "auth/token/revoke-accessor" 64 65 // vaultRoleLookupPath is the path to lookup a role 66 vaultRoleLookupPath = "auth/token/roles/%s" 67 68 // vaultRoleCreatePath is the path to create a token from a role 69 vaultTokenRoleCreatePath = "auth/token/create/%s" 70 ) 71 72 var ( 73 // vaultUnrecoverableError matches unrecoverable errors 74 vaultUnrecoverableError = regexp.MustCompile(`Code:\s+40(0|3|4)`) 75 76 // vaultCapabilitiesCapability is the expected capability of Nomad's Vault 77 // token on the the path. The token must have at least one of the 78 // capabilities. 79 vaultCapabilitiesCapability = []string{"update", "root"} 80 81 // vaultTokenRenewCapability is the expected capability Nomad's 82 // Vault token should have on the path. The token must have at least one of 83 // the capabilities. 84 vaultTokenRenewCapability = []string{"update", "root"} 85 86 // vaultTokenLookupCapability is the expected capability Nomad's 87 // Vault token should have on the path. The token must have at least one of 88 // the capabilities. 89 vaultTokenLookupCapability = []string{"update", "root"} 90 91 // vaultTokenRevokeCapability is the expected capability Nomad's 92 // Vault token should have on the path. The token must have at least one of 93 // the capabilities. 94 vaultTokenRevokeCapability = []string{"update", "root"} 95 96 // vaultRoleLookupCapability is the the expected capability Nomad's Vault 97 // token should have on the path. The token must have at least one of the 98 // capabilities. 99 vaultRoleLookupCapability = []string{"read", "root"} 100 101 // vaultTokenRoleCreateCapability is the the expected capability Nomad's Vault 102 // token should have on the path. The token must have at least one of the 103 // capabilities. 104 vaultTokenRoleCreateCapability = []string{"update", "root"} 105 ) 106 107 // VaultClient is the Servers interface for interfacing with Vault 108 type VaultClient interface { 109 // SetActive activates or de-activates the Vault client. When active, token 110 // creation/lookup/revocation operation are allowed. 111 SetActive(active bool) 112 113 // SetConfig updates the config used by the Vault client 114 SetConfig(config *config.VaultConfig) error 115 116 // CreateToken takes an allocation and task and returns an appropriate Vault 117 // Secret 118 CreateToken(ctx context.Context, a *structs.Allocation, task string) (*vapi.Secret, error) 119 120 // LookupToken takes a token string and returns its capabilities. 121 LookupToken(ctx context.Context, token string) (*vapi.Secret, error) 122 123 // RevokeTokens takes a set of tokens accessor and revokes the tokens 124 RevokeTokens(ctx context.Context, accessors []*structs.VaultAccessor, committed bool) error 125 126 // Stop is used to stop token renewal 127 Stop() 128 129 // Running returns whether the Vault client is running 130 Running() bool 131 132 // Stats returns the Vault clients statistics 133 Stats() *VaultStats 134 135 // EmitStats emits that clients statistics at the given period until stopCh 136 // is called. 137 EmitStats(period time.Duration, stopCh chan struct{}) 138 } 139 140 // VaultStats returns all the stats about Vault tokens created and managed by 141 // Nomad. 142 type VaultStats struct { 143 // TrackedForRevoke is the count of tokens that are being tracked to be 144 // revoked since they could not be immediately revoked. 145 TrackedForRevoke int 146 } 147 148 // PurgeVaultAccessor is called to remove VaultAccessors from the system. If 149 // the function returns an error, the token will still be tracked and revocation 150 // will retry till there is a success 151 type PurgeVaultAccessorFn func(accessors []*structs.VaultAccessor) error 152 153 // tokenData holds the relevant information about the Vault token passed to the 154 // client. 155 type tokenData struct { 156 CreationTTL int `mapstructure:"creation_ttl"` 157 TTL int `mapstructure:"ttl"` 158 Renewable bool `mapstructure:"renewable"` 159 Policies []string `mapstructure:"policies"` 160 Role string `mapstructure:"role"` 161 Root bool 162 } 163 164 // vaultClient is the Servers implementation of the VaultClient interface. The 165 // client renews the PeriodicToken given in the Vault configuration and provides 166 // the Server with the ability to create child tokens and lookup the permissions 167 // of tokens. 168 type vaultClient struct { 169 // limiter is used to rate limit requests to Vault 170 limiter *rate.Limiter 171 172 // client is the Vault API client 173 client *vapi.Client 174 175 // auth is the Vault token auth API client 176 auth *vapi.TokenAuth 177 178 // config is the user passed Vault config 179 config *config.VaultConfig 180 181 // connEstablished marks whether we have an established connection to Vault. 182 connEstablished bool 183 184 // connEstablishedErr marks an error that can occur when establishing a 185 // connection 186 connEstablishedErr error 187 188 // token is the raw token used by the client 189 token string 190 191 // tokenData is the data of the passed Vault token 192 tokenData *tokenData 193 194 // revoking tracks the VaultAccessors that must be revoked 195 revoking map[*structs.VaultAccessor]time.Time 196 purgeFn PurgeVaultAccessorFn 197 revLock sync.Mutex 198 199 // active indicates whether the vaultClient is active. It should be 200 // accessed using a helper and updated atomically 201 active int32 202 203 // running indicates whether the vault client is started. 204 running bool 205 206 // childTTL is the TTL for child tokens. 207 childTTL string 208 209 // lastRenewed is the time the token was last renewed 210 lastRenewed time.Time 211 212 tomb *tomb.Tomb 213 logger *log.Logger 214 215 // stats stores the stats 216 stats *VaultStats 217 statsLock sync.RWMutex 218 219 // l is used to lock the configuration aspects of the client such that 220 // multiple callers can't cause conflicting config updates 221 l sync.Mutex 222 } 223 224 // NewVaultClient returns a Vault client from the given config. If the client 225 // couldn't be made an error is returned. 226 func NewVaultClient(c *config.VaultConfig, logger *log.Logger, purgeFn PurgeVaultAccessorFn) (*vaultClient, error) { 227 if c == nil { 228 return nil, fmt.Errorf("must pass valid VaultConfig") 229 } 230 231 if logger == nil { 232 return nil, fmt.Errorf("must pass valid logger") 233 } 234 235 v := &vaultClient{ 236 config: c, 237 logger: logger, 238 limiter: rate.NewLimiter(requestRateLimit, int(requestRateLimit)), 239 revoking: make(map[*structs.VaultAccessor]time.Time), 240 purgeFn: purgeFn, 241 tomb: &tomb.Tomb{}, 242 stats: new(VaultStats), 243 } 244 245 if v.config.IsEnabled() { 246 if err := v.buildClient(); err != nil { 247 return nil, err 248 } 249 250 // Launch the required goroutines 251 v.tomb.Go(wrapNilError(v.establishConnection)) 252 v.tomb.Go(wrapNilError(v.revokeDaemon)) 253 254 v.running = true 255 } 256 257 return v, nil 258 } 259 260 func (v *vaultClient) Stop() { 261 v.l.Lock() 262 running := v.running 263 v.running = false 264 v.l.Unlock() 265 266 if running { 267 v.tomb.Kill(nil) 268 v.tomb.Wait() 269 v.flush() 270 } 271 } 272 273 func (v *vaultClient) Running() bool { 274 v.l.Lock() 275 defer v.l.Unlock() 276 return v.running 277 } 278 279 // SetActive activates or de-activates the Vault client. When active, token 280 // creation/lookup/revocation operation are allowed. All queued revocations are 281 // cancelled if set un-active as it is assumed another instances is taking over 282 func (v *vaultClient) SetActive(active bool) { 283 if active { 284 atomic.StoreInt32(&v.active, 1) 285 } else { 286 atomic.StoreInt32(&v.active, 0) 287 } 288 289 // Clear out the revoking tokens 290 v.revLock.Lock() 291 v.revoking = make(map[*structs.VaultAccessor]time.Time) 292 v.revLock.Unlock() 293 294 return 295 } 296 297 // flush is used to reset the state of the vault client 298 func (v *vaultClient) flush() { 299 v.l.Lock() 300 defer v.l.Unlock() 301 302 v.client = nil 303 v.auth = nil 304 v.connEstablished = false 305 v.connEstablishedErr = nil 306 v.token = "" 307 v.tokenData = nil 308 v.revoking = make(map[*structs.VaultAccessor]time.Time) 309 v.childTTL = "" 310 v.tomb = &tomb.Tomb{} 311 } 312 313 // SetConfig is used to update the Vault config being used. A temporary outage 314 // may occur after calling as it re-establishes a connection to Vault 315 func (v *vaultClient) SetConfig(config *config.VaultConfig) error { 316 if config == nil { 317 return fmt.Errorf("must pass valid VaultConfig") 318 } 319 320 v.l.Lock() 321 defer v.l.Unlock() 322 323 // Kill any background routintes 324 if v.running { 325 // Stop accepting any new request 326 v.connEstablished = false 327 328 // Kill any background routine and create a new tomb 329 v.tomb.Kill(nil) 330 v.tomb.Wait() 331 v.tomb = &tomb.Tomb{} 332 v.running = false 333 } 334 335 // Store the new config 336 v.config = config 337 338 // Check if we should relaunch 339 if v.config.IsEnabled() { 340 // Rebuild the client 341 if err := v.buildClient(); err != nil { 342 return err 343 } 344 345 // Launch the required goroutines 346 v.tomb.Go(wrapNilError(v.establishConnection)) 347 v.tomb.Go(wrapNilError(v.revokeDaemon)) 348 v.running = true 349 } 350 351 return nil 352 } 353 354 // buildClient is used to build a Vault client based on the stored Vault config 355 func (v *vaultClient) buildClient() error { 356 // Validate we have the required fields. 357 if v.config.Token == "" { 358 return errors.New("Vault token must be set") 359 } else if v.config.Addr == "" { 360 return errors.New("Vault address must be set") 361 } 362 363 // Parse the TTL if it is set 364 if v.config.TaskTokenTTL != "" { 365 d, err := time.ParseDuration(v.config.TaskTokenTTL) 366 if err != nil { 367 return fmt.Errorf("failed to parse TaskTokenTTL %q: %v", v.config.TaskTokenTTL, err) 368 } 369 370 if d.Nanoseconds() < minimumTokenTTL.Nanoseconds() { 371 return fmt.Errorf("ChildTokenTTL is less than minimum allowed of %v", minimumTokenTTL) 372 } 373 374 v.childTTL = v.config.TaskTokenTTL 375 } else { 376 // Default the TaskTokenTTL 377 v.childTTL = defaultTokenTTL 378 } 379 380 // Get the Vault API configuration 381 apiConf, err := v.config.ApiConfig() 382 if err != nil { 383 return fmt.Errorf("Failed to create Vault API config: %v", err) 384 } 385 386 // Create the Vault API client 387 client, err := vapi.NewClient(apiConf) 388 if err != nil { 389 v.logger.Printf("[ERR] vault: failed to create Vault client. Not retrying: %v", err) 390 return err 391 } 392 393 // Set the token and store the client 394 v.token = v.config.Token 395 client.SetToken(v.token) 396 v.client = client 397 v.auth = client.Auth().Token() 398 return nil 399 } 400 401 // establishConnection is used to make first contact with Vault. This should be 402 // called in a go-routine since the connection is retried til the Vault Client 403 // is stopped or the connection is successfully made at which point the renew 404 // loop is started. 405 func (v *vaultClient) establishConnection() { 406 // Create the retry timer and set initial duration to zero so it fires 407 // immediately 408 retryTimer := time.NewTimer(0) 409 410 OUTER: 411 for { 412 select { 413 case <-v.tomb.Dying(): 414 return 415 case <-retryTimer.C: 416 // Ensure the API is reachable 417 if _, err := v.client.Sys().InitStatus(); err != nil { 418 v.logger.Printf("[WARN] vault: failed to contact Vault API. Retrying in %v: %v", 419 v.config.ConnectionRetryIntv, err) 420 retryTimer.Reset(v.config.ConnectionRetryIntv) 421 continue OUTER 422 } 423 424 break OUTER 425 } 426 } 427 428 // Retrieve our token, validate it and parse the lease duration 429 if err := v.parseSelfToken(); err != nil { 430 v.logger.Printf("[ERR] vault: failed to validate self token/role and not retrying: %v", err) 431 v.l.Lock() 432 v.connEstablished = false 433 v.connEstablishedErr = err 434 v.l.Unlock() 435 return 436 } 437 438 // Set the wrapping function such that token creation is wrapped now 439 // that we know our role 440 v.client.SetWrappingLookupFunc(v.getWrappingFn()) 441 442 // If we are given a non-root token, start renewing it 443 if v.tokenData.Root && v.tokenData.CreationTTL == 0 { 444 v.logger.Printf("[DEBUG] vault: not renewing token as it is root") 445 } else { 446 v.logger.Printf("[DEBUG] vault: token lease duration is %v", 447 time.Duration(v.tokenData.CreationTTL)*time.Second) 448 v.tomb.Go(wrapNilError(v.renewalLoop)) 449 } 450 451 v.l.Lock() 452 v.connEstablished = true 453 v.connEstablishedErr = nil 454 v.l.Unlock() 455 } 456 457 // renewalLoop runs the renew loop. This should only be called if we are given a 458 // non-root token. 459 func (v *vaultClient) renewalLoop() { 460 // Create the renewal timer and set initial duration to zero so it fires 461 // immediately 462 authRenewTimer := time.NewTimer(0) 463 464 // Backoff is to reduce the rate we try to renew with Vault under error 465 // situations 466 backoff := 0.0 467 468 for { 469 select { 470 case <-v.tomb.Dying(): 471 return 472 case <-authRenewTimer.C: 473 // Renew the token and determine the new expiration 474 err := v.renew() 475 currentExpiration := v.lastRenewed.Add(time.Duration(v.tokenData.CreationTTL) * time.Second) 476 477 // Successfully renewed 478 if err == nil { 479 // If we take the expiration (lastRenewed + auth duration) and 480 // subtract the current time, we get a duration until expiry. 481 // Set the timer to poke us after half of that time is up. 482 durationUntilRenew := currentExpiration.Sub(time.Now()) / 2 483 484 v.logger.Printf("[INFO] vault: renewing token in %v", durationUntilRenew) 485 authRenewTimer.Reset(durationUntilRenew) 486 487 // Reset any backoff 488 backoff = 0 489 break 490 } 491 492 // Back off, increasing the amount of backoff each time. There are some rules: 493 // 494 // * If we have an existing authentication that is going to expire, 495 // never back off more than half of the amount of time remaining 496 // until expiration 497 // * Never back off more than 30 seconds multiplied by a random 498 // value between 1 and 2 499 // * Use randomness so that many clients won't keep hitting Vault 500 // at the same time 501 502 // Set base values and add some backoff 503 504 v.logger.Printf("[WARN] vault: got error or bad auth, so backing off: %v", err) 505 switch { 506 case backoff < 5: 507 backoff = 5 508 case backoff >= 24: 509 backoff = 30 510 default: 511 backoff = backoff * 1.25 512 } 513 514 // Add randomness 515 backoff = backoff * (1.0 + rand.Float64()) 516 517 maxBackoff := currentExpiration.Sub(time.Now()) / 2 518 if maxBackoff < 0 { 519 // We have failed to renew the token past its expiration. Stop 520 // renewing with Vault. 521 v.logger.Printf("[ERR] vault: failed to renew Vault token before lease expiration. Shutting down Vault client") 522 v.l.Lock() 523 v.connEstablished = false 524 v.connEstablishedErr = err 525 v.l.Unlock() 526 return 527 528 } else if backoff > maxBackoff.Seconds() { 529 backoff = maxBackoff.Seconds() 530 } 531 532 durationUntilRetry := time.Duration(backoff) * time.Second 533 v.logger.Printf("[INFO] vault: backing off for %v", durationUntilRetry) 534 535 authRenewTimer.Reset(durationUntilRetry) 536 } 537 } 538 } 539 540 // renew attempts to renew our Vault token. If the renewal fails, an error is 541 // returned. This method updates the lastRenewed time 542 func (v *vaultClient) renew() error { 543 // Attempt to renew the token 544 secret, err := v.auth.RenewSelf(v.tokenData.CreationTTL) 545 if err != nil { 546 return err 547 } 548 549 auth := secret.Auth 550 if auth == nil { 551 return fmt.Errorf("renewal successful but not auth information returned") 552 } else if auth.LeaseDuration == 0 { 553 return fmt.Errorf("renewal successful but no lease duration returned") 554 } 555 556 v.lastRenewed = time.Now() 557 v.logger.Printf("[DEBUG] vault: successfully renewed server token") 558 return nil 559 } 560 561 // getWrappingFn returns an appropriate wrapping function for Nomad Servers 562 func (v *vaultClient) getWrappingFn() func(operation, path string) string { 563 createPath := "auth/token/create" 564 role := v.getRole() 565 if role != "" { 566 createPath = fmt.Sprintf("auth/token/create/%s", role) 567 } 568 569 return func(operation, path string) string { 570 // Only wrap the token create operation 571 if operation != "POST" || path != createPath { 572 return "" 573 } 574 575 return vaultTokenCreateTTL 576 } 577 } 578 579 // parseSelfToken looks up the Vault token in Vault and parses its data storing 580 // it in the client. If the token is not valid for Nomads purposes an error is 581 // returned. 582 func (v *vaultClient) parseSelfToken() error { 583 // Get the initial lease duration 584 auth := v.client.Auth().Token() 585 var self *vapi.Secret 586 587 // Try looking up the token using the self endpoint 588 secret, err := auth.LookupSelf() 589 if err != nil { 590 // Try looking up our token directly 591 self, err = auth.Lookup(v.client.Token()) 592 if err != nil { 593 return fmt.Errorf("failed to lookup Vault periodic token: %v", err) 594 } 595 } 596 self = secret 597 598 // Read and parse the fields 599 var data tokenData 600 if err := mapstructure.WeakDecode(self.Data, &data); err != nil { 601 return fmt.Errorf("failed to parse Vault token's data block: %v", err) 602 } 603 604 root := false 605 for _, p := range data.Policies { 606 if p == "root" { 607 root = true 608 break 609 } 610 } 611 612 // Store the token data 613 data.Root = root 614 v.tokenData = &data 615 616 // The criteria that must be met for the token to be valid are as follows: 617 // 1) If token is non-root or is but has a creation ttl 618 // a) The token must be renewable 619 // b) Token must have a non-zero TTL 620 // 2) Must have update capability for "auth/token/lookup/" (used to verify incoming tokens) 621 // 3) Must have update capability for "/auth/token/revoke-accessor/" (used to revoke unneeded tokens) 622 // 4) If configured to create tokens against a role: 623 // a) Must have read capability for "auth/token/roles/<role_name" (Can just attempt a read) 624 // b) Must have update capability for path "auth/token/create/<role_name>" 625 // c) Role must: 626 // 1) Not allow orphans 627 // 2) Must allow tokens to be renewed 628 // 3) Must not have an explicit max TTL 629 // 4) Must have non-zero period 630 // 5) If not configured against a role, the token must be root 631 632 var mErr multierror.Error 633 role := v.getRole() 634 if !root { 635 // All non-root tokens must be renewable 636 if !data.Renewable { 637 multierror.Append(&mErr, fmt.Errorf("Vault token is not renewable or root")) 638 } 639 640 // All non-root tokens must have a lease duration 641 if data.CreationTTL == 0 { 642 multierror.Append(&mErr, fmt.Errorf("invalid lease duration of zero")) 643 } 644 645 // The lease duration can not be expired 646 if data.TTL == 0 { 647 multierror.Append(&mErr, fmt.Errorf("token TTL is zero")) 648 } 649 650 // There must be a valid role since we aren't root 651 if role == "" { 652 multierror.Append(&mErr, fmt.Errorf("token role name must be set when not using a root token")) 653 } 654 655 } else if data.CreationTTL != 0 { 656 // If the root token has a TTL it must be renewable 657 if !data.Renewable { 658 multierror.Append(&mErr, fmt.Errorf("Vault token has a TTL but is not renewable")) 659 } else if data.TTL == 0 { 660 // If the token has a TTL make sure it has not expired 661 multierror.Append(&mErr, fmt.Errorf("token TTL is zero")) 662 } 663 } 664 665 // Check we have the correct capabilities 666 if err := v.validateCapabilities(role, root); err != nil { 667 multierror.Append(&mErr, err) 668 } 669 670 // If given a role validate it 671 if role != "" { 672 if err := v.validateRole(role); err != nil { 673 multierror.Append(&mErr, err) 674 } 675 } 676 677 return mErr.ErrorOrNil() 678 } 679 680 // getRole returns the role name to be used when creating tokens 681 func (v *vaultClient) getRole() string { 682 if v.config.Role != "" { 683 return v.config.Role 684 } 685 686 return v.tokenData.Role 687 } 688 689 // validateCapabilities checks that Nomad's Vault token has the correct 690 // capabilities. 691 func (v *vaultClient) validateCapabilities(role string, root bool) error { 692 // Check if the token can lookup capabilities. 693 var mErr multierror.Error 694 _, _, err := v.hasCapability(vaultCapabilitiesLookupPath, vaultCapabilitiesCapability) 695 if err != nil { 696 // Check if there is a permission denied 697 if vaultUnrecoverableError.MatchString(err.Error()) { 698 // Since we can't read permissions, we just log a warning that we 699 // can't tell if the Vault token will work 700 msg := fmt.Sprintf("Can not lookup token capabilities. "+ 701 "As such certain operations may fail in the future. "+ 702 "Please give Nomad a Vault token with one of the following "+ 703 "capabilities %q on %q so that the required capabilities can be verified", 704 vaultCapabilitiesCapability, vaultCapabilitiesLookupPath) 705 v.logger.Printf("[WARN] vault: %s", msg) 706 return nil 707 } else { 708 multierror.Append(&mErr, err) 709 } 710 } 711 712 // verify is a helper function that verifies the token has one of the 713 // capabilities on the given path and adds an issue to the error 714 verify := func(path string, requiredCaps []string) { 715 ok, caps, err := v.hasCapability(path, requiredCaps) 716 if err != nil { 717 multierror.Append(&mErr, err) 718 } else if !ok { 719 multierror.Append(&mErr, 720 fmt.Errorf("token must have one of the following capabilities %q on %q; has %v", requiredCaps, path, caps)) 721 } 722 } 723 724 // Check if we are verifying incoming tokens 725 if !v.config.AllowsUnauthenticated() { 726 verify(vaultTokenLookupPath, vaultTokenLookupCapability) 727 } 728 729 // Verify we can renew our selves tokens 730 verify(vaultTokenRenewPath, vaultTokenRenewCapability) 731 732 // Verify we can revoke tokens 733 verify(vaultTokenRevokePath, vaultTokenRevokeCapability) 734 735 // If we are using a role verify the capability 736 if role != "" { 737 // Verify we can read the role 738 verify(fmt.Sprintf(vaultRoleLookupPath, role), vaultRoleLookupCapability) 739 740 // Verify we can create from the role 741 verify(fmt.Sprintf(vaultTokenRoleCreatePath, role), vaultTokenRoleCreateCapability) 742 } 743 744 return mErr.ErrorOrNil() 745 } 746 747 // hasCapability takes a path and returns whether the token has at least one of 748 // the required capabilities on the given path. It also returns the set of 749 // capabilities the token does have as well as any error that occurred. 750 func (v *vaultClient) hasCapability(path string, required []string) (bool, []string, error) { 751 caps, err := v.client.Sys().CapabilitiesSelf(path) 752 if err != nil { 753 return false, nil, err 754 } 755 for _, c := range caps { 756 for _, r := range required { 757 if c == r { 758 return true, caps, nil 759 } 760 } 761 } 762 return false, caps, nil 763 } 764 765 // validateRole contacts Vault and checks that the given Vault role is valid for 766 // the purposes of being used by Nomad 767 func (v *vaultClient) validateRole(role string) error { 768 if role == "" { 769 return fmt.Errorf("Invalid empty role name") 770 } 771 772 // Validate the role 773 rsecret, err := v.client.Logical().Read(fmt.Sprintf("auth/token/roles/%s", role)) 774 if err != nil { 775 return fmt.Errorf("failed to lookup role %q: %v", role, err) 776 } 777 if rsecret == nil { 778 return fmt.Errorf("Role %q does not exist", role) 779 } 780 781 // Read and parse the fields 782 var data struct { 783 ExplicitMaxTtl int `mapstructure:"explicit_max_ttl"` 784 Orphan bool 785 Period int 786 Renewable bool 787 } 788 if err := mapstructure.WeakDecode(rsecret.Data, &data); err != nil { 789 return fmt.Errorf("failed to parse Vault role's data block: %v", err) 790 } 791 792 // Validate the role is acceptable 793 var mErr multierror.Error 794 if data.Orphan { 795 multierror.Append(&mErr, fmt.Errorf("Role must not allow orphans")) 796 } 797 798 if !data.Renewable { 799 multierror.Append(&mErr, fmt.Errorf("Role must allow tokens to be renewed")) 800 } 801 802 if data.ExplicitMaxTtl != 0 { 803 multierror.Append(&mErr, fmt.Errorf("Role can not use an explicit max ttl. Token must be periodic.")) 804 } 805 806 if data.Period == 0 { 807 multierror.Append(&mErr, fmt.Errorf("Role must have a non-zero period to make tokens periodic.")) 808 } 809 810 return mErr.ErrorOrNil() 811 } 812 813 // ConnectionEstablished returns whether a connection to Vault has been 814 // established and any error that potentially caused it to be false 815 func (v *vaultClient) ConnectionEstablished() (bool, error) { 816 v.l.Lock() 817 defer v.l.Unlock() 818 return v.connEstablished, v.connEstablishedErr 819 } 820 821 // Enabled returns whether the client is active 822 func (v *vaultClient) Enabled() bool { 823 v.l.Lock() 824 defer v.l.Unlock() 825 return v.config.IsEnabled() 826 } 827 828 // Active returns whether the client is active 829 func (v *vaultClient) Active() bool { 830 return atomic.LoadInt32(&v.active) == 1 831 } 832 833 // CreateToken takes the allocation and task and returns an appropriate Vault 834 // token. The call is rate limited and may be canceled with the passed policy. 835 // When the error is recoverable, it will be of type RecoverableError 836 func (v *vaultClient) CreateToken(ctx context.Context, a *structs.Allocation, task string) (*vapi.Secret, error) { 837 if !v.Enabled() { 838 return nil, fmt.Errorf("Vault integration disabled") 839 } 840 if !v.Active() { 841 return nil, structs.NewRecoverableError(fmt.Errorf("Vault client not active"), true) 842 } 843 844 // Check if we have established a connection with Vault 845 if established, err := v.ConnectionEstablished(); !established && err == nil { 846 return nil, structs.NewRecoverableError(fmt.Errorf("Connection to Vault has not been established"), true) 847 } else if !established { 848 return nil, fmt.Errorf("Connection to Vault failed: %v", err) 849 } 850 851 // Track how long the request takes 852 defer metrics.MeasureSince([]string{"nomad", "vault", "create_token"}, time.Now()) 853 854 // Retrieve the Vault block for the task 855 policies := a.Job.VaultPolicies() 856 if policies == nil { 857 return nil, fmt.Errorf("Job doesn't require Vault policies") 858 } 859 tg, ok := policies[a.TaskGroup] 860 if !ok { 861 return nil, fmt.Errorf("Task group does not require Vault policies") 862 } 863 taskVault, ok := tg[task] 864 if !ok { 865 return nil, fmt.Errorf("Task does not require Vault policies") 866 } 867 868 // Build the creation request 869 req := &vapi.TokenCreateRequest{ 870 Policies: taskVault.Policies, 871 Metadata: map[string]string{ 872 "AllocationID": a.ID, 873 "Task": task, 874 "NodeID": a.NodeID, 875 }, 876 TTL: v.childTTL, 877 DisplayName: fmt.Sprintf("%s-%s", a.ID, task), 878 } 879 880 // Ensure we are under our rate limit 881 if err := v.limiter.Wait(ctx); err != nil { 882 return nil, err 883 } 884 885 // Make the request and switch depending on whether we are using a root 886 // token or a role based token 887 var secret *vapi.Secret 888 var err error 889 role := v.getRole() 890 if v.tokenData.Root && role == "" { 891 req.Period = v.childTTL 892 secret, err = v.auth.Create(req) 893 } else { 894 // Make the token using the role 895 secret, err = v.auth.CreateWithRole(req, v.getRole()) 896 } 897 898 // Determine whether it is unrecoverable 899 if err != nil { 900 if vaultUnrecoverableError.MatchString(err.Error()) { 901 return secret, err 902 } 903 904 // The error is recoverable 905 return nil, structs.NewRecoverableError(err, true) 906 } 907 908 return secret, nil 909 } 910 911 // LookupToken takes a Vault token and does a lookup against Vault. The call is 912 // rate limited and may be canceled with passed context. 913 func (v *vaultClient) LookupToken(ctx context.Context, token string) (*vapi.Secret, error) { 914 if !v.Enabled() { 915 return nil, fmt.Errorf("Vault integration disabled") 916 } 917 918 if !v.Active() { 919 return nil, fmt.Errorf("Vault client not active") 920 } 921 922 // Check if we have established a connection with Vault 923 if established, err := v.ConnectionEstablished(); !established && err == nil { 924 return nil, structs.NewRecoverableError(fmt.Errorf("Connection to Vault has not been established"), true) 925 } else if !established { 926 return nil, fmt.Errorf("Connection to Vault failed: %v", err) 927 } 928 929 // Track how long the request takes 930 defer metrics.MeasureSince([]string{"nomad", "vault", "lookup_token"}, time.Now()) 931 932 // Ensure we are under our rate limit 933 if err := v.limiter.Wait(ctx); err != nil { 934 return nil, err 935 } 936 937 // Lookup the token 938 return v.auth.Lookup(token) 939 } 940 941 // PoliciesFrom parses the set of policies returned by a token lookup. 942 func PoliciesFrom(s *vapi.Secret) ([]string, error) { 943 if s == nil { 944 return nil, fmt.Errorf("cannot parse nil Vault secret") 945 } 946 var data tokenData 947 if err := mapstructure.WeakDecode(s.Data, &data); err != nil { 948 return nil, fmt.Errorf("failed to parse Vault token's data block: %v", err) 949 } 950 951 return data.Policies, nil 952 } 953 954 // RevokeTokens revokes the passed set of accessors. If committed is set, the 955 // purge function passed to the client is called. If there is an error purging 956 // either because of Vault failures or because of the purge function, the 957 // revocation is retried until the tokens TTL. 958 func (v *vaultClient) RevokeTokens(ctx context.Context, accessors []*structs.VaultAccessor, committed bool) error { 959 if !v.Enabled() { 960 return nil 961 } 962 963 if !v.Active() { 964 return fmt.Errorf("Vault client not active") 965 } 966 967 // Track how long the request takes 968 defer metrics.MeasureSince([]string{"nomad", "vault", "revoke_tokens"}, time.Now()) 969 970 // Check if we have established a connection with Vault. If not just add it 971 // to the queue 972 if established, err := v.ConnectionEstablished(); !established && err == nil { 973 // Only bother tracking it for later revocation if the accessor was 974 // committed 975 if committed { 976 v.storeForRevocation(accessors) 977 } 978 979 // Track that we are abandoning these accessors. 980 metrics.IncrCounter([]string{"nomad", "vault", "undistributed_tokens_abandoned"}, float32(len(accessors))) 981 return nil 982 } 983 984 // Attempt to revoke immediately and if it fails, add it to the revoke queue 985 err := v.parallelRevoke(ctx, accessors) 986 if err != nil { 987 // If it is uncommitted, it is a best effort revoke as it will shortly 988 // TTL within the cubbyhole and has not been leaked to any outside 989 // system 990 if !committed { 991 metrics.IncrCounter([]string{"nomad", "vault", "undistributed_tokens_abandoned"}, float32(len(accessors))) 992 return nil 993 } 994 995 v.logger.Printf("[WARN] vault: failed to revoke tokens. Will reattempt til TTL: %v", err) 996 v.storeForRevocation(accessors) 997 return nil 998 } else if !committed { 999 // Mark that it was revoked but there is nothing to purge so exit 1000 metrics.IncrCounter([]string{"nomad", "vault", "undistributed_tokens_revoked"}, float32(len(accessors))) 1001 return nil 1002 } 1003 1004 if err := v.purgeFn(accessors); err != nil { 1005 v.logger.Printf("[ERR] vault: failed to purge Vault accessors: %v", err) 1006 v.storeForRevocation(accessors) 1007 return nil 1008 } 1009 1010 // Track that it was revoked successfully 1011 metrics.IncrCounter([]string{"nomad", "vault", "distributed_tokens_revoked"}, float32(len(accessors))) 1012 1013 return nil 1014 } 1015 1016 // storeForRevocation stores the passed set of accessors for revocation. It 1017 // captrues their effective TTL by storing their create TTL plus the current 1018 // time. 1019 func (v *vaultClient) storeForRevocation(accessors []*structs.VaultAccessor) { 1020 v.revLock.Lock() 1021 v.statsLock.Lock() 1022 now := time.Now() 1023 for _, a := range accessors { 1024 v.revoking[a] = now.Add(time.Duration(a.CreationTTL) * time.Second) 1025 } 1026 v.stats.TrackedForRevoke = len(v.revoking) 1027 v.statsLock.Unlock() 1028 v.revLock.Unlock() 1029 } 1030 1031 // parallelRevoke revokes the passed VaultAccessors in parallel. 1032 func (v *vaultClient) parallelRevoke(ctx context.Context, accessors []*structs.VaultAccessor) error { 1033 if !v.Enabled() { 1034 return fmt.Errorf("Vault integration disabled") 1035 } 1036 1037 if !v.Active() { 1038 return fmt.Errorf("Vault client not active") 1039 } 1040 1041 // Check if we have established a connection with Vault 1042 if established, err := v.ConnectionEstablished(); !established && err == nil { 1043 return structs.NewRecoverableError(fmt.Errorf("Connection to Vault has not been established"), true) 1044 } else if !established { 1045 return fmt.Errorf("Connection to Vault failed: %v", err) 1046 } 1047 1048 g, pCtx := errgroup.WithContext(ctx) 1049 1050 // Cap the handlers 1051 handlers := len(accessors) 1052 if handlers > maxParallelRevokes { 1053 handlers = maxParallelRevokes 1054 } 1055 1056 // Create the Vault Tokens 1057 input := make(chan *structs.VaultAccessor, handlers) 1058 for i := 0; i < handlers; i++ { 1059 g.Go(func() error { 1060 for { 1061 select { 1062 case va, ok := <-input: 1063 if !ok { 1064 return nil 1065 } 1066 1067 if err := v.auth.RevokeAccessor(va.Accessor); err != nil { 1068 return fmt.Errorf("failed to revoke token (alloc: %q, node: %q, task: %q): %v", va.AllocID, va.NodeID, va.Task, err) 1069 } 1070 case <-pCtx.Done(): 1071 return nil 1072 } 1073 } 1074 }) 1075 } 1076 1077 // Send the input 1078 go func() { 1079 defer close(input) 1080 for _, va := range accessors { 1081 select { 1082 case <-pCtx.Done(): 1083 return 1084 case input <- va: 1085 } 1086 } 1087 1088 }() 1089 1090 // Wait for everything to complete 1091 return g.Wait() 1092 } 1093 1094 // revokeDaemon should be called in a goroutine and is used to periodically 1095 // revoke Vault accessors that failed the original revocation 1096 func (v *vaultClient) revokeDaemon() { 1097 ticker := time.NewTicker(vaultRevocationIntv) 1098 defer ticker.Stop() 1099 1100 for { 1101 select { 1102 case <-v.tomb.Dying(): 1103 return 1104 case now := <-ticker.C: 1105 if established, _ := v.ConnectionEstablished(); !established { 1106 continue 1107 } 1108 1109 v.revLock.Lock() 1110 1111 // Fast path 1112 if len(v.revoking) == 0 { 1113 v.revLock.Unlock() 1114 continue 1115 } 1116 1117 // Build the list of allocations that need to revoked while pruning any TTL'd checks 1118 revoking := make([]*structs.VaultAccessor, 0, len(v.revoking)) 1119 for va, ttl := range v.revoking { 1120 if now.After(ttl) { 1121 delete(v.revoking, va) 1122 } else { 1123 revoking = append(revoking, va) 1124 } 1125 } 1126 1127 if err := v.parallelRevoke(context.Background(), revoking); err != nil { 1128 v.logger.Printf("[WARN] vault: background token revocation errored: %v", err) 1129 v.revLock.Unlock() 1130 continue 1131 } 1132 1133 // Unlock before a potentially expensive operation 1134 v.revLock.Unlock() 1135 1136 // Call the passed in token revocation function 1137 if err := v.purgeFn(revoking); err != nil { 1138 // Can continue since revocation is idempotent 1139 v.logger.Printf("[ERR] vault: token revocation errored: %v", err) 1140 continue 1141 } 1142 1143 // Track that tokens were revoked successfully 1144 metrics.IncrCounter([]string{"nomad", "vault", "distributed_tokens_revoked"}, float32(len(revoking))) 1145 1146 // Can delete from the tracked list now that we have purged 1147 v.revLock.Lock() 1148 v.statsLock.Lock() 1149 for _, va := range revoking { 1150 delete(v.revoking, va) 1151 } 1152 v.stats.TrackedForRevoke = len(v.revoking) 1153 v.statsLock.Unlock() 1154 v.revLock.Unlock() 1155 1156 } 1157 } 1158 } 1159 1160 // purgeVaultAccessors creates a Raft transaction to remove the passed Vault 1161 // Accessors 1162 func (s *Server) purgeVaultAccessors(accessors []*structs.VaultAccessor) error { 1163 // Commit this update via Raft 1164 req := structs.VaultAccessorsRequest{Accessors: accessors} 1165 _, _, err := s.raftApply(structs.VaultAccessorDegisterRequestType, req) 1166 return err 1167 } 1168 1169 // wrapNilError is a helper that returns a wrapped function that returns a nil 1170 // error 1171 func wrapNilError(f func()) func() error { 1172 return func() error { 1173 f() 1174 return nil 1175 } 1176 } 1177 1178 // setLimit is used to update the rate limit 1179 func (v *vaultClient) setLimit(l rate.Limit) { 1180 v.l.Lock() 1181 defer v.l.Unlock() 1182 v.limiter = rate.NewLimiter(l, int(l)) 1183 } 1184 1185 // Stats is used to query the state of the blocked eval tracker. 1186 func (v *vaultClient) Stats() *VaultStats { 1187 // Allocate a new stats struct 1188 stats := new(VaultStats) 1189 1190 v.statsLock.RLock() 1191 defer v.statsLock.RUnlock() 1192 1193 // Copy all the stats 1194 stats.TrackedForRevoke = v.stats.TrackedForRevoke 1195 1196 return stats 1197 } 1198 1199 // EmitStats is used to export metrics about the blocked eval tracker while enabled 1200 func (v *vaultClient) EmitStats(period time.Duration, stopCh chan struct{}) { 1201 for { 1202 select { 1203 case <-time.After(period): 1204 stats := v.Stats() 1205 metrics.SetGauge([]string{"nomad", "vault", "distributed_tokens_revoking"}, float32(stats.TrackedForRevoke)) 1206 case <-stopCh: 1207 return 1208 } 1209 } 1210 }