// Source: github.com/jrxfive/nomad@v0.6.1-0.20170802162750-1fef470e89bf/nomad/vault.go

package nomad

import (
	"context"
	"errors"
	"fmt"
	"log"
	"math/rand"
	"regexp"
	"sync"
	"sync/atomic"
	"time"

	"gopkg.in/tomb.v2"

	metrics "github.com/armon/go-metrics"
	multierror "github.com/hashicorp/go-multierror"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/nomad/structs/config"
	vapi "github.com/hashicorp/vault/api"
	"github.com/mitchellh/mapstructure"

	"golang.org/x/sync/errgroup"
	"golang.org/x/time/rate"
)

const (
	// vaultTokenCreateTTL is the duration the wrapped token for the client is
	// valid for. The units are in seconds.
	vaultTokenCreateTTL = "60s"

	// minimumTokenTTL is the minimum Token TTL allowed for child tokens.
	minimumTokenTTL = 5 * time.Minute

	// defaultTokenTTL is the default Token TTL used when the passed token is a
	// root token such that child tokens aren't being created against a role
	// that has defined a TTL.
	defaultTokenTTL = "72h"

	// requestRateLimit is the maximum number of requests per second Nomad will
	// make against Vault.
	requestRateLimit rate.Limit = 500.0

	// maxParallelRevokes is the maximum number of parallel Vault
	// token revocation requests.
	maxParallelRevokes = 64

	// vaultRevocationIntv is the interval at which Vault tokens that failed
	// initial revocation are retried.
	vaultRevocationIntv = 5 * time.Minute

	// vaultCapabilitiesLookupPath is the path to lookup the capabilities of
	// one's own token.
	vaultCapabilitiesLookupPath = "sys/capabilities-self"

	// vaultTokenRenewPath is the path used to renew our token.
	vaultTokenRenewPath = "auth/token/renew-self"

	// vaultTokenLookupPath is the path used to lookup a token.
	vaultTokenLookupPath = "auth/token/lookup"

	// vaultTokenLookupSelfPath is the path used to lookup self token.
	vaultTokenLookupSelfPath = "auth/token/lookup-self"

	// vaultTokenRevokePath is the path used to revoke a token.
	vaultTokenRevokePath = "auth/token/revoke-accessor"

	// vaultRoleLookupPath is the path to lookup a role. It is a format string
	// taking the role name.
	vaultRoleLookupPath = "auth/token/roles/%s"

	// vaultTokenRoleCreatePath is the path to create a token from a role. It
	// is a format string taking the role name.
	vaultTokenRoleCreatePath = "auth/token/create/%s"
)

var (
	// vaultUnrecoverableError matches unrecoverable errors: error text that
	// contains "Code:" followed by a 400, 403 or 404 status.
	vaultUnrecoverableError = regexp.MustCompile(`Code:\s+40(0|3|4)`)

	// vaultCapabilitiesCapability is the expected capability of Nomad's Vault
	// token on the path. The token must have at least one of the
	// capabilities.
	vaultCapabilitiesCapability = []string{"update", "root"}

	// vaultTokenRenewCapability is the expected capability Nomad's
	// Vault token should have on the path. The token must have at least one of
	// the capabilities.
	vaultTokenRenewCapability = []string{"update", "root"}

	// vaultTokenLookupCapability is the expected capability Nomad's
	// Vault token should have on the path. The token must have at least one of
	// the capabilities.
	vaultTokenLookupCapability = []string{"update", "root"}

	// vaultTokenLookupSelfCapability is the expected capability Nomad's
	// Vault token should have on the path. The token must have at least one of
	// the capabilities.
	vaultTokenLookupSelfCapability = []string{"update", "root"}

	// vaultTokenRevokeCapability is the expected capability Nomad's
	// Vault token should have on the path. The token must have at least one of
	// the capabilities.
	vaultTokenRevokeCapability = []string{"update", "root"}

	// vaultRoleLookupCapability is the expected capability Nomad's Vault
	// token should have on the path. The token must have at least one of the
	// capabilities.
	vaultRoleLookupCapability = []string{"read", "root"}

	// vaultTokenRoleCreateCapability is the expected capability Nomad's Vault
	// token should have on the path. The token must have at least one of the
	// capabilities.
	vaultTokenRoleCreateCapability = []string{"update", "root"}
)

// VaultClient is the Server's interface for interfacing with Vault.
type VaultClient interface {
	// SetActive activates or de-activates the Vault client. When active, token
	// creation/lookup/revocation operations are allowed.
	SetActive(active bool)

	// SetConfig updates the config used by the Vault client.
	SetConfig(config *config.VaultConfig) error

	// CreateToken takes an allocation and task and returns an appropriate Vault
	// Secret.
	CreateToken(ctx context.Context, a *structs.Allocation, task string) (*vapi.Secret, error)

	// LookupToken takes a token string and returns the Vault secret that
	// results from looking it up.
	LookupToken(ctx context.Context, token string) (*vapi.Secret, error)

	// RevokeTokens takes a set of token accessors and revokes the tokens.
	RevokeTokens(ctx context.Context, accessors []*structs.VaultAccessor, committed bool) error

	// Stop is used to stop token renewal.
	Stop()

	// Running returns whether the Vault client is running.
	Running() bool

	// Stats returns the Vault client's statistics.
	Stats() *VaultStats

	// EmitStats emits the client's statistics at the given period until stopCh
	// is signalled.
	EmitStats(period time.Duration, stopCh chan struct{})
}

// VaultStats holds all the stats about Vault tokens created and managed by
// Nomad.
type VaultStats struct {
	// TrackedForRevoke is the count of tokens that are being tracked to be
	// revoked since they could not be immediately revoked.
	TrackedForRevoke int
}

// PurgeVaultAccessorFn is called to remove VaultAccessors from the system. If
// the function returns an error, the token will still be tracked and revocation
// will retry till there is a success.
type PurgeVaultAccessorFn func(accessors []*structs.VaultAccessor) error

// tokenData holds the relevant information about the Vault token passed to the
// client. The tagged fields are decoded from the data block of a token lookup.
type tokenData struct {
	// CreationTTL is the TTL the token was created with; it is treated as a
	// number of seconds when computing the token's expiration.
	CreationTTL int `mapstructure:"creation_ttl"`
	// TTL is the token's TTL as reported by the lookup; zero is rejected
	// during token validation.
	TTL       int      `mapstructure:"ttl"`
	Renewable bool     `mapstructure:"renewable"`
	Policies  []string `mapstructure:"policies"`
	Role      string   `mapstructure:"role"`
	// Root is not decoded from Vault; it is set after decoding when the
	// "root" policy is found in Policies.
	Root bool
}

// vaultClient is the Server's implementation of the VaultClient interface. The
// client renews the PeriodicToken given in the Vault configuration and provides
// the Server with the ability to create child tokens and lookup the permissions
// of tokens.
type vaultClient struct {
	// limiter is used to rate limit requests to Vault
	limiter *rate.Limiter

	// client is the Vault API client
	client *vapi.Client

	// auth is the Vault token auth API client
	auth *vapi.TokenAuth

	// config is the user passed Vault config
	config *config.VaultConfig

	// connEstablished marks whether we have an established connection to Vault.
	connEstablished bool

	// connEstablishedErr marks an error that can occur when establishing a
	// connection
	connEstablishedErr error

	// token is the raw token used by the client
	token string

	// tokenData is the data of the passed Vault token
	tokenData *tokenData

	// revoking tracks the VaultAccessors that must be revoked. Each value is
	// the time recorded when the accessor was stored: the time of storing
	// plus the accessor's creation TTL. SetActive and storeForRevocation
	// access the map while holding revLock.
	revoking map[*structs.VaultAccessor]time.Time
	// purgeFn is invoked with accessors whose tokens were revoked so they can
	// be removed from the system.
	purgeFn PurgeVaultAccessorFn
	revLock sync.Mutex

	// active indicates whether the vaultClient is active. It should be
	// accessed using a helper and updated atomically
	active int32

	// running indicates whether the vault client is started.
	running bool

	// childTTL is the TTL for child tokens.
	childTTL string

	// lastRenewed is the time the token was last renewed
	lastRenewed time.Time

	// tomb tracks the client's background goroutines and is used to signal
	// them to stop.
	tomb   *tomb.Tomb
	logger *log.Logger

	// stats stores the stats
	stats     *VaultStats
	statsLock sync.RWMutex

	// l is used to lock the configuration aspects of the client such that
	// multiple callers can't cause conflicting config updates
	l sync.Mutex
}

// NewVaultClient returns a Vault client from the given config. If the client
// couldn't be made an error is returned.
234 func NewVaultClient(c *config.VaultConfig, logger *log.Logger, purgeFn PurgeVaultAccessorFn) (*vaultClient, error) { 235 if c == nil { 236 return nil, fmt.Errorf("must pass valid VaultConfig") 237 } 238 239 if logger == nil { 240 return nil, fmt.Errorf("must pass valid logger") 241 } 242 243 v := &vaultClient{ 244 config: c, 245 logger: logger, 246 limiter: rate.NewLimiter(requestRateLimit, int(requestRateLimit)), 247 revoking: make(map[*structs.VaultAccessor]time.Time), 248 purgeFn: purgeFn, 249 tomb: &tomb.Tomb{}, 250 stats: new(VaultStats), 251 } 252 253 if v.config.IsEnabled() { 254 if err := v.buildClient(); err != nil { 255 return nil, err 256 } 257 258 // Launch the required goroutines 259 v.tomb.Go(wrapNilError(v.establishConnection)) 260 v.tomb.Go(wrapNilError(v.revokeDaemon)) 261 262 v.running = true 263 } 264 265 return v, nil 266 } 267 268 func (v *vaultClient) Stop() { 269 v.l.Lock() 270 running := v.running 271 v.running = false 272 v.l.Unlock() 273 274 if running { 275 v.tomb.Kill(nil) 276 v.tomb.Wait() 277 v.flush() 278 } 279 } 280 281 func (v *vaultClient) Running() bool { 282 v.l.Lock() 283 defer v.l.Unlock() 284 return v.running 285 } 286 287 // SetActive activates or de-activates the Vault client. When active, token 288 // creation/lookup/revocation operation are allowed. 
All queued revocations are 289 // cancelled if set un-active as it is assumed another instances is taking over 290 func (v *vaultClient) SetActive(active bool) { 291 if active { 292 atomic.StoreInt32(&v.active, 1) 293 } else { 294 atomic.StoreInt32(&v.active, 0) 295 } 296 297 // Clear out the revoking tokens 298 v.revLock.Lock() 299 v.revoking = make(map[*structs.VaultAccessor]time.Time) 300 v.revLock.Unlock() 301 302 return 303 } 304 305 // flush is used to reset the state of the vault client 306 func (v *vaultClient) flush() { 307 v.l.Lock() 308 defer v.l.Unlock() 309 310 v.client = nil 311 v.auth = nil 312 v.connEstablished = false 313 v.connEstablishedErr = nil 314 v.token = "" 315 v.tokenData = nil 316 v.revoking = make(map[*structs.VaultAccessor]time.Time) 317 v.childTTL = "" 318 v.tomb = &tomb.Tomb{} 319 } 320 321 // SetConfig is used to update the Vault config being used. A temporary outage 322 // may occur after calling as it re-establishes a connection to Vault 323 func (v *vaultClient) SetConfig(config *config.VaultConfig) error { 324 if config == nil { 325 return fmt.Errorf("must pass valid VaultConfig") 326 } 327 328 v.l.Lock() 329 defer v.l.Unlock() 330 331 // Kill any background routintes 332 if v.running { 333 // Stop accepting any new request 334 v.connEstablished = false 335 336 // Kill any background routine and create a new tomb 337 v.tomb.Kill(nil) 338 v.tomb.Wait() 339 v.tomb = &tomb.Tomb{} 340 v.running = false 341 } 342 343 // Store the new config 344 v.config = config 345 346 // Check if we should relaunch 347 if v.config.IsEnabled() { 348 // Rebuild the client 349 if err := v.buildClient(); err != nil { 350 return err 351 } 352 353 // Launch the required goroutines 354 v.tomb.Go(wrapNilError(v.establishConnection)) 355 v.tomb.Go(wrapNilError(v.revokeDaemon)) 356 v.running = true 357 } 358 359 return nil 360 } 361 362 // buildClient is used to build a Vault client based on the stored Vault config 363 func (v *vaultClient) buildClient() 
error { 364 // Validate we have the required fields. 365 if v.config.Token == "" { 366 return errors.New("Vault token must be set") 367 } else if v.config.Addr == "" { 368 return errors.New("Vault address must be set") 369 } 370 371 // Parse the TTL if it is set 372 if v.config.TaskTokenTTL != "" { 373 d, err := time.ParseDuration(v.config.TaskTokenTTL) 374 if err != nil { 375 return fmt.Errorf("failed to parse TaskTokenTTL %q: %v", v.config.TaskTokenTTL, err) 376 } 377 378 if d.Nanoseconds() < minimumTokenTTL.Nanoseconds() { 379 return fmt.Errorf("ChildTokenTTL is less than minimum allowed of %v", minimumTokenTTL) 380 } 381 382 v.childTTL = v.config.TaskTokenTTL 383 } else { 384 // Default the TaskTokenTTL 385 v.childTTL = defaultTokenTTL 386 } 387 388 // Get the Vault API configuration 389 apiConf, err := v.config.ApiConfig() 390 if err != nil { 391 return fmt.Errorf("Failed to create Vault API config: %v", err) 392 } 393 394 // Create the Vault API client 395 client, err := vapi.NewClient(apiConf) 396 if err != nil { 397 v.logger.Printf("[ERR] vault: failed to create Vault client. Not retrying: %v", err) 398 return err 399 } 400 401 // Set the token and store the client 402 v.token = v.config.Token 403 client.SetToken(v.token) 404 v.client = client 405 v.auth = client.Auth().Token() 406 return nil 407 } 408 409 // establishConnection is used to make first contact with Vault. This should be 410 // called in a go-routine since the connection is retried til the Vault Client 411 // is stopped or the connection is successfully made at which point the renew 412 // loop is started. 
413 func (v *vaultClient) establishConnection() { 414 // Create the retry timer and set initial duration to zero so it fires 415 // immediately 416 retryTimer := time.NewTimer(0) 417 418 OUTER: 419 for { 420 select { 421 case <-v.tomb.Dying(): 422 return 423 case <-retryTimer.C: 424 // Ensure the API is reachable 425 if _, err := v.client.Sys().InitStatus(); err != nil { 426 v.logger.Printf("[WARN] vault: failed to contact Vault API. Retrying in %v: %v", 427 v.config.ConnectionRetryIntv, err) 428 retryTimer.Reset(v.config.ConnectionRetryIntv) 429 continue OUTER 430 } 431 432 break OUTER 433 } 434 } 435 436 // Retrieve our token, validate it and parse the lease duration 437 if err := v.parseSelfToken(); err != nil { 438 v.logger.Printf("[ERR] vault: failed to validate self token/role and not retrying: %v", err) 439 v.l.Lock() 440 v.connEstablished = false 441 v.connEstablishedErr = err 442 v.l.Unlock() 443 return 444 } 445 446 // Set the wrapping function such that token creation is wrapped now 447 // that we know our role 448 v.client.SetWrappingLookupFunc(v.getWrappingFn()) 449 450 // If we are given a non-root token, start renewing it 451 if v.tokenData.Root && v.tokenData.CreationTTL == 0 { 452 v.logger.Printf("[DEBUG] vault: not renewing token as it is root") 453 } else { 454 v.logger.Printf("[DEBUG] vault: token lease duration is %v", 455 time.Duration(v.tokenData.CreationTTL)*time.Second) 456 v.tomb.Go(wrapNilError(v.renewalLoop)) 457 } 458 459 v.l.Lock() 460 v.connEstablished = true 461 v.connEstablishedErr = nil 462 v.l.Unlock() 463 } 464 465 // renewalLoop runs the renew loop. This should only be called if we are given a 466 // non-root token. 
467 func (v *vaultClient) renewalLoop() { 468 // Create the renewal timer and set initial duration to zero so it fires 469 // immediately 470 authRenewTimer := time.NewTimer(0) 471 472 // Backoff is to reduce the rate we try to renew with Vault under error 473 // situations 474 backoff := 0.0 475 476 for { 477 select { 478 case <-v.tomb.Dying(): 479 return 480 case <-authRenewTimer.C: 481 // Renew the token and determine the new expiration 482 err := v.renew() 483 currentExpiration := v.lastRenewed.Add(time.Duration(v.tokenData.CreationTTL) * time.Second) 484 485 // Successfully renewed 486 if err == nil { 487 // If we take the expiration (lastRenewed + auth duration) and 488 // subtract the current time, we get a duration until expiry. 489 // Set the timer to poke us after half of that time is up. 490 durationUntilRenew := currentExpiration.Sub(time.Now()) / 2 491 492 v.logger.Printf("[INFO] vault: renewing token in %v", durationUntilRenew) 493 authRenewTimer.Reset(durationUntilRenew) 494 495 // Reset any backoff 496 backoff = 0 497 break 498 } 499 500 // Back off, increasing the amount of backoff each time. 
There are some rules: 501 // 502 // * If we have an existing authentication that is going to expire, 503 // never back off more than half of the amount of time remaining 504 // until expiration 505 // * Never back off more than 30 seconds multiplied by a random 506 // value between 1 and 2 507 // * Use randomness so that many clients won't keep hitting Vault 508 // at the same time 509 510 // Set base values and add some backoff 511 512 v.logger.Printf("[WARN] vault: got error or bad auth, so backing off: %v", err) 513 switch { 514 case backoff < 5: 515 backoff = 5 516 case backoff >= 24: 517 backoff = 30 518 default: 519 backoff = backoff * 1.25 520 } 521 522 // Add randomness 523 backoff = backoff * (1.0 + rand.Float64()) 524 525 maxBackoff := currentExpiration.Sub(time.Now()) / 2 526 if maxBackoff < 0 { 527 // We have failed to renew the token past its expiration. Stop 528 // renewing with Vault. 529 v.logger.Printf("[ERR] vault: failed to renew Vault token before lease expiration. Shutting down Vault client") 530 v.l.Lock() 531 v.connEstablished = false 532 v.connEstablishedErr = err 533 v.l.Unlock() 534 return 535 536 } else if backoff > maxBackoff.Seconds() { 537 backoff = maxBackoff.Seconds() 538 } 539 540 durationUntilRetry := time.Duration(backoff) * time.Second 541 v.logger.Printf("[INFO] vault: backing off for %v", durationUntilRetry) 542 543 authRenewTimer.Reset(durationUntilRetry) 544 } 545 } 546 } 547 548 // renew attempts to renew our Vault token. If the renewal fails, an error is 549 // returned. 
This method updates the lastRenewed time 550 func (v *vaultClient) renew() error { 551 // Attempt to renew the token 552 secret, err := v.auth.RenewSelf(v.tokenData.CreationTTL) 553 if err != nil { 554 return err 555 } 556 557 auth := secret.Auth 558 if auth == nil { 559 return fmt.Errorf("renewal successful but not auth information returned") 560 } else if auth.LeaseDuration == 0 { 561 return fmt.Errorf("renewal successful but no lease duration returned") 562 } 563 564 v.lastRenewed = time.Now() 565 v.logger.Printf("[DEBUG] vault: succesfully renewed server token") 566 return nil 567 } 568 569 // getWrappingFn returns an appropriate wrapping function for Nomad Servers 570 func (v *vaultClient) getWrappingFn() func(operation, path string) string { 571 createPath := "auth/token/create" 572 role := v.getRole() 573 if role != "" { 574 createPath = fmt.Sprintf("auth/token/create/%s", role) 575 } 576 577 return func(operation, path string) string { 578 // Only wrap the token create operation 579 if operation != "POST" || path != createPath { 580 return "" 581 } 582 583 return vaultTokenCreateTTL 584 } 585 } 586 587 // parseSelfToken looks up the Vault token in Vault and parses its data storing 588 // it in the client. If the token is not valid for Nomads purposes an error is 589 // returned. 
590 func (v *vaultClient) parseSelfToken() error { 591 // Get the initial lease duration 592 auth := v.client.Auth().Token() 593 var self *vapi.Secret 594 595 // Try looking up the token using the self endpoint 596 secret, err := auth.LookupSelf() 597 if err != nil { 598 // Try looking up our token directly 599 self, err = auth.Lookup(v.client.Token()) 600 if err != nil { 601 return fmt.Errorf("failed to lookup Vault periodic token: %v", err) 602 } 603 } 604 self = secret 605 606 // Read and parse the fields 607 var data tokenData 608 if err := mapstructure.WeakDecode(self.Data, &data); err != nil { 609 return fmt.Errorf("failed to parse Vault token's data block: %v", err) 610 } 611 612 root := false 613 for _, p := range data.Policies { 614 if p == "root" { 615 root = true 616 break 617 } 618 } 619 620 // Store the token data 621 data.Root = root 622 v.tokenData = &data 623 624 // The criteria that must be met for the token to be valid are as follows: 625 // 1) If token is non-root or is but has a creation ttl 626 // a) The token must be renewable 627 // b) Token must have a non-zero TTL 628 // 2) Must have update capability for "auth/token/lookup/" (used to verify incoming tokens) 629 // 3) Must have update capability for "/auth/token/revoke-accessor/" (used to revoke unneeded tokens) 630 // 4) If configured to create tokens against a role: 631 // a) Must have read capability for "auth/token/roles/<role_name" (Can just attemp a read) 632 // b) Must have update capability for path "auth/token/create/<role_name>" 633 // c) Role must: 634 // 1) Not allow orphans 635 // 2) Must allow tokens to be renewed 636 // 3) Must not have an explicit max TTL 637 // 4) Must have non-zero period 638 // 5) If not configured against a role, the token must be root 639 640 var mErr multierror.Error 641 role := v.getRole() 642 if !root { 643 // All non-root tokens must be renewable 644 if !data.Renewable { 645 multierror.Append(&mErr, fmt.Errorf("Vault token is not renewable or 
root")) 646 } 647 648 // All non-root tokens must have a lease duration 649 if data.CreationTTL == 0 { 650 multierror.Append(&mErr, fmt.Errorf("invalid lease duration of zero")) 651 } 652 653 // The lease duration can not be expired 654 if data.TTL == 0 { 655 multierror.Append(&mErr, fmt.Errorf("token TTL is zero")) 656 } 657 658 // There must be a valid role since we aren't root 659 if role == "" { 660 multierror.Append(&mErr, fmt.Errorf("token role name must be set when not using a root token")) 661 } 662 663 } else if data.CreationTTL != 0 { 664 // If the root token has a TTL it must be renewable 665 if !data.Renewable { 666 multierror.Append(&mErr, fmt.Errorf("Vault token has a TTL but is not renewable")) 667 } else if data.TTL == 0 { 668 // If the token has a TTL make sure it has not expired 669 multierror.Append(&mErr, fmt.Errorf("token TTL is zero")) 670 } 671 } 672 673 // Check we have the correct capabilities 674 if err := v.validateCapabilities(role, root); err != nil { 675 multierror.Append(&mErr, err) 676 } 677 678 // If given a role validate it 679 if role != "" { 680 if err := v.validateRole(role); err != nil { 681 multierror.Append(&mErr, err) 682 } 683 } 684 685 return mErr.ErrorOrNil() 686 } 687 688 // getRole returns the role name to be used when creating tokens 689 func (v *vaultClient) getRole() string { 690 if v.config.Role != "" { 691 return v.config.Role 692 } 693 694 return v.tokenData.Role 695 } 696 697 // validateCapabilities checks that Nomad's Vault token has the correct 698 // capabilities. 699 func (v *vaultClient) validateCapabilities(role string, root bool) error { 700 // Check if the token can lookup capabilities. 
701 var mErr multierror.Error 702 _, _, err := v.hasCapability(vaultCapabilitiesLookupPath, vaultCapabilitiesCapability) 703 if err != nil { 704 // Check if there is a permission denied 705 if vaultUnrecoverableError.MatchString(err.Error()) { 706 // Since we can't read permissions, we just log a warning that we 707 // can't tell if the Vault token will work 708 msg := fmt.Sprintf("Can not lookup token capabilities. "+ 709 "As such certain operations may fail in the future. "+ 710 "Please give Nomad a Vault token with one of the following "+ 711 "capabilities %q on %q so that the required capabilities can be verified", 712 vaultCapabilitiesCapability, vaultCapabilitiesLookupPath) 713 v.logger.Printf("[WARN] vault: %s", msg) 714 return nil 715 } else { 716 multierror.Append(&mErr, err) 717 } 718 } 719 720 // verify is a helper function that verifies the token has one of the 721 // capabilities on the given path and adds an issue to the error 722 verify := func(path string, requiredCaps []string) { 723 ok, caps, err := v.hasCapability(path, requiredCaps) 724 if err != nil { 725 multierror.Append(&mErr, err) 726 } else if !ok { 727 multierror.Append(&mErr, 728 fmt.Errorf("token must have one of the following capabilities %q on %q; has %v", requiredCaps, path, caps)) 729 } 730 } 731 732 // Check if we are verifying incoming tokens 733 if !v.config.AllowsUnauthenticated() { 734 verify(vaultTokenLookupPath, vaultTokenLookupCapability) 735 } 736 737 // Verify we can renew our selves tokens 738 verify(vaultTokenRenewPath, vaultTokenRenewCapability) 739 740 // Verify we can revoke tokens 741 verify(vaultTokenRevokePath, vaultTokenRevokeCapability) 742 743 // If we are using a role verify the capability 744 if role != "" { 745 // Verify we can read the role 746 verify(fmt.Sprintf(vaultRoleLookupPath, role), vaultRoleLookupCapability) 747 748 // Verify we can create from the role 749 verify(fmt.Sprintf(vaultTokenRoleCreatePath, role), vaultTokenRoleCreateCapability) 750 } 751 
752 return mErr.ErrorOrNil() 753 } 754 755 // hasCapability takes a path and returns whether the token has at least one of 756 // the required capabilities on the given path. It also returns the set of 757 // capabilities the token does have as well as any error that occured. 758 func (v *vaultClient) hasCapability(path string, required []string) (bool, []string, error) { 759 caps, err := v.client.Sys().CapabilitiesSelf(path) 760 if err != nil { 761 return false, nil, err 762 } 763 for _, c := range caps { 764 for _, r := range required { 765 if c == r { 766 return true, caps, nil 767 } 768 } 769 } 770 return false, caps, nil 771 } 772 773 // validateRole contacts Vault and checks that the given Vault role is valid for 774 // the purposes of being used by Nomad 775 func (v *vaultClient) validateRole(role string) error { 776 if role == "" { 777 return fmt.Errorf("Invalid empty role name") 778 } 779 780 // Validate the role 781 rsecret, err := v.client.Logical().Read(fmt.Sprintf("auth/token/roles/%s", role)) 782 if err != nil { 783 return fmt.Errorf("failed to lookup role %q: %v", role, err) 784 } 785 if rsecret == nil { 786 return fmt.Errorf("Role %q does not exist", role) 787 } 788 789 // Read and parse the fields 790 var data struct { 791 ExplicitMaxTtl int `mapstructure:"explicit_max_ttl"` 792 Orphan bool 793 Period int 794 Renewable bool 795 } 796 if err := mapstructure.WeakDecode(rsecret.Data, &data); err != nil { 797 return fmt.Errorf("failed to parse Vault role's data block: %v", err) 798 } 799 800 // Validate the role is acceptable 801 var mErr multierror.Error 802 if data.Orphan { 803 multierror.Append(&mErr, fmt.Errorf("Role must not allow orphans")) 804 } 805 806 if !data.Renewable { 807 multierror.Append(&mErr, fmt.Errorf("Role must allow tokens to be renewed")) 808 } 809 810 if data.ExplicitMaxTtl != 0 { 811 multierror.Append(&mErr, fmt.Errorf("Role can not use an explicit max ttl. 
Token must be periodic.")) 812 } 813 814 if data.Period == 0 { 815 multierror.Append(&mErr, fmt.Errorf("Role must have a non-zero period to make tokens periodic.")) 816 } 817 818 return mErr.ErrorOrNil() 819 } 820 821 // ConnectionEstablished returns whether a connection to Vault has been 822 // established and any error that potentially caused it to be false 823 func (v *vaultClient) ConnectionEstablished() (bool, error) { 824 v.l.Lock() 825 defer v.l.Unlock() 826 return v.connEstablished, v.connEstablishedErr 827 } 828 829 // Enabled returns whether the client is active 830 func (v *vaultClient) Enabled() bool { 831 v.l.Lock() 832 defer v.l.Unlock() 833 return v.config.IsEnabled() 834 } 835 836 // Active returns whether the client is active 837 func (v *vaultClient) Active() bool { 838 return atomic.LoadInt32(&v.active) == 1 839 } 840 841 // CreateToken takes the allocation and task and returns an appropriate Vault 842 // token. The call is rate limited and may be canceled with the passed policy. 
843 // When the error is recoverable, it will be of type RecoverableError 844 func (v *vaultClient) CreateToken(ctx context.Context, a *structs.Allocation, task string) (*vapi.Secret, error) { 845 if !v.Enabled() { 846 return nil, fmt.Errorf("Vault integration disabled") 847 } 848 if !v.Active() { 849 return nil, structs.NewRecoverableError(fmt.Errorf("Vault client not active"), true) 850 } 851 852 // Check if we have established a connection with Vault 853 if established, err := v.ConnectionEstablished(); !established && err == nil { 854 return nil, structs.NewRecoverableError(fmt.Errorf("Connection to Vault has not been established"), true) 855 } else if !established { 856 return nil, fmt.Errorf("Connection to Vault failed: %v", err) 857 } 858 859 // Track how long the request takes 860 defer metrics.MeasureSince([]string{"nomad", "vault", "create_token"}, time.Now()) 861 862 // Retrieve the Vault block for the task 863 policies := a.Job.VaultPolicies() 864 if policies == nil { 865 return nil, fmt.Errorf("Job doesn't require Vault policies") 866 } 867 tg, ok := policies[a.TaskGroup] 868 if !ok { 869 return nil, fmt.Errorf("Task group does not require Vault policies") 870 } 871 taskVault, ok := tg[task] 872 if !ok { 873 return nil, fmt.Errorf("Task does not require Vault policies") 874 } 875 876 // Build the creation request 877 req := &vapi.TokenCreateRequest{ 878 Policies: taskVault.Policies, 879 Metadata: map[string]string{ 880 "AllocationID": a.ID, 881 "Task": task, 882 "NodeID": a.NodeID, 883 }, 884 TTL: v.childTTL, 885 DisplayName: fmt.Sprintf("%s-%s", a.ID, task), 886 } 887 888 // Ensure we are under our rate limit 889 if err := v.limiter.Wait(ctx); err != nil { 890 return nil, err 891 } 892 893 // Make the request and switch depending on whether we are using a root 894 // token or a role based token 895 var secret *vapi.Secret 896 var err error 897 role := v.getRole() 898 if v.tokenData.Root && role == "" { 899 req.Period = v.childTTL 900 secret, err = 
v.auth.Create(req) 901 } else { 902 // Make the token using the role 903 secret, err = v.auth.CreateWithRole(req, v.getRole()) 904 } 905 906 // Determine whether it is unrecoverable 907 if err != nil { 908 if vaultUnrecoverableError.MatchString(err.Error()) { 909 return secret, err 910 } 911 912 // The error is recoverable 913 return nil, structs.NewRecoverableError(err, true) 914 } 915 916 return secret, nil 917 } 918 919 // LookupToken takes a Vault token and does a lookup against Vault. The call is 920 // rate limited and may be canceled with passed context. 921 func (v *vaultClient) LookupToken(ctx context.Context, token string) (*vapi.Secret, error) { 922 if !v.Enabled() { 923 return nil, fmt.Errorf("Vault integration disabled") 924 } 925 926 if !v.Active() { 927 return nil, fmt.Errorf("Vault client not active") 928 } 929 930 // Check if we have established a connection with Vault 931 if established, err := v.ConnectionEstablished(); !established && err == nil { 932 return nil, structs.NewRecoverableError(fmt.Errorf("Connection to Vault has not been established"), true) 933 } else if !established { 934 return nil, fmt.Errorf("Connection to Vault failed: %v", err) 935 } 936 937 // Track how long the request takes 938 defer metrics.MeasureSince([]string{"nomad", "vault", "lookup_token"}, time.Now()) 939 940 // Ensure we are under our rate limit 941 if err := v.limiter.Wait(ctx); err != nil { 942 return nil, err 943 } 944 945 // Lookup the token 946 return v.auth.Lookup(token) 947 } 948 949 // PoliciesFrom parses the set of policies returned by a token lookup. 
950 func PoliciesFrom(s *vapi.Secret) ([]string, error) { 951 if s == nil { 952 return nil, fmt.Errorf("cannot parse nil Vault secret") 953 } 954 var data tokenData 955 if err := mapstructure.WeakDecode(s.Data, &data); err != nil { 956 return nil, fmt.Errorf("failed to parse Vault token's data block: %v", err) 957 } 958 959 return data.Policies, nil 960 } 961 962 // RevokeTokens revokes the passed set of accessors. If committed is set, the 963 // purge function passed to the client is called. If there is an error purging 964 // either because of Vault failures or because of the purge function, the 965 // revocation is retried until the tokens TTL. 966 func (v *vaultClient) RevokeTokens(ctx context.Context, accessors []*structs.VaultAccessor, committed bool) error { 967 if !v.Enabled() { 968 return nil 969 } 970 971 if !v.Active() { 972 return fmt.Errorf("Vault client not active") 973 } 974 975 // Track how long the request takes 976 defer metrics.MeasureSince([]string{"nomad", "vault", "revoke_tokens"}, time.Now()) 977 978 // Check if we have established a connection with Vault. If not just add it 979 // to the queue 980 if established, err := v.ConnectionEstablished(); !established && err == nil { 981 // Only bother tracking it for later revocation if the accessor was 982 // committed 983 if committed { 984 v.storeForRevocation(accessors) 985 } 986 987 // Track that we are abandoning these accessors. 
988 metrics.IncrCounter([]string{"nomad", "vault", "undistributed_tokens_abandoned"}, float32(len(accessors))) 989 return nil 990 } 991 992 // Attempt to revoke immediately and if it fails, add it to the revoke queue 993 err := v.parallelRevoke(ctx, accessors) 994 if err != nil { 995 // If it is uncommitted, it is a best effort revoke as it will shortly 996 // TTL within the cubbyhole and has not been leaked to any outside 997 // system 998 if !committed { 999 metrics.IncrCounter([]string{"nomad", "vault", "undistributed_tokens_abandoned"}, float32(len(accessors))) 1000 return nil 1001 } 1002 1003 v.logger.Printf("[WARN] vault: failed to revoke tokens. Will reattempt til TTL: %v", err) 1004 v.storeForRevocation(accessors) 1005 return nil 1006 } else if !committed { 1007 // Mark that it was revoked but there is nothing to purge so exit 1008 metrics.IncrCounter([]string{"nomad", "vault", "undistributed_tokens_revoked"}, float32(len(accessors))) 1009 return nil 1010 } 1011 1012 if err := v.purgeFn(accessors); err != nil { 1013 v.logger.Printf("[ERR] vault: failed to purge Vault accessors: %v", err) 1014 v.storeForRevocation(accessors) 1015 return nil 1016 } 1017 1018 // Track that it was revoked successfully 1019 metrics.IncrCounter([]string{"nomad", "vault", "distributed_tokens_revoked"}, float32(len(accessors))) 1020 1021 return nil 1022 } 1023 1024 // storeForRevocation stores the passed set of accessors for revocation. It 1025 // captrues their effective TTL by storing their create TTL plus the current 1026 // time. 1027 func (v *vaultClient) storeForRevocation(accessors []*structs.VaultAccessor) { 1028 v.revLock.Lock() 1029 v.statsLock.Lock() 1030 now := time.Now() 1031 for _, a := range accessors { 1032 v.revoking[a] = now.Add(time.Duration(a.CreationTTL) * time.Second) 1033 } 1034 v.stats.TrackedForRevoke = len(v.revoking) 1035 v.statsLock.Unlock() 1036 v.revLock.Unlock() 1037 } 1038 1039 // parallelRevoke revokes the passed VaultAccessors in parallel. 
1040 func (v *vaultClient) parallelRevoke(ctx context.Context, accessors []*structs.VaultAccessor) error { 1041 if !v.Enabled() { 1042 return fmt.Errorf("Vault integration disabled") 1043 } 1044 1045 if !v.Active() { 1046 return fmt.Errorf("Vault client not active") 1047 } 1048 1049 // Check if we have established a connection with Vault 1050 if established, err := v.ConnectionEstablished(); !established && err == nil { 1051 return structs.NewRecoverableError(fmt.Errorf("Connection to Vault has not been established"), true) 1052 } else if !established { 1053 return fmt.Errorf("Connection to Vault failed: %v", err) 1054 } 1055 1056 g, pCtx := errgroup.WithContext(ctx) 1057 1058 // Cap the handlers 1059 handlers := len(accessors) 1060 if handlers > maxParallelRevokes { 1061 handlers = maxParallelRevokes 1062 } 1063 1064 // Create the Vault Tokens 1065 input := make(chan *structs.VaultAccessor, handlers) 1066 for i := 0; i < handlers; i++ { 1067 g.Go(func() error { 1068 for { 1069 select { 1070 case va, ok := <-input: 1071 if !ok { 1072 return nil 1073 } 1074 1075 if err := v.auth.RevokeAccessor(va.Accessor); err != nil { 1076 return fmt.Errorf("failed to revoke token (alloc: %q, node: %q, task: %q): %v", va.AllocID, va.NodeID, va.Task, err) 1077 } 1078 case <-pCtx.Done(): 1079 return nil 1080 } 1081 } 1082 }) 1083 } 1084 1085 // Send the input 1086 go func() { 1087 defer close(input) 1088 for _, va := range accessors { 1089 select { 1090 case <-pCtx.Done(): 1091 return 1092 case input <- va: 1093 } 1094 } 1095 1096 }() 1097 1098 // Wait for everything to complete 1099 return g.Wait() 1100 } 1101 1102 // revokeDaemon should be called in a goroutine and is used to periodically 1103 // revoke Vault accessors that failed the original revocation 1104 func (v *vaultClient) revokeDaemon() { 1105 ticker := time.NewTicker(vaultRevocationIntv) 1106 defer ticker.Stop() 1107 1108 for { 1109 select { 1110 case <-v.tomb.Dying(): 1111 return 1112 case now := <-ticker.C: 1113 if 
established, _ := v.ConnectionEstablished(); !established { 1114 continue 1115 } 1116 1117 v.revLock.Lock() 1118 1119 // Fast path 1120 if len(v.revoking) == 0 { 1121 v.revLock.Unlock() 1122 continue 1123 } 1124 1125 // Build the list of allocations that need to revoked while pruning any TTL'd checks 1126 revoking := make([]*structs.VaultAccessor, 0, len(v.revoking)) 1127 for va, ttl := range v.revoking { 1128 if now.After(ttl) { 1129 delete(v.revoking, va) 1130 } else { 1131 revoking = append(revoking, va) 1132 } 1133 } 1134 1135 if err := v.parallelRevoke(context.Background(), revoking); err != nil { 1136 v.logger.Printf("[WARN] vault: background token revocation errored: %v", err) 1137 v.revLock.Unlock() 1138 continue 1139 } 1140 1141 // Unlock before a potentially expensive operation 1142 v.revLock.Unlock() 1143 1144 // Call the passed in token revocation function 1145 if err := v.purgeFn(revoking); err != nil { 1146 // Can continue since revocation is idempotent 1147 v.logger.Printf("[ERR] vault: token revocation errored: %v", err) 1148 continue 1149 } 1150 1151 // Track that tokens were revoked successfully 1152 metrics.IncrCounter([]string{"nomad", "vault", "distributed_tokens_revoked"}, float32(len(revoking))) 1153 1154 // Can delete from the tracked list now that we have purged 1155 v.revLock.Lock() 1156 v.statsLock.Lock() 1157 for _, va := range revoking { 1158 delete(v.revoking, va) 1159 } 1160 v.stats.TrackedForRevoke = len(v.revoking) 1161 v.statsLock.Unlock() 1162 v.revLock.Unlock() 1163 1164 } 1165 } 1166 } 1167 1168 // purgeVaultAccessors creates a Raft transaction to remove the passed Vault 1169 // Accessors 1170 func (s *Server) purgeVaultAccessors(accessors []*structs.VaultAccessor) error { 1171 // Commit this update via Raft 1172 req := structs.VaultAccessorsRequest{Accessors: accessors} 1173 _, _, err := s.raftApply(structs.VaultAccessorDegisterRequestType, req) 1174 return err 1175 } 1176 1177 // wrapNilError is a helper that returns a wrapped 
function that returns a nil 1178 // error 1179 func wrapNilError(f func()) func() error { 1180 return func() error { 1181 f() 1182 return nil 1183 } 1184 } 1185 1186 // setLimit is used to update the rate limit 1187 func (v *vaultClient) setLimit(l rate.Limit) { 1188 v.l.Lock() 1189 defer v.l.Unlock() 1190 v.limiter = rate.NewLimiter(l, int(l)) 1191 } 1192 1193 // Stats is used to query the state of the blocked eval tracker. 1194 func (v *vaultClient) Stats() *VaultStats { 1195 // Allocate a new stats struct 1196 stats := new(VaultStats) 1197 1198 v.statsLock.RLock() 1199 defer v.statsLock.RUnlock() 1200 1201 // Copy all the stats 1202 stats.TrackedForRevoke = v.stats.TrackedForRevoke 1203 1204 return stats 1205 } 1206 1207 // EmitStats is used to export metrics about the blocked eval tracker while enabled 1208 func (v *vaultClient) EmitStats(period time.Duration, stopCh chan struct{}) { 1209 for { 1210 select { 1211 case <-time.After(period): 1212 stats := v.Stats() 1213 metrics.SetGauge([]string{"nomad", "vault", "distributed_tokens_revoking"}, float32(stats.TrackedForRevoke)) 1214 case <-stopCh: 1215 return 1216 } 1217 } 1218 }