// Source: github.com/maier/nomad@v0.4.1-0.20161110003312-a9e3d0b8549d/nomad/vault.go

package nomad

import (
	"context"
	"errors"
	"fmt"
	"log"
	"math/rand"
	"regexp"
	"sync"
	"sync/atomic"
	"time"

	"gopkg.in/tomb.v2"

	multierror "github.com/hashicorp/go-multierror"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/nomad/structs/config"
	vapi "github.com/hashicorp/vault/api"
	"github.com/mitchellh/mapstructure"

	"golang.org/x/sync/errgroup"
	"golang.org/x/time/rate"
)

const (
	// vaultTokenCreateTTL is the duration the wrapped token for the client is
	// valid for. It is a duration string; the value is sixty seconds.
	vaultTokenCreateTTL = "60s"

	// minimumTokenTTL is the minimum Token TTL allowed for child tokens.
	minimumTokenTTL = 5 * time.Minute

	// defaultTokenTTL is the default Token TTL used when the passed token is a
	// root token such that child tokens aren't being created against a role
	// that has defined a TTL. buildClient applies it whenever the
	// configuration leaves TaskTokenTTL empty.
	defaultTokenTTL = "72h"

	// requestRateLimit is the maximum number of requests per second Nomad will
	// make against Vault.
	requestRateLimit rate.Limit = 500.0

	// maxParallelRevokes is the maximum number of parallel Vault
	// token revocation requests.
	maxParallelRevokes = 64

	// vaultRevocationIntv is the interval at which Vault tokens that failed
	// initial revocation are retried.
	vaultRevocationIntv = 5 * time.Minute
)

var (
	// vaultUnrecoverableError matches unrecoverable errors: error text that
	// contains "Code:" followed by 400, 403 or 404.
	vaultUnrecoverableError = regexp.MustCompile(`Code:\s+40(0|3|4)`)
)

// VaultClient is the Servers interface for interfacing with Vault
type VaultClient interface {
	// SetActive activates or de-activates the Vault client. When active, token
	// creation/lookup/revocation operations are allowed.
	SetActive(active bool)

	// SetConfig updates the config used by the Vault client
	SetConfig(config *config.VaultConfig) error

	// CreateToken takes an allocation and task and returns an appropriate Vault
	// Secret
	CreateToken(ctx context.Context, a *structs.Allocation, task string) (*vapi.Secret, error)

	// LookupToken takes a token string and returns the Vault secret describing
	// it (its policies can be extracted with PoliciesFrom).
	LookupToken(ctx context.Context, token string) (*vapi.Secret, error)

	// RevokeTokens takes a set of token accessors and revokes the tokens
	RevokeTokens(ctx context.Context, accessors []*structs.VaultAccessor, committed bool) error

	// Stop is used to stop token renewal
	Stop()
}

// PurgeVaultAccessorFn is called to remove VaultAccessors from the system. If
// the function returns an error, the token will still be tracked and revocation
// will retry till there is a success
type PurgeVaultAccessorFn func(accessors []*structs.VaultAccessor) error

// tokenData holds the relevant information about the Vault token passed to the
// client. All fields but Root are decoded from the token lookup response.
type tokenData struct {
	CreationTTL int      `mapstructure:"creation_ttl"`
	TTL         int      `mapstructure:"ttl"`
	Renewable   bool     `mapstructure:"renewable"`
	Policies    []string `mapstructure:"policies"`
	Role        string   `mapstructure:"role"`
	// Root is not decoded; parseSelfToken sets it when Policies contains "root".
	Root bool
}

// vaultClient is the Servers implementation of the VaultClient interface. The
// client renews the PeriodicToken given in the Vault configuration and provides
// the Server with the ability to create child tokens and lookup the permissions
// of tokens.
type vaultClient struct {
	// limiter is used to rate limit requests to Vault
	limiter *rate.Limiter

	// client is the Vault API client
	client *vapi.Client

	// auth is the Vault token auth API client
	auth *vapi.TokenAuth

	// config is the user passed Vault config
	config *config.VaultConfig

	// connEstablished marks whether we have an established connection to Vault.
	connEstablished bool

	// connEstablishedErr marks an error that can occur when establishing a
	// connection
	connEstablishedErr error

	// token is the raw token used by the client
	token string

	// tokenData is the data of the passed Vault token
	tokenData *tokenData

	// revoking tracks the VaultAccessors that must be revoked. The value is
	// the time after which the daemon stops retrying the accessor. purgeFn is
	// invoked once accessors have been revoked, and revLock guards revoking.
	revoking map[*structs.VaultAccessor]time.Time
	purgeFn  PurgeVaultAccessorFn
	revLock  sync.Mutex

	// active indicates whether the vaultClient is active. It should be
	// accessed using a helper and updated atomically
	active int32

	// running indicates whether the vault client is started.
	running bool

	// childTTL is the TTL for child tokens.
	childTTL string

	// lastRenewed is the time the token was last renewed
	lastRenewed time.Time

	// tomb tracks the background goroutines so they can be killed and waited
	// on; logger receives the client's log output.
	tomb   *tomb.Tomb
	logger *log.Logger

	// l is used to lock the configuration aspects of the client such that
	// multiple callers can't cause conflicting config updates
	l sync.Mutex
}

// NewVaultClient returns a Vault client from the given config. If the client
// couldn't be made an error is returned.
154 func NewVaultClient(c *config.VaultConfig, logger *log.Logger, purgeFn PurgeVaultAccessorFn) (*vaultClient, error) { 155 if c == nil { 156 return nil, fmt.Errorf("must pass valid VaultConfig") 157 } 158 159 if logger == nil { 160 return nil, fmt.Errorf("must pass valid logger") 161 } 162 163 v := &vaultClient{ 164 config: c, 165 logger: logger, 166 limiter: rate.NewLimiter(requestRateLimit, int(requestRateLimit)), 167 revoking: make(map[*structs.VaultAccessor]time.Time), 168 purgeFn: purgeFn, 169 tomb: &tomb.Tomb{}, 170 } 171 172 if v.config.IsEnabled() { 173 if err := v.buildClient(); err != nil { 174 return nil, err 175 } 176 177 // Launch the required goroutines 178 v.tomb.Go(wrapNilError(v.establishConnection)) 179 v.tomb.Go(wrapNilError(v.revokeDaemon)) 180 181 v.running = true 182 } 183 184 return v, nil 185 } 186 187 func (v *vaultClient) Stop() { 188 v.l.Lock() 189 running := v.running 190 v.running = false 191 v.l.Unlock() 192 193 if running { 194 v.tomb.Kill(nil) 195 v.tomb.Wait() 196 v.flush() 197 } 198 } 199 200 // SetActive activates or de-activates the Vault client. When active, token 201 // creation/lookup/revocation operation are allowed. 
All queued revocations are 202 // cancelled if set un-active as it is assumed another instances is taking over 203 func (v *vaultClient) SetActive(active bool) { 204 if active { 205 atomic.StoreInt32(&v.active, 1) 206 } else { 207 atomic.StoreInt32(&v.active, 0) 208 } 209 210 // Clear out the revoking tokens 211 v.revLock.Lock() 212 v.revoking = make(map[*structs.VaultAccessor]time.Time) 213 v.revLock.Unlock() 214 215 return 216 } 217 218 // flush is used to reset the state of the vault client 219 func (v *vaultClient) flush() { 220 v.l.Lock() 221 defer v.l.Unlock() 222 223 v.client = nil 224 v.auth = nil 225 v.connEstablished = false 226 v.connEstablishedErr = nil 227 v.token = "" 228 v.tokenData = nil 229 v.revoking = make(map[*structs.VaultAccessor]time.Time) 230 v.childTTL = "" 231 v.tomb = &tomb.Tomb{} 232 } 233 234 // SetConfig is used to update the Vault config being used. A temporary outage 235 // may occur after calling as it re-establishes a connection to Vault 236 func (v *vaultClient) SetConfig(config *config.VaultConfig) error { 237 if config == nil { 238 return fmt.Errorf("must pass valid VaultConfig") 239 } 240 241 v.l.Lock() 242 defer v.l.Unlock() 243 244 // Store the new config 245 v.config = config 246 247 if v.config.IsEnabled() { 248 // Stop accepting any new request 249 v.connEstablished = false 250 251 // Kill any background routine and create a new tomb 252 v.tomb.Kill(nil) 253 v.tomb.Wait() 254 v.tomb = &tomb.Tomb{} 255 256 // Rebuild the client 257 if err := v.buildClient(); err != nil { 258 v.l.Unlock() 259 return err 260 } 261 262 // Launch the required goroutines 263 v.tomb.Go(wrapNilError(v.establishConnection)) 264 v.tomb.Go(wrapNilError(v.revokeDaemon)) 265 } 266 267 return nil 268 } 269 270 // buildClient is used to build a Vault client based on the stored Vault config 271 func (v *vaultClient) buildClient() error { 272 // Validate we have the required fields. 
273 if v.config.Token == "" { 274 return errors.New("Vault token must be set") 275 } else if v.config.Addr == "" { 276 return errors.New("Vault address must be set") 277 } 278 279 // Parse the TTL if it is set 280 if v.config.TaskTokenTTL != "" { 281 d, err := time.ParseDuration(v.config.TaskTokenTTL) 282 if err != nil { 283 return fmt.Errorf("failed to parse TaskTokenTTL %q: %v", v.config.TaskTokenTTL, err) 284 } 285 286 if d.Nanoseconds() < minimumTokenTTL.Nanoseconds() { 287 return fmt.Errorf("ChildTokenTTL is less than minimum allowed of %v", minimumTokenTTL) 288 } 289 290 v.childTTL = v.config.TaskTokenTTL 291 } else { 292 // Default the TaskTokenTTL 293 v.childTTL = defaultTokenTTL 294 } 295 296 // Get the Vault API configuration 297 apiConf, err := v.config.ApiConfig() 298 if err != nil { 299 return fmt.Errorf("Failed to create Vault API config: %v", err) 300 } 301 302 // Create the Vault API client 303 client, err := vapi.NewClient(apiConf) 304 if err != nil { 305 v.logger.Printf("[ERR] vault: failed to create Vault client. Not retrying: %v", err) 306 return err 307 } 308 309 // Set the token and store the client 310 v.token = v.config.Token 311 client.SetToken(v.token) 312 v.client = client 313 v.auth = client.Auth().Token() 314 return nil 315 } 316 317 // establishConnection is used to make first contact with Vault. This should be 318 // called in a go-routine since the connection is retried til the Vault Client 319 // is stopped or the connection is successfully made at which point the renew 320 // loop is started. 321 func (v *vaultClient) establishConnection() { 322 // Create the retry timer and set initial duration to zero so it fires 323 // immediately 324 retryTimer := time.NewTimer(0) 325 326 OUTER: 327 for { 328 select { 329 case <-v.tomb.Dying(): 330 return 331 case <-retryTimer.C: 332 // Ensure the API is reachable 333 if _, err := v.client.Sys().InitStatus(); err != nil { 334 v.logger.Printf("[WARN] vault: failed to contact Vault API. 
Retrying in %v: %v", 335 v.config.ConnectionRetryIntv, err) 336 retryTimer.Reset(v.config.ConnectionRetryIntv) 337 continue OUTER 338 } 339 340 break OUTER 341 } 342 } 343 344 // Retrieve our token, validate it and parse the lease duration 345 if err := v.parseSelfToken(); err != nil { 346 v.logger.Printf("[ERR] vault: failed to validate self token/role and not retrying: %v", err) 347 v.l.Lock() 348 v.connEstablished = false 349 v.connEstablishedErr = err 350 v.l.Unlock() 351 return 352 } 353 354 // Set the wrapping function such that token creation is wrapped now 355 // that we know our role 356 v.client.SetWrappingLookupFunc(v.getWrappingFn()) 357 358 // If we are given a non-root token, start renewing it 359 if v.tokenData.Root && v.tokenData.CreationTTL == 0 { 360 v.logger.Printf("[DEBUG] vault: not renewing token as it is root") 361 } else { 362 v.logger.Printf("[DEBUG] vault: token lease duration is %v", 363 time.Duration(v.tokenData.CreationTTL)*time.Second) 364 v.tomb.Go(wrapNilError(v.renewalLoop)) 365 } 366 367 v.l.Lock() 368 v.connEstablished = true 369 v.connEstablishedErr = nil 370 v.l.Unlock() 371 } 372 373 // renewalLoop runs the renew loop. This should only be called if we are given a 374 // non-root token. 375 func (v *vaultClient) renewalLoop() { 376 // Create the renewal timer and set initial duration to zero so it fires 377 // immediately 378 authRenewTimer := time.NewTimer(0) 379 380 // Backoff is to reduce the rate we try to renew with Vault under error 381 // situations 382 backoff := 0.0 383 384 for { 385 select { 386 case <-v.tomb.Dying(): 387 return 388 case <-authRenewTimer.C: 389 // Renew the token and determine the new expiration 390 err := v.renew() 391 currentExpiration := v.lastRenewed.Add(time.Duration(v.tokenData.CreationTTL) * time.Second) 392 393 // Successfully renewed 394 if err == nil { 395 // If we take the expiration (lastRenewed + auth duration) and 396 // subtract the current time, we get a duration until expiry. 
397 // Set the timer to poke us after half of that time is up. 398 durationUntilRenew := currentExpiration.Sub(time.Now()) / 2 399 400 v.logger.Printf("[INFO] vault: renewing token in %v", durationUntilRenew) 401 authRenewTimer.Reset(durationUntilRenew) 402 403 // Reset any backoff 404 backoff = 0 405 break 406 } 407 408 // Back off, increasing the amount of backoff each time. There are some rules: 409 // 410 // * If we have an existing authentication that is going to expire, 411 // never back off more than half of the amount of time remaining 412 // until expiration 413 // * Never back off more than 30 seconds multiplied by a random 414 // value between 1 and 2 415 // * Use randomness so that many clients won't keep hitting Vault 416 // at the same time 417 418 // Set base values and add some backoff 419 420 v.logger.Printf("[DEBUG] vault: got error or bad auth, so backing off: %v", err) 421 switch { 422 case backoff < 5: 423 backoff = 5 424 case backoff >= 24: 425 backoff = 30 426 default: 427 backoff = backoff * 1.25 428 } 429 430 // Add randomness 431 backoff = backoff * (1.0 + rand.Float64()) 432 433 maxBackoff := currentExpiration.Sub(time.Now()) / 2 434 if maxBackoff < 0 { 435 // We have failed to renew the token past its expiration. Stop 436 // renewing with Vault. 437 v.logger.Printf("[ERR] vault: failed to renew Vault token before lease expiration. Shutting down Vault client") 438 v.l.Lock() 439 v.connEstablished = false 440 v.connEstablishedErr = err 441 v.l.Unlock() 442 return 443 444 } else if backoff > maxBackoff.Seconds() { 445 backoff = maxBackoff.Seconds() 446 } 447 448 durationUntilRetry := time.Duration(backoff) * time.Second 449 v.logger.Printf("[INFO] vault: backing off for %v", durationUntilRetry) 450 451 authRenewTimer.Reset(durationUntilRetry) 452 } 453 } 454 } 455 456 // renew attempts to renew our Vault token. If the renewal fails, an error is 457 // returned. 
This method updates the lastRenewed time 458 func (v *vaultClient) renew() error { 459 // Attempt to renew the token 460 secret, err := v.auth.RenewSelf(v.tokenData.CreationTTL) 461 if err != nil { 462 return err 463 } 464 465 auth := secret.Auth 466 if auth == nil { 467 return fmt.Errorf("renewal successful but not auth information returned") 468 } else if auth.LeaseDuration == 0 { 469 return fmt.Errorf("renewal successful but no lease duration returned") 470 } 471 472 v.lastRenewed = time.Now() 473 v.logger.Printf("[DEBUG] vault: succesfully renewed server token") 474 return nil 475 } 476 477 // getWrappingFn returns an appropriate wrapping function for Nomad Servers 478 func (v *vaultClient) getWrappingFn() func(operation, path string) string { 479 createPath := "auth/token/create" 480 if !v.tokenData.Root { 481 createPath = fmt.Sprintf("auth/token/create/%s", v.tokenData.Role) 482 } 483 484 return func(operation, path string) string { 485 // Only wrap the token create operation 486 if operation != "POST" || path != createPath { 487 return "" 488 } 489 490 return vaultTokenCreateTTL 491 } 492 } 493 494 // parseSelfToken looks up the Vault token in Vault and parses its data storing 495 // it in the client. If the token is not valid for Nomads purposes an error is 496 // returned. 
497 func (v *vaultClient) parseSelfToken() error { 498 // Get the initial lease duration 499 auth := v.client.Auth().Token() 500 self, err := auth.LookupSelf() 501 if err != nil { 502 return fmt.Errorf("failed to lookup Vault periodic token: %v", err) 503 } 504 505 // Read and parse the fields 506 var data tokenData 507 if err := mapstructure.WeakDecode(self.Data, &data); err != nil { 508 return fmt.Errorf("failed to parse Vault token's data block: %v", err) 509 } 510 511 root := false 512 for _, p := range data.Policies { 513 if p == "root" { 514 root = true 515 break 516 } 517 } 518 519 var mErr multierror.Error 520 if !root { 521 // All non-root tokens must be renewable 522 if !data.Renewable { 523 multierror.Append(&mErr, fmt.Errorf("Vault token is not renewable or root")) 524 } 525 526 // All non-root tokens must have a lease duration 527 if data.CreationTTL == 0 { 528 multierror.Append(&mErr, fmt.Errorf("invalid lease duration of zero")) 529 } 530 531 // The lease duration can not be expired 532 if data.TTL == 0 { 533 multierror.Append(&mErr, fmt.Errorf("token TTL is zero")) 534 } 535 536 // There must be a valid role since we aren't root 537 if data.Role == "" { 538 multierror.Append(&mErr, fmt.Errorf("token role name must be set when not using a root token")) 539 } 540 541 } else if data.CreationTTL != 0 { 542 // If the root token has a TTL it must be renewable 543 if !data.Renewable { 544 multierror.Append(&mErr, fmt.Errorf("Vault token has a TTL but is not renewable")) 545 } else if data.TTL == 0 { 546 // If the token has a TTL make sure it has not expired 547 multierror.Append(&mErr, fmt.Errorf("token TTL is zero")) 548 } 549 } 550 551 // If given a role validate it 552 if data.Role != "" { 553 if err := v.validateRole(data.Role); err != nil { 554 multierror.Append(&mErr, err) 555 } 556 } 557 558 data.Root = root 559 v.tokenData = &data 560 return mErr.ErrorOrNil() 561 } 562 563 // validateRole contacts Vault and checks that the given Vault role is valid 
for 564 // the purposes of being used by Nomad 565 func (v *vaultClient) validateRole(role string) error { 566 if role == "" { 567 return fmt.Errorf("Invalid empty role name") 568 } 569 570 // Validate the role 571 rsecret, err := v.client.Logical().Read(fmt.Sprintf("auth/token/roles/%s", role)) 572 if err != nil { 573 return fmt.Errorf("failed to lookup role %q: %v", role, err) 574 } 575 576 // Read and parse the fields 577 var data struct { 578 ExplicitMaxTtl int `mapstructure:"explicit_max_ttl"` 579 Orphan bool 580 Period int 581 Renewable bool 582 } 583 if err := mapstructure.WeakDecode(rsecret.Data, &data); err != nil { 584 return fmt.Errorf("failed to parse Vault role's data block: %v", err) 585 } 586 587 // Validate the role is acceptable 588 var mErr multierror.Error 589 if data.Orphan { 590 multierror.Append(&mErr, fmt.Errorf("Role must not allow orphans")) 591 } 592 593 if !data.Renewable { 594 multierror.Append(&mErr, fmt.Errorf("Role must allow tokens to be renewed")) 595 } 596 597 if data.ExplicitMaxTtl != 0 { 598 multierror.Append(&mErr, fmt.Errorf("Role can not use an explicit max ttl. 
Token must be periodic.")) 599 } 600 601 if data.Period == 0 { 602 multierror.Append(&mErr, fmt.Errorf("Role must have a non-zero period to make tokens periodic.")) 603 } 604 605 return mErr.ErrorOrNil() 606 } 607 608 // ConnectionEstablished returns whether a connection to Vault has been 609 // established and any error that potentially caused it to be false 610 func (v *vaultClient) ConnectionEstablished() (bool, error) { 611 v.l.Lock() 612 defer v.l.Unlock() 613 return v.connEstablished, v.connEstablishedErr 614 } 615 616 // Enabled returns whether the client is active 617 func (v *vaultClient) Enabled() bool { 618 v.l.Lock() 619 defer v.l.Unlock() 620 return v.config.IsEnabled() 621 } 622 623 // Active returns whether the client is active 624 func (v *vaultClient) Active() bool { 625 return atomic.LoadInt32(&v.active) == 1 626 } 627 628 // CreateToken takes the allocation and task and returns an appropriate Vault 629 // token. The call is rate limited and may be canceled with the passed policy. 
630 // When the error is recoverable, it will be of type RecoverableError 631 func (v *vaultClient) CreateToken(ctx context.Context, a *structs.Allocation, task string) (*vapi.Secret, error) { 632 if !v.Enabled() { 633 return nil, fmt.Errorf("Vault integration disabled") 634 } 635 636 if !v.Active() { 637 return nil, structs.NewRecoverableError(fmt.Errorf("Vault client not active"), true) 638 } 639 640 // Check if we have established a connection with Vault 641 if established, err := v.ConnectionEstablished(); !established && err == nil { 642 return nil, structs.NewRecoverableError(fmt.Errorf("Connection to Vault has not been established"), true) 643 } else if !established { 644 return nil, fmt.Errorf("Connection to Vault failed: %v", err) 645 } 646 647 // Retrieve the Vault block for the task 648 policies := a.Job.VaultPolicies() 649 if policies == nil { 650 return nil, fmt.Errorf("Job doesn't require Vault policies") 651 } 652 tg, ok := policies[a.TaskGroup] 653 if !ok { 654 return nil, fmt.Errorf("Task group does not require Vault policies") 655 } 656 taskVault, ok := tg[task] 657 if !ok { 658 return nil, fmt.Errorf("Task does not require Vault policies") 659 } 660 661 // Build the creation request 662 req := &vapi.TokenCreateRequest{ 663 Policies: taskVault.Policies, 664 Metadata: map[string]string{ 665 "AllocationID": a.ID, 666 "Task": task, 667 "NodeID": a.NodeID, 668 }, 669 TTL: v.childTTL, 670 DisplayName: fmt.Sprintf("%s-%s", a.ID, task), 671 } 672 673 // Ensure we are under our rate limit 674 if err := v.limiter.Wait(ctx); err != nil { 675 return nil, err 676 } 677 678 // Make the request and switch depending on whether we are using a root 679 // token or a role based token 680 var secret *vapi.Secret 681 var err error 682 if v.tokenData.Root { 683 req.Period = v.childTTL 684 secret, err = v.auth.Create(req) 685 } else { 686 // Make the token using the role 687 secret, err = v.auth.CreateWithRole(req, v.tokenData.Role) 688 } 689 690 // Determine whether 
it is unrecoverable 691 if err != nil { 692 if vaultUnrecoverableError.MatchString(err.Error()) { 693 return secret, err 694 } 695 696 // The error is recoverable 697 return nil, structs.NewRecoverableError(err, true) 698 } 699 700 return secret, nil 701 } 702 703 // LookupToken takes a Vault token and does a lookup against Vault. The call is 704 // rate limited and may be canceled with passed context. 705 func (v *vaultClient) LookupToken(ctx context.Context, token string) (*vapi.Secret, error) { 706 if !v.Enabled() { 707 return nil, fmt.Errorf("Vault integration disabled") 708 } 709 710 if !v.Active() { 711 return nil, fmt.Errorf("Vault client not active") 712 } 713 714 // Check if we have established a connection with Vault 715 if established, err := v.ConnectionEstablished(); !established && err == nil { 716 return nil, structs.NewRecoverableError(fmt.Errorf("Connection to Vault has not been established"), true) 717 } else if !established { 718 return nil, fmt.Errorf("Connection to Vault failed: %v", err) 719 } 720 721 // Ensure we are under our rate limit 722 if err := v.limiter.Wait(ctx); err != nil { 723 return nil, err 724 } 725 726 // Lookup the token 727 return v.auth.Lookup(token) 728 } 729 730 // PoliciesFrom parses the set of policies returned by a token lookup. 731 func PoliciesFrom(s *vapi.Secret) ([]string, error) { 732 if s == nil { 733 return nil, fmt.Errorf("cannot parse nil Vault secret") 734 } 735 var data tokenData 736 if err := mapstructure.WeakDecode(s.Data, &data); err != nil { 737 return nil, fmt.Errorf("failed to parse Vault token's data block: %v", err) 738 } 739 740 return data.Policies, nil 741 } 742 743 // RevokeTokens revokes the passed set of accessors. If committed is set, the 744 // purge function passed to the client is called. If there is an error purging 745 // either because of Vault failures or because of the purge function, the 746 // revocation is retried until the tokens TTL. 
747 func (v *vaultClient) RevokeTokens(ctx context.Context, accessors []*structs.VaultAccessor, committed bool) error { 748 if !v.Enabled() { 749 return nil 750 } 751 752 if !v.Active() { 753 return fmt.Errorf("Vault client not active") 754 } 755 756 // Check if we have established a connection with Vault. If not just add it 757 // to the queue 758 if established, err := v.ConnectionEstablished(); !established && err == nil { 759 // Only bother tracking it for later revocation if the accessor was 760 // committed 761 if committed { 762 v.storeForRevocation(accessors) 763 } 764 765 return nil 766 } 767 768 // Attempt to revoke immediately and if it fails, add it to the revoke queue 769 err := v.parallelRevoke(ctx, accessors) 770 if !committed { 771 // If it is uncommitted, it is a best effort revoke as it will shortly 772 // TTL within the cubbyhole and has not been leaked to any outside 773 // system 774 return nil 775 } 776 777 if err != nil { 778 v.logger.Printf("[WARN] vault: failed to revoke tokens. Will reattempt til TTL: %v", err) 779 v.storeForRevocation(accessors) 780 return nil 781 } 782 783 if err := v.purgeFn(accessors); err != nil { 784 v.logger.Printf("[ERR] vault: failed to purge Vault accessors: %v", err) 785 v.storeForRevocation(accessors) 786 return nil 787 } 788 789 return nil 790 } 791 792 // storeForRevocation stores the passed set of accessors for revocation. It 793 // captrues their effective TTL by storing their create TTL plus the current 794 // time. 795 func (v *vaultClient) storeForRevocation(accessors []*structs.VaultAccessor) { 796 v.revLock.Lock() 797 now := time.Now() 798 for _, a := range accessors { 799 v.revoking[a] = now.Add(time.Duration(a.CreationTTL) * time.Second) 800 } 801 v.revLock.Unlock() 802 } 803 804 // parallelRevoke revokes the passed VaultAccessors in parallel. 
805 func (v *vaultClient) parallelRevoke(ctx context.Context, accessors []*structs.VaultAccessor) error { 806 if !v.Enabled() { 807 return fmt.Errorf("Vault integration disabled") 808 } 809 810 if !v.Active() { 811 return fmt.Errorf("Vault client not active") 812 } 813 814 // Check if we have established a connection with Vault 815 if established, err := v.ConnectionEstablished(); !established && err == nil { 816 return structs.NewRecoverableError(fmt.Errorf("Connection to Vault has not been established"), true) 817 } else if !established { 818 return fmt.Errorf("Connection to Vault failed: %v", err) 819 } 820 821 g, pCtx := errgroup.WithContext(ctx) 822 823 // Cap the handlers 824 handlers := len(accessors) 825 if handlers > maxParallelRevokes { 826 handlers = maxParallelRevokes 827 } 828 829 // Create the Vault Tokens 830 input := make(chan *structs.VaultAccessor, handlers) 831 for i := 0; i < handlers; i++ { 832 g.Go(func() error { 833 for { 834 select { 835 case va, ok := <-input: 836 if !ok { 837 return nil 838 } 839 840 if err := v.auth.RevokeAccessor(va.Accessor); err != nil { 841 return fmt.Errorf("failed to revoke token (alloc: %q, node: %q, task: %q): %v", va.AllocID, va.NodeID, va.Task, err) 842 } 843 case <-pCtx.Done(): 844 return nil 845 } 846 } 847 }) 848 } 849 850 // Send the input 851 go func() { 852 defer close(input) 853 for _, va := range accessors { 854 select { 855 case <-pCtx.Done(): 856 return 857 case input <- va: 858 } 859 } 860 861 }() 862 863 // Wait for everything to complete 864 return g.Wait() 865 } 866 867 // revokeDaemon should be called in a goroutine and is used to periodically 868 // revoke Vault accessors that failed the original revocation 869 func (v *vaultClient) revokeDaemon() { 870 ticker := time.NewTicker(vaultRevocationIntv) 871 defer ticker.Stop() 872 873 for { 874 select { 875 case <-v.tomb.Dying(): 876 return 877 case now := <-ticker.C: 878 if established, _ := v.ConnectionEstablished(); !established { 879 continue 880 
} 881 882 v.revLock.Lock() 883 884 // Fast path 885 if len(v.revoking) == 0 { 886 v.revLock.Unlock() 887 continue 888 } 889 890 // Build the list of allocations that need to revoked while pruning any TTL'd checks 891 revoking := make([]*structs.VaultAccessor, 0, len(v.revoking)) 892 for va, ttl := range v.revoking { 893 if now.After(ttl) { 894 delete(v.revoking, va) 895 } else { 896 revoking = append(revoking, va) 897 } 898 } 899 900 if err := v.parallelRevoke(context.Background(), revoking); err != nil { 901 v.logger.Printf("[WARN] vault: background token revocation errored: %v", err) 902 v.revLock.Unlock() 903 continue 904 } 905 906 // Unlock before a potentially expensive operation 907 v.revLock.Unlock() 908 909 // Call the passed in token revocation function 910 if err := v.purgeFn(revoking); err != nil { 911 // Can continue since revocation is idempotent 912 v.logger.Printf("[ERR] vault: token revocation errored: %v", err) 913 continue 914 } 915 916 // Can delete from the tracked list now that we have purged 917 v.revLock.Lock() 918 for _, va := range revoking { 919 delete(v.revoking, va) 920 } 921 v.revLock.Unlock() 922 } 923 } 924 } 925 926 // purgeVaultAccessors creates a Raft transaction to remove the passed Vault 927 // Accessors 928 func (s *Server) purgeVaultAccessors(accessors []*structs.VaultAccessor) error { 929 // Commit this update via Raft 930 req := structs.VaultAccessorsRequest{Accessors: accessors} 931 _, _, err := s.raftApply(structs.VaultAccessorDegisterRequestType, req) 932 return err 933 } 934 935 // wrapNilError is a helper that returns a wrapped function that returns a nil 936 // error 937 func wrapNilError(f func()) func() error { 938 return func() error { 939 f() 940 return nil 941 } 942 } 943 944 // setLimit is used to update the rate limit 945 func (v *vaultClient) setLimit(l rate.Limit) { 946 v.l.Lock() 947 defer v.l.Unlock() 948 v.limiter = rate.NewLimiter(l, int(l)) 949 }