github.com/ferranbt/nomad@v0.9.3-0.20190607002617-85c449b7667c/client/vaultclient/vaultclient.go (about) 1 package vaultclient 2 3 import ( 4 "container/heap" 5 "fmt" 6 "math/rand" 7 "net/http" 8 "strings" 9 "sync" 10 "time" 11 12 metrics "github.com/armon/go-metrics" 13 hclog "github.com/hashicorp/go-hclog" 14 "github.com/hashicorp/nomad/nomad/structs" 15 "github.com/hashicorp/nomad/nomad/structs/config" 16 vaultapi "github.com/hashicorp/vault/api" 17 ) 18 19 // TokenDeriverFunc takes in an allocation and a set of tasks and derives a 20 // wrapped token for all the tasks, from the nomad server. All the derived 21 // wrapped tokens will be unwrapped using the vault API client. 22 type TokenDeriverFunc func(*structs.Allocation, []string, *vaultapi.Client) (map[string]string, error) 23 24 // The interface which nomad client uses to interact with vault and 25 // periodically renews the tokens and secrets. 26 type VaultClient interface { 27 // Start initiates the renewal loop of tokens and secrets 28 Start() 29 30 // Stop terminates the renewal loop for tokens and secrets 31 Stop() 32 33 // DeriveToken contacts the nomad server and fetches wrapped tokens for 34 // a set of tasks. The wrapped tokens will be unwrapped using vault and 35 // returned. 36 DeriveToken(*structs.Allocation, []string) (map[string]string, error) 37 38 // GetConsulACL fetches the Consul ACL token required for the task 39 GetConsulACL(string, string) (*vaultapi.Secret, error) 40 41 // RenewToken renews a token with the given increment and adds it to 42 // the min-heap for periodic renewal. 43 RenewToken(string, int) (<-chan error, error) 44 45 // StopRenewToken removes the token from the min-heap, stopping its 46 // renewal. 47 StopRenewToken(string) error 48 49 // RenewLease renews a vault secret's lease and adds the lease 50 // identifier to the min-heap for periodic renewal. 
51 RenewLease(string, int) (<-chan error, error) 52 53 // StopRenewLease removes a secret's lease ID from the min-heap, 54 // stopping its renewal. 55 StopRenewLease(string) error 56 } 57 58 // Implementation of VaultClient interface to interact with vault and perform 59 // token and lease renewals periodically. 60 type vaultClient struct { 61 // tokenDeriver is a function pointer passed in by the client to derive 62 // tokens by making RPC calls to the nomad server. The wrapped tokens 63 // returned by the nomad server will be unwrapped by this function 64 // using the vault API client. 65 tokenDeriver TokenDeriverFunc 66 67 // running indicates if the renewal loop is active or not 68 running bool 69 70 // client is the API client to interact with vault 71 client *vaultapi.Client 72 73 // updateCh is the channel to notify heap modifications to the renewal 74 // loop 75 updateCh chan struct{} 76 77 // stopCh is the channel to trigger termination of renewal loop 78 stopCh chan struct{} 79 80 // heap is the min-heap to keep track of both tokens and leases 81 heap *vaultClientHeap 82 83 // config is the configuration to connect to vault 84 config *config.VaultConfig 85 86 lock sync.RWMutex 87 logger hclog.Logger 88 } 89 90 // vaultClientRenewalRequest is a request object for renewal of both tokens and 91 // secret's leases. 
type vaultClientRenewalRequest struct {
	// errCh carries renewal errors back to whoever asked for the renewal.
	errCh chan error

	// id names the thing being renewed: either a token or a lease ID.
	id string

	// increment is how long, in seconds, the token or lease should be
	// extended by on each renewal.
	increment int

	// isToken is true when id holds a token and false when it holds a
	// lease ID.
	isToken bool
}

// vaultClientHeapEntry is a single node of the renewal heap.
type vaultClientHeapEntry struct {
	// req is the renewal request this node stands for.
	req *vaultClientRenewalRequest

	// next is the time at which req is due for renewal.
	next time.Time

	// index is the node's current position in the heap slice.
	index int
}

// vaultClientHeap pairs the heap proper with a map keyed by identifier. The
// map acts as a secondary index, which makes it possible to tell whether an
// identifier is already in the heap and to find its node without scanning.
type vaultClientHeap struct {
	heapMap map[string]*vaultClientHeapEntry
	heap    vaultDataHeapImp
}

// vaultDataHeapImp is the slice type the heap operations work on.
type vaultDataHeapImp []*vaultClientHeapEntry

// NewVaultClient returns a new vault client from the given config.
127 func NewVaultClient(config *config.VaultConfig, logger hclog.Logger, tokenDeriver TokenDeriverFunc) (*vaultClient, error) { 128 if config == nil { 129 return nil, fmt.Errorf("nil vault config") 130 } 131 132 logger = logger.Named("vault") 133 134 c := &vaultClient{ 135 config: config, 136 stopCh: make(chan struct{}), 137 // Update channel should be a buffered channel 138 updateCh: make(chan struct{}, 1), 139 heap: newVaultClientHeap(), 140 logger: logger, 141 tokenDeriver: tokenDeriver, 142 } 143 144 if !config.IsEnabled() { 145 return c, nil 146 } 147 148 // Get the Vault API configuration 149 apiConf, err := config.ApiConfig() 150 if err != nil { 151 logger.Error("error creating vault API config", "error", err) 152 return nil, err 153 } 154 155 // Create the Vault API client 156 client, err := vaultapi.NewClient(apiConf) 157 if err != nil { 158 logger.Error("error creating vault client", "error", err) 159 return nil, err 160 } 161 162 client.SetHeaders(http.Header{ 163 "User-Agent": []string{"hashicorp/nomad"}, 164 }) 165 166 // SetHeaders above will replace all headers, make this call second 167 if config.Namespace != "" { 168 logger.Debug("configuring Vault namespace", "namespace", config.Namespace) 169 client.SetNamespace(config.Namespace) 170 } 171 172 c.client = client 173 174 return c, nil 175 } 176 177 // newVaultClientHeap returns a new vault client heap with both the heap and a 178 // map which is a secondary index for heap elements, both initialized. 179 func newVaultClientHeap() *vaultClientHeap { 180 return &vaultClientHeap{ 181 heapMap: make(map[string]*vaultClientHeapEntry), 182 heap: make(vaultDataHeapImp, 0), 183 } 184 } 185 186 // isTracked returns if a given identifier is already present in the heap and 187 // hence is being renewed. Lock should be held before calling this method. 
188 func (c *vaultClient) isTracked(id string) bool { 189 if id == "" { 190 return false 191 } 192 193 _, ok := c.heap.heapMap[id] 194 return ok 195 } 196 197 // isRunning returns true if the client is running. 198 func (c *vaultClient) isRunning() bool { 199 c.lock.RLock() 200 defer c.lock.RUnlock() 201 return c.running 202 } 203 204 // Starts the renewal loop of vault client 205 func (c *vaultClient) Start() { 206 c.lock.Lock() 207 defer c.lock.Unlock() 208 209 if !c.config.IsEnabled() || c.running { 210 return 211 } 212 213 c.running = true 214 215 go c.run() 216 } 217 218 // Stops the renewal loop of vault client 219 func (c *vaultClient) Stop() { 220 c.lock.Lock() 221 defer c.lock.Unlock() 222 223 if !c.config.IsEnabled() || !c.running { 224 return 225 } 226 227 c.running = false 228 close(c.stopCh) 229 } 230 231 // unlockAndUnset is used to unset the vault token on the client and release the 232 // lock. Helper method for deferring a call that does both. 233 func (c *vaultClient) unlockAndUnset() { 234 c.client.SetToken("") 235 c.lock.Unlock() 236 } 237 238 // DeriveToken takes in an allocation and a set of tasks and for each of the 239 // task, it derives a vault token from nomad server and unwraps it using vault. 240 // The return value is a map containing all the unwrapped tokens indexed by the 241 // task name. 
242 func (c *vaultClient) DeriveToken(alloc *structs.Allocation, taskNames []string) (map[string]string, error) { 243 if !c.config.IsEnabled() { 244 return nil, fmt.Errorf("vault client not enabled") 245 } 246 if !c.isRunning() { 247 return nil, fmt.Errorf("vault client is not running") 248 } 249 250 c.lock.Lock() 251 defer c.unlockAndUnset() 252 253 // Use the token supplied to interact with vault 254 c.client.SetToken("") 255 256 tokens, err := c.tokenDeriver(alloc, taskNames, c.client) 257 if err != nil { 258 c.logger.Error("error deriving token", "error", err, "alloc_id", alloc.ID, "task_names", taskNames) 259 return nil, err 260 } 261 262 return tokens, nil 263 } 264 265 // GetConsulACL creates a vault API client and reads from vault a consul ACL 266 // token used by the task. 267 func (c *vaultClient) GetConsulACL(token, path string) (*vaultapi.Secret, error) { 268 if !c.config.IsEnabled() { 269 return nil, fmt.Errorf("vault client not enabled") 270 } 271 if token == "" { 272 return nil, fmt.Errorf("missing token") 273 } 274 if path == "" { 275 return nil, fmt.Errorf("missing consul ACL token vault path") 276 } 277 278 c.lock.Lock() 279 defer c.unlockAndUnset() 280 281 // Use the token supplied to interact with vault 282 c.client.SetToken(token) 283 284 // Read the consul ACL token and return the secret directly 285 return c.client.Logical().Read(path) 286 } 287 288 // RenewToken renews the supplied token for a given duration (in seconds) and 289 // adds it to the min-heap so that it is renewed periodically by the renewal 290 // loop. Any error returned during renewal will be written to a buffered 291 // channel and the channel is returned instead of an actual error. This helps 292 // the caller be notified of a renewal failure asynchronously for appropriate 293 // actions to be taken. The caller of this function need not have to close the 294 // error channel. 
295 func (c *vaultClient) RenewToken(token string, increment int) (<-chan error, error) { 296 if token == "" { 297 err := fmt.Errorf("missing token") 298 return nil, err 299 } 300 if increment < 1 { 301 err := fmt.Errorf("increment cannot be less than 1") 302 return nil, err 303 } 304 305 // Create a buffered error channel 306 errCh := make(chan error, 1) 307 308 // Create a renewal request and indicate that the identifier in the 309 // request is a token and not a lease 310 renewalReq := &vaultClientRenewalRequest{ 311 errCh: errCh, 312 id: token, 313 isToken: true, 314 increment: increment, 315 } 316 317 // Perform the renewal of the token and send any error to the dedicated 318 // error channel. 319 if err := c.renew(renewalReq); err != nil { 320 c.logger.Error("error during renewal of token", "error", err) 321 metrics.IncrCounter([]string{"client", "vault", "renew_token_failure"}, 1) 322 return nil, err 323 } 324 325 return errCh, nil 326 } 327 328 // RenewLease renews the supplied lease identifier for a supplied duration (in 329 // seconds) and adds it to the min-heap so that it gets renewed periodically by 330 // the renewal loop. Any error returned during renewal will be written to a 331 // buffered channel and the channel is returned instead of an actual error. 332 // This helps the caller be notified of a renewal failure asynchronously for 333 // appropriate actions to be taken. The caller of this function need not have 334 // to close the error channel. 
335 func (c *vaultClient) RenewLease(leaseId string, increment int) (<-chan error, error) { 336 if leaseId == "" { 337 err := fmt.Errorf("missing lease ID") 338 return nil, err 339 } 340 341 if increment < 1 { 342 err := fmt.Errorf("increment cannot be less than 1") 343 return nil, err 344 } 345 346 // Create a buffered error channel 347 errCh := make(chan error, 1) 348 349 // Create a renewal request using the supplied lease and duration 350 renewalReq := &vaultClientRenewalRequest{ 351 errCh: errCh, 352 id: leaseId, 353 increment: increment, 354 } 355 356 // Renew the secret and send any error to the dedicated error channel 357 if err := c.renew(renewalReq); err != nil { 358 c.logger.Error("error during renewal of lease", "error", err) 359 metrics.IncrCounter([]string{"client", "vault", "renew_lease_error"}, 1) 360 return nil, err 361 } 362 363 return errCh, nil 364 } 365 366 // renew is a common method to handle renewal of both tokens and secret leases. 367 // It invokes a token renewal or a secret's lease renewal. If renewal is 368 // successful, min-heap is updated based on the duration after which it needs 369 // renewal again. The next renewal time is randomly selected to avoid spikes in 370 // the number of APIs periodically. 
371 func (c *vaultClient) renew(req *vaultClientRenewalRequest) error { 372 c.lock.Lock() 373 defer c.lock.Unlock() 374 375 if req == nil { 376 return fmt.Errorf("nil renewal request") 377 } 378 if req.errCh == nil { 379 return fmt.Errorf("renewal request error channel nil") 380 } 381 382 if !c.config.IsEnabled() { 383 close(req.errCh) 384 return fmt.Errorf("vault client not enabled") 385 } 386 if !c.running { 387 close(req.errCh) 388 return fmt.Errorf("vault client is not running") 389 } 390 if req.id == "" { 391 close(req.errCh) 392 return fmt.Errorf("missing id in renewal request") 393 } 394 if req.increment < 1 { 395 close(req.errCh) 396 return fmt.Errorf("increment cannot be less than 1") 397 } 398 399 var renewalErr error 400 leaseDuration := req.increment 401 if req.isToken { 402 // Set the token in the API client to the one that needs 403 // renewal 404 c.client.SetToken(req.id) 405 406 // Renew the token 407 renewResp, err := c.client.Auth().Token().RenewSelf(req.increment) 408 if err != nil { 409 renewalErr = fmt.Errorf("failed to renew the vault token: %v", err) 410 } else if renewResp == nil || renewResp.Auth == nil { 411 renewalErr = fmt.Errorf("failed to renew the vault token") 412 } else { 413 // Don't set this if renewal fails 414 leaseDuration = renewResp.Auth.LeaseDuration 415 } 416 417 // Reset the token in the API client before returning 418 c.client.SetToken("") 419 } else { 420 // Renew the secret 421 renewResp, err := c.client.Sys().Renew(req.id, req.increment) 422 if err != nil { 423 renewalErr = fmt.Errorf("failed to renew vault secret: %v", err) 424 } else if renewResp == nil { 425 renewalErr = fmt.Errorf("failed to renew vault secret") 426 } else { 427 // Don't set this if renewal fails 428 leaseDuration = renewResp.LeaseDuration 429 } 430 } 431 432 // Determine the next renewal time 433 renewalDuration := renewalTime(rand.Intn, leaseDuration) 434 next := time.Now().Add(renewalDuration) 435 436 fatal := false 437 if renewalErr != nil && 
438 (strings.Contains(renewalErr.Error(), "lease not found or lease is not renewable") || 439 strings.Contains(renewalErr.Error(), "lease is not renewable") || 440 strings.Contains(renewalErr.Error(), "token not found") || 441 strings.Contains(renewalErr.Error(), "permission denied")) { 442 fatal = true 443 } else if renewalErr != nil { 444 c.logger.Debug("renewal error details", "req.increment", req.increment, "lease_duration", leaseDuration, "renewal_duration", renewalDuration) 445 c.logger.Error("error during renewal of lease or token failed due to a non-fatal error; retrying", 446 "error", renewalErr, "period", next) 447 } 448 449 if c.isTracked(req.id) { 450 if fatal { 451 // If encountered with an error where in a lease or a 452 // token is not valid at all with vault, and if that 453 // item is tracked by the renewal loop, stop renewing 454 // it by removing the corresponding heap entry. 455 if err := c.heap.Remove(req.id); err != nil { 456 return fmt.Errorf("failed to remove heap entry: %v", err) 457 } 458 459 // Report the fatal error to the client 460 req.errCh <- renewalErr 461 close(req.errCh) 462 463 return renewalErr 464 } 465 466 // If the identifier is already tracked, this indicates a 467 // subsequest renewal. In this case, update the existing 468 // element in the heap with the new renewal time. 469 if err := c.heap.Update(req, next); err != nil { 470 return fmt.Errorf("failed to update heap entry. err: %v", err) 471 } 472 473 // There is no need to signal an update to the renewal loop 474 // here because this case is hit from the renewal loop itself. 475 } else { 476 if fatal { 477 // If encountered with an error where in a lease or a 478 // token is not valid at all with vault, and if that 479 // item is not tracked by renewal loop, don't add it. 
480 481 // Report the fatal error to the client 482 req.errCh <- renewalErr 483 close(req.errCh) 484 485 return renewalErr 486 } 487 488 // If the identifier is not already tracked, this is a first 489 // renewal request. In this case, add an entry into the heap 490 // with the next renewal time. 491 if err := c.heap.Push(req, next); err != nil { 492 return fmt.Errorf("failed to push an entry to heap. err: %v", err) 493 } 494 495 // Signal an update for the renewal loop to trigger a fresh 496 // computation for the next best candidate for renewal. 497 if c.running { 498 select { 499 case c.updateCh <- struct{}{}: 500 default: 501 } 502 } 503 } 504 505 return nil 506 } 507 508 // run is the renewal loop which performs the periodic renewals of both the 509 // tokens and the secret leases. 510 func (c *vaultClient) run() { 511 if !c.config.IsEnabled() { 512 return 513 } 514 515 var renewalCh <-chan time.Time 516 for c.config.IsEnabled() && c.isRunning() { 517 // Fetches the candidate for next renewal 518 renewalReq, renewalTime := c.nextRenewal() 519 if renewalTime.IsZero() { 520 // If the heap is empty, don't do anything 521 renewalCh = nil 522 } else { 523 now := time.Now() 524 if renewalTime.After(now) { 525 // Compute the duration after which the item 526 // needs renewal and set the renewalCh to fire 527 // at that time. 528 renewalDuration := renewalTime.Sub(time.Now()) 529 renewalCh = time.After(renewalDuration) 530 } else { 531 // If the renewals of multiple items are too 532 // close to each other and by the time the 533 // entry is fetched from heap it might be past 534 // the current time (by a small margin). In 535 // which case, fire immediately. 
536 renewalCh = time.After(0) 537 } 538 } 539 540 select { 541 case <-renewalCh: 542 if err := c.renew(renewalReq); err != nil { 543 c.logger.Error("error renewing token", "error", err) 544 metrics.IncrCounter([]string{"client", "vault", "renew_token_error"}, 1) 545 } 546 case <-c.updateCh: 547 continue 548 case <-c.stopCh: 549 c.logger.Debug("stopped") 550 return 551 } 552 } 553 } 554 555 // StopRenewToken removes the item from the heap which represents the given 556 // token. 557 func (c *vaultClient) StopRenewToken(token string) error { 558 return c.stopRenew(token) 559 } 560 561 // StopRenewLease removes the item from the heap which represents the given 562 // lease identifier. 563 func (c *vaultClient) StopRenewLease(leaseId string) error { 564 return c.stopRenew(leaseId) 565 } 566 567 // stopRenew removes the given identifier from the heap and signals the renewal 568 // loop to compute the next best candidate for renewal. 569 func (c *vaultClient) stopRenew(id string) error { 570 c.lock.Lock() 571 defer c.lock.Unlock() 572 573 if !c.isTracked(id) { 574 return nil 575 } 576 577 if err := c.heap.Remove(id); err != nil { 578 return fmt.Errorf("failed to remove heap entry: %v", err) 579 } 580 581 // Signal an update to the renewal loop. 582 if c.running { 583 select { 584 case c.updateCh <- struct{}{}: 585 default: 586 } 587 } 588 589 return nil 590 } 591 592 // nextRenewal returns the root element of the min-heap, which represents the 593 // next element to be renewed and the time at which the renewal needs to be 594 // triggered. 
595 func (c *vaultClient) nextRenewal() (*vaultClientRenewalRequest, time.Time) { 596 c.lock.RLock() 597 defer c.lock.RUnlock() 598 599 if c.heap.Length() == 0 { 600 return nil, time.Time{} 601 } 602 603 // Fetches the root element in the min-heap 604 nextEntry := c.heap.Peek() 605 if nextEntry == nil { 606 return nil, time.Time{} 607 } 608 609 return nextEntry.req, nextEntry.next 610 } 611 612 // Additional helper functions on top of interface methods 613 614 // Length returns the number of elements in the heap 615 func (h *vaultClientHeap) Length() int { 616 return len(h.heap) 617 } 618 619 // Returns the root node of the min-heap 620 func (h *vaultClientHeap) Peek() *vaultClientHeapEntry { 621 if len(h.heap) == 0 { 622 return nil 623 } 624 625 return h.heap[0] 626 } 627 628 // Push adds the secondary index and inserts an item into the heap 629 func (h *vaultClientHeap) Push(req *vaultClientRenewalRequest, next time.Time) error { 630 if req == nil { 631 return fmt.Errorf("nil request") 632 } 633 634 if _, ok := h.heapMap[req.id]; ok { 635 return fmt.Errorf("entry %v already exists", req.id) 636 } 637 638 heapEntry := &vaultClientHeapEntry{ 639 req: req, 640 next: next, 641 } 642 h.heapMap[req.id] = heapEntry 643 heap.Push(&h.heap, heapEntry) 644 return nil 645 } 646 647 // Update will modify the existing item in the heap with the new data and the 648 // time, and fixes the heap. 649 func (h *vaultClientHeap) Update(req *vaultClientRenewalRequest, next time.Time) error { 650 if entry, ok := h.heapMap[req.id]; ok { 651 entry.req = req 652 entry.next = next 653 heap.Fix(&h.heap, entry.index) 654 return nil 655 } 656 657 return fmt.Errorf("heap doesn't contain %v", req.id) 658 } 659 660 // Remove will remove an identifier from the secondary index and deletes the 661 // corresponding node from the heap. 
662 func (h *vaultClientHeap) Remove(id string) error { 663 if entry, ok := h.heapMap[id]; ok { 664 heap.Remove(&h.heap, entry.index) 665 delete(h.heapMap, id) 666 return nil 667 } 668 669 return fmt.Errorf("heap doesn't contain entry for %v", id) 670 } 671 672 // The heap interface requires the following methods to be implemented. 673 // * Push(x interface{}) // add x as element Len() 674 // * Pop() interface{} // remove and return element Len() - 1. 675 // * sort.Interface 676 // 677 // sort.Interface comprises of the following methods: 678 // * Len() int 679 // * Less(i, j int) bool 680 // * Swap(i, j int) 681 682 // Part of sort.Interface 683 func (h vaultDataHeapImp) Len() int { return len(h) } 684 685 // Part of sort.Interface 686 func (h vaultDataHeapImp) Less(i, j int) bool { 687 // Two zero times should return false. 688 // Otherwise, zero is "greater" than any other time. 689 // (To sort it at the end of the list.) 690 // Sort such that zero times are at the end of the list. 691 iZero, jZero := h[i].next.IsZero(), h[j].next.IsZero() 692 if iZero && jZero { 693 return false 694 } else if iZero { 695 return false 696 } else if jZero { 697 return true 698 } 699 700 return h[i].next.Before(h[j].next) 701 } 702 703 // Part of sort.Interface 704 func (h vaultDataHeapImp) Swap(i, j int) { 705 h[i], h[j] = h[j], h[i] 706 h[i].index = i 707 h[j].index = j 708 } 709 710 // Part of heap.Interface 711 func (h *vaultDataHeapImp) Push(x interface{}) { 712 n := len(*h) 713 entry := x.(*vaultClientHeapEntry) 714 entry.index = n 715 *h = append(*h, entry) 716 } 717 718 // Part of heap.Interface 719 func (h *vaultDataHeapImp) Pop() interface{} { 720 old := *h 721 n := len(old) 722 entry := old[n-1] 723 entry.index = -1 // for safety 724 *h = old[0 : n-1] 725 return entry 726 } 727 728 // randIntn is the function in math/rand needed by renewalTime. A type is used 729 // to ease deterministic testing. 
type randIntn func(int) int

// renewalTime works out how long to wait before renewing a token whose lease
// lasts leaseDuration seconds, using dice as the source of jitter.
//
// Renewal is aimed at half the lease duration, which leaves ample time for
// latency and retries. When that half is at least 30 seconds it is shifted by
// dice(20) - 10 seconds; shorter values are returned without jitter.
func renewalTime(dice randIntn, leaseDuration int) time.Duration {
	const (
		// cutoff is the smallest half-lease, in seconds, that still
		// gets jitter applied.
		cutoff = 30
		// jitter is the amount +/- to vary the renewal time
		jitter = 10
	)

	half := leaseDuration / 2
	if half >= cutoff {
		half += dice(jitter*2) - jitter
	}

	return time.Duration(half) * time.Second
}