github.com/mattyr/nomad@v0.3.3-0.20160919021406-3485a065154a/client/vaultclient/vaultclient.go (about) 1 package vaultclient 2 3 import ( 4 "container/heap" 5 "fmt" 6 "log" 7 "math/rand" 8 "strings" 9 "sync" 10 "time" 11 12 "github.com/hashicorp/nomad/nomad/structs" 13 "github.com/hashicorp/nomad/nomad/structs/config" 14 vaultapi "github.com/hashicorp/vault/api" 15 ) 16 17 // TokenDeriverFunc takes in an allocation and a set of tasks and derives a 18 // wrapped token for all the tasks, from the nomad server. All the derived 19 // wrapped tokens will be unwrapped using the vault API client. 20 type TokenDeriverFunc func(*structs.Allocation, []string, *vaultapi.Client) (map[string]string, error) 21 22 // The interface which nomad client uses to interact with vault and 23 // periodically renews the tokens and secrets. 24 type VaultClient interface { 25 // Start initiates the renewal loop of tokens and secrets 26 Start() 27 28 // Stop terminates the renewal loop for tokens and secrets 29 Stop() 30 31 // DeriveToken contacts the nomad server and fetches wrapped tokens for 32 // a set of tasks. The wrapped tokens will be unwrapped using vault and 33 // returned. 34 DeriveToken(*structs.Allocation, []string) (map[string]string, error) 35 36 // GetConsulACL fetches the Consul ACL token required for the task 37 GetConsulACL(string, string) (*vaultapi.Secret, error) 38 39 // RenewToken renews a token with the given increment and adds it to 40 // the min-heap for periodic renewal. 41 RenewToken(string, int) <-chan error 42 43 // StopRenewToken removes the token from the min-heap, stopping its 44 // renewal. 45 StopRenewToken(string) error 46 47 // RenewLease renews a vault secret's lease and adds the lease 48 // identifier to the min-heap for periodic renewal. 49 RenewLease(string, int) <-chan error 50 51 // StopRenewLease removes a secret's lease ID from the min-heap, 52 // stopping its renewal. 
53 StopRenewLease(string) error 54 } 55 56 // Implementation of VaultClient interface to interact with vault and perform 57 // token and lease renewals periodically. 58 type vaultClient struct { 59 // tokenDeriver is a function pointer passed in by the client to derive 60 // tokens by making RPC calls to the nomad server. The wrapped tokens 61 // returned by the nomad server will be unwrapped by this function 62 // using the vault API client. 63 tokenDeriver TokenDeriverFunc 64 65 // running indicates if the renewal loop is active or not 66 running bool 67 68 // connEstablished marks whether the connection to vault was successful 69 // or not 70 connEstablished bool 71 72 // tokenData is the data of the passed VaultClient token 73 token *tokenData 74 75 // client is the API client to interact with vault 76 client *vaultapi.Client 77 78 // updateCh is the channel to notify heap modifications to the renewal 79 // loop 80 updateCh chan struct{} 81 82 // stopCh is the channel to trigger termination of renewal loop 83 stopCh chan struct{} 84 85 // heap is the min-heap to keep track of both tokens and leases 86 heap *vaultClientHeap 87 88 // config is the configuration to connect to vault 89 config *config.VaultConfig 90 91 lock sync.RWMutex 92 logger *log.Logger 93 } 94 95 // tokenData holds the relevant information about the Vault token passed to the 96 // client. 97 type tokenData struct { 98 CreationTTL int `mapstructure:"creation_ttl"` 99 TTL int `mapstructure:"ttl"` 100 Renewable bool `mapstructure:"renewable"` 101 Policies []string `mapstructure:"policies"` 102 Role string `mapstructure:"role"` 103 Root bool 104 } 105 106 // vaultClientRenewalRequest is a request object for renewal of both tokens and 107 // secret's leases. 
type vaultClientRenewalRequest struct {
	// errCh is the channel into which any renewal error will be sent to
	errCh chan error

	// id is an identifier which represents either a token or a lease
	id string

	// increment is the duration (in seconds) for which the token or lease
	// should be renewed for
	increment int

	// isToken indicates whether the 'id' field is a token or not
	isToken bool
}

// vaultClientHeapEntry is the element representing an entry in the renewal
// heap.
type vaultClientHeapEntry struct {
	// req is the renewal request this entry tracks
	req *vaultClientRenewalRequest

	// next is the time at which the request is due for renewal; it is the
	// heap's ordering key
	next time.Time

	// index is the entry's current position in the heap slice, maintained
	// by the heap's Swap/Push/Pop methods
	index int
}

// vaultClientHeap is a wrapper around the actual heap to provide additional
// semantics on top of functions provided by the heap interface. In order to
// achieve that, an additional map is placed beside the actual heap. This map
// can be used to check if an entry is already present in the heap.
type vaultClientHeap struct {
	// heapMap is the secondary index, keyed by token or lease identifier
	heapMap map[string]*vaultClientHeapEntry

	// heap is the min-heap ordered by next renewal time
	heap vaultDataHeapImp
}

// vaultDataHeapImp is the data type of the heap.
type vaultDataHeapImp []*vaultClientHeapEntry

// NewVaultClient returns a new vault client from the given config.
143 func NewVaultClient(config *config.VaultConfig, logger *log.Logger, tokenDeriver TokenDeriverFunc) (*vaultClient, error) { 144 if config == nil { 145 return nil, fmt.Errorf("nil vault config") 146 } 147 148 if !config.Enabled { 149 return nil, nil 150 } 151 152 if logger == nil { 153 return nil, fmt.Errorf("nil logger") 154 } 155 156 c := &vaultClient{ 157 config: config, 158 stopCh: make(chan struct{}), 159 // Update channel should be a buffered channel 160 updateCh: make(chan struct{}, 1), 161 heap: newVaultClientHeap(), 162 logger: logger, 163 } 164 165 // Get the Vault API configuration 166 apiConf, err := config.ApiConfig() 167 if err != nil { 168 logger.Printf("[ERR] client.vault: failed to create vault API config: %v", err) 169 return nil, err 170 } 171 172 // Create the Vault API client 173 client, err := vaultapi.NewClient(apiConf) 174 if err != nil { 175 logger.Printf("[ERR] client.vault: failed to create Vault client. Not retrying: %v", err) 176 return nil, err 177 } 178 179 c.client = client 180 181 return c, nil 182 } 183 184 // newVaultClientHeap returns a new vault client heap with both the heap and a 185 // map which is a secondary index for heap elements, both initialized. 186 func newVaultClientHeap() *vaultClientHeap { 187 return &vaultClientHeap{ 188 heapMap: make(map[string]*vaultClientHeapEntry), 189 heap: make(vaultDataHeapImp, 0), 190 } 191 } 192 193 // isTracked returns if a given identifier is already present in the heap and 194 // hence is being renewed. Lock should be held before calling this method. 
195 func (c *vaultClient) isTracked(id string) bool { 196 if id == "" { 197 return false 198 } 199 200 _, ok := c.heap.heapMap[id] 201 return ok 202 } 203 204 // Starts the renewal loop of vault client 205 func (c *vaultClient) Start() { 206 if !c.config.Enabled || c.running { 207 return 208 } 209 210 c.logger.Printf("[DEBUG] client.vault: establishing connection to vault") 211 go c.establishConnection() 212 } 213 214 // ConnectionEstablished indicates whether VaultClient successfully established 215 // connection to vault or not 216 func (c *vaultClient) ConnectionEstablished() bool { 217 c.lock.RLock() 218 defer c.lock.RUnlock() 219 return c.connEstablished 220 } 221 222 // establishConnection is used to make first contact with Vault. This should be 223 // called in a go-routine since the connection is retried till the Vault Client 224 // is stopped or the connection is successfully made at which point the renew 225 // loop is started. 226 func (c *vaultClient) establishConnection() { 227 // Create the retry timer and set initial duration to zero so it fires 228 // immediately 229 retryTimer := time.NewTimer(0) 230 231 OUTER: 232 for { 233 select { 234 case <-c.stopCh: 235 return 236 case <-retryTimer.C: 237 // Ensure the API is reachable 238 if _, err := c.client.Sys().InitStatus(); err != nil { 239 c.logger.Printf("[WARN] client.vault: failed to contact Vault API. 
Retrying in %v: %v", 240 c.config.ConnectionRetryIntv, err) 241 retryTimer.Reset(c.config.ConnectionRetryIntv) 242 continue OUTER 243 } 244 245 break OUTER 246 } 247 } 248 249 c.lock.Lock() 250 c.connEstablished = true 251 c.lock.Unlock() 252 253 // Begin the renewal loop 254 go c.run() 255 c.logger.Printf("[DEBUG] client.vault: started") 256 } 257 258 // Stops the renewal loop of vault client 259 func (c *vaultClient) Stop() { 260 if !c.config.Enabled || !c.running { 261 return 262 } 263 264 c.lock.Lock() 265 defer c.lock.Unlock() 266 267 c.running = false 268 close(c.stopCh) 269 } 270 271 // DeriveToken takes in an allocation and a set of tasks and for each of the 272 // task, it derives a vault token from nomad server and unwraps it using vault. 273 // The return value is a map containing all the unwrapped tokens indexed by the 274 // task name. 275 func (c *vaultClient) DeriveToken(alloc *structs.Allocation, taskNames []string) (map[string]string, error) { 276 if !c.running { 277 return nil, fmt.Errorf("vault client is not running") 278 } 279 280 return c.tokenDeriver(alloc, taskNames, c.client) 281 } 282 283 // GetConsulACL creates a vault API client and reads from vault a consul ACL 284 // token used by the task. 
285 func (c *vaultClient) GetConsulACL(token, path string) (*vaultapi.Secret, error) { 286 if token == "" { 287 return nil, fmt.Errorf("missing token") 288 } 289 if path == "" { 290 return nil, fmt.Errorf("missing consul ACL token vault path") 291 } 292 293 if !c.ConnectionEstablished() { 294 return nil, fmt.Errorf("connection with vault is not yet established") 295 } 296 297 c.lock.Lock() 298 defer c.lock.Unlock() 299 300 // Use the token supplied to interact with vault 301 c.client.SetToken(token) 302 303 // Reset the token before returning 304 defer c.client.SetToken("") 305 306 // Read the consul ACL token and return the secret directly 307 return c.client.Logical().Read(path) 308 } 309 310 // RenewToken renews the supplied token for a given duration (in seconds) and 311 // adds it to the min-heap so that it is renewed periodically by the renewal 312 // loop. Any error returned during renewal will be written to a buffered 313 // channel and the channel is returned instead of an actual error. This helps 314 // the caller be notified of a renewal failure asynchronously for appropriate 315 // actions to be taken. The caller of this function need not have to close the 316 // error channel. 317 func (c *vaultClient) RenewToken(token string, increment int) <-chan error { 318 // Create a buffered error channel 319 errCh := make(chan error, 1) 320 321 if token == "" { 322 errCh <- fmt.Errorf("missing token") 323 close(errCh) 324 return errCh 325 } 326 if increment < 1 { 327 errCh <- fmt.Errorf("increment cannot be less than 1") 328 close(errCh) 329 return errCh 330 } 331 332 // Create a renewal request and indicate that the identifier in the 333 // request is a token and not a lease 334 renewalReq := &vaultClientRenewalRequest{ 335 errCh: errCh, 336 id: token, 337 isToken: true, 338 increment: increment, 339 } 340 341 // Perform the renewal of the token and send any error to the dedicated 342 // error channel. 
343 if err := c.renew(renewalReq); err != nil { 344 c.logger.Printf("[ERR] client.vault: renewal of token failed: %v", err) 345 } 346 347 return errCh 348 } 349 350 // RenewLease renews the supplied lease identifier for a supplied duration (in 351 // seconds) and adds it to the min-heap so that it gets renewed periodically by 352 // the renewal loop. Any error returned during renewal will be written to a 353 // buffered channel and the channel is returned instead of an actual error. 354 // This helps the caller be notified of a renewal failure asynchronously for 355 // appropriate actions to be taken. The caller of this function need not have 356 // to close the error channel. 357 func (c *vaultClient) RenewLease(leaseId string, increment int) <-chan error { 358 c.logger.Printf("[DEBUG] client.vault: renewing lease %q", leaseId) 359 // Create a buffered error channel 360 errCh := make(chan error, 1) 361 362 if leaseId == "" { 363 errCh <- fmt.Errorf("missing lease ID") 364 close(errCh) 365 return errCh 366 } 367 368 if increment < 1 { 369 errCh <- fmt.Errorf("increment cannot be less than 1") 370 close(errCh) 371 return errCh 372 } 373 374 // Create a renewal request using the supplied lease and duration 375 renewalReq := &vaultClientRenewalRequest{ 376 errCh: errCh, 377 id: leaseId, 378 increment: increment, 379 } 380 381 // Renew the secret and send any error to the dedicated error channel 382 if err := c.renew(renewalReq); err != nil { 383 c.logger.Printf("[ERR] client.vault: renewal of lease failed: %v", err) 384 } 385 386 return errCh 387 } 388 389 // renew is a common method to handle renewal of both tokens and secret leases. 390 // It invokes a token renewal or a secret's lease renewal. If renewal is 391 // successful, min-heap is updated based on the duration after which it needs 392 // renewal again. The next renewal time is randomly selected to avoid spikes in 393 // the number of APIs periodically. 
394 func (c *vaultClient) renew(req *vaultClientRenewalRequest) error { 395 c.lock.Lock() 396 defer c.lock.Unlock() 397 398 if !c.running { 399 return fmt.Errorf("vault client is not running") 400 } 401 402 if req == nil { 403 return fmt.Errorf("nil renewal request") 404 } 405 if req.id == "" { 406 return fmt.Errorf("missing id in renewal request") 407 } 408 if req.increment < 1 { 409 return fmt.Errorf("increment cannot be less than 1") 410 } 411 412 var renewalErr error 413 leaseDuration := req.increment 414 if req.isToken { 415 // Reset the token in the API client before returning 416 defer c.client.SetToken("") 417 418 // Set the token in the API client to the one that needs 419 // renewal 420 c.client.SetToken(req.id) 421 422 // Renew the token 423 renewResp, err := c.client.Auth().Token().RenewSelf(req.increment) 424 if err != nil { 425 renewalErr = fmt.Errorf("failed to renew the vault token: %v", err) 426 } 427 if renewResp == nil || renewResp.Auth == nil { 428 renewalErr = fmt.Errorf("failed to renew the vault token") 429 } else { 430 // Don't set this if renewal fails 431 leaseDuration = renewResp.Auth.LeaseDuration 432 } 433 } else { 434 // Renew the secret 435 renewResp, err := c.client.Sys().Renew(req.id, req.increment) 436 if err != nil { 437 renewalErr = fmt.Errorf("failed to renew vault secret: %v", err) 438 } 439 if renewResp == nil { 440 renewalErr = fmt.Errorf("failed to renew vault secret") 441 } else { 442 // Don't set this if renewal fails 443 leaseDuration = renewResp.LeaseDuration 444 } 445 } 446 447 duration := leaseDuration / 2 448 switch { 449 case leaseDuration < 30: 450 // Don't bother about introducing randomness if the 451 // leaseDuration is too small. 
452 default: 453 // Give a breathing space of 20 seconds 454 min := 10 455 max := leaseDuration - min 456 rand.Seed(time.Now().Unix()) 457 duration = min + rand.Intn(max-min) 458 } 459 460 // Determine the next renewal time 461 next := time.Now().Add(time.Duration(duration) * time.Second) 462 463 fatal := false 464 if renewalErr != nil && 465 (strings.Contains(renewalErr.Error(), "lease not found or lease is not renewable") || 466 strings.Contains(renewalErr.Error(), "token not found")) { 467 fatal = true 468 } else if renewalErr != nil { 469 c.logger.Printf("[DEBUG] client.vault: req.increment: %d, leaseDuration: %d, duration: %d", req.increment, leaseDuration, duration) 470 c.logger.Printf("[ERR] client.vault: renewal of lease or token failed due to a non-fatal error. Retrying at %v", next.String()) 471 } 472 473 if c.isTracked(req.id) { 474 if fatal { 475 // If encountered with an error where in a lease or a 476 // token is not valid at all with vault, and if that 477 // item is tracked by the renewal loop, stop renewing 478 // it by removing the corresponding heap entry. 479 if err := c.heap.Remove(req.id); err != nil { 480 return fmt.Errorf("failed to remove heap entry. err: %v", err) 481 } 482 delete(c.heap.heapMap, req.id) 483 484 // Report the fatal error to the client 485 req.errCh <- renewalErr 486 close(req.errCh) 487 488 return renewalErr 489 } 490 491 // If the identifier is already tracked, this indicates a 492 // subsequest renewal. In this case, update the existing 493 // element in the heap with the new renewal time. 494 if err := c.heap.Update(req, next); err != nil { 495 return fmt.Errorf("failed to update heap entry. err: %v", err) 496 } 497 498 // There is no need to signal an update to the renewal loop 499 // here because this case is hit from the renewal loop itself. 
500 } else { 501 if fatal { 502 // If encountered with an error where in a lease or a 503 // token is not valid at all with vault, and if that 504 // item is not tracked by renewal loop, don't add it. 505 506 // Report the fatal error to the client 507 req.errCh <- renewalErr 508 close(req.errCh) 509 510 return renewalErr 511 } 512 513 // If the identifier is not already tracked, this is a first 514 // renewal request. In this case, add an entry into the heap 515 // with the next renewal time. 516 if err := c.heap.Push(req, next); err != nil { 517 return fmt.Errorf("failed to push an entry to heap. err: %v", err) 518 } 519 520 // Signal an update for the renewal loop to trigger a fresh 521 // computation for the next best candidate for renewal. 522 if c.running { 523 select { 524 case c.updateCh <- struct{}{}: 525 default: 526 } 527 } 528 } 529 530 return nil 531 } 532 533 // run is the renewal loop which performs the periodic renewals of both the 534 // tokens and the secret leases. 535 func (c *vaultClient) run() { 536 if !c.config.Enabled { 537 return 538 } 539 540 c.lock.Lock() 541 c.running = true 542 c.lock.Unlock() 543 544 var renewalCh <-chan time.Time 545 for c.config.Enabled && c.running { 546 // Fetches the candidate for next renewal 547 renewalReq, renewalTime := c.nextRenewal() 548 if renewalTime.IsZero() { 549 // If the heap is empty, don't do anything 550 renewalCh = nil 551 } else { 552 now := time.Now() 553 if renewalTime.After(now) { 554 // Compute the duration after which the item 555 // needs renewal and set the renewalCh to fire 556 // at that time. 557 renewalDuration := renewalTime.Sub(time.Now()) 558 renewalCh = time.After(renewalDuration) 559 } else { 560 // If the renewals of multiple items are too 561 // close to each other and by the time the 562 // entry is fetched from heap it might be past 563 // the current time (by a small margin). In 564 // which case, fire immediately. 
565 renewalCh = time.After(0) 566 } 567 } 568 569 select { 570 case <-renewalCh: 571 if err := c.renew(renewalReq); err != nil { 572 c.logger.Printf("[ERR] client.vault: renewal of token failed: %v", err) 573 } 574 case <-c.updateCh: 575 continue 576 case <-c.stopCh: 577 c.logger.Printf("[DEBUG] client.vault: stopped") 578 return 579 } 580 } 581 } 582 583 // StopRenewToken removes the item from the heap which represents the given 584 // token. 585 func (c *vaultClient) StopRenewToken(token string) error { 586 return c.stopRenew(token) 587 } 588 589 // StopRenewLease removes the item from the heap which represents the given 590 // lease identifier. 591 func (c *vaultClient) StopRenewLease(leaseId string) error { 592 return c.stopRenew(leaseId) 593 } 594 595 // stopRenew removes the given identifier from the heap and signals the renewal 596 // loop to compute the next best candidate for renewal. 597 func (c *vaultClient) stopRenew(id string) error { 598 c.lock.Lock() 599 defer c.lock.Unlock() 600 601 if !c.isTracked(id) { 602 return nil 603 } 604 605 // Remove the identifier from the heap 606 if err := c.heap.Remove(id); err != nil { 607 return fmt.Errorf("failed to remove heap entry: %v", err) 608 } 609 610 // Delete the identifier from the map only after the it is removed from 611 // the heap. Heap's remove method relies on the heap map. 612 delete(c.heap.heapMap, id) 613 614 // Signal an update to the renewal loop. 615 if c.running { 616 select { 617 case c.updateCh <- struct{}{}: 618 default: 619 } 620 } 621 622 return nil 623 } 624 625 // nextRenewal returns the root element of the min-heap, which represents the 626 // next element to be renewed and the time at which the renewal needs to be 627 // triggered. 
628 func (c *vaultClient) nextRenewal() (*vaultClientRenewalRequest, time.Time) { 629 c.lock.RLock() 630 defer c.lock.RUnlock() 631 632 if c.heap.Length() == 0 { 633 return nil, time.Time{} 634 } 635 636 // Fetches the root element in the min-heap 637 nextEntry := c.heap.Peek() 638 if nextEntry == nil { 639 return nil, time.Time{} 640 } 641 642 return nextEntry.req, nextEntry.next 643 } 644 645 // Additional helper functions on top of interface methods 646 647 // Length returns the number of elements in the heap 648 func (h *vaultClientHeap) Length() int { 649 return len(h.heap) 650 } 651 652 // Returns the root node of the min-heap 653 func (h *vaultClientHeap) Peek() *vaultClientHeapEntry { 654 if len(h.heap) == 0 { 655 return nil 656 } 657 658 return h.heap[0] 659 } 660 661 // Push adds the secondary index and inserts an item into the heap 662 func (h *vaultClientHeap) Push(req *vaultClientRenewalRequest, next time.Time) error { 663 if req == nil { 664 return fmt.Errorf("nil request") 665 } 666 667 if _, ok := h.heapMap[req.id]; ok { 668 return fmt.Errorf("entry %v already exists", req.id) 669 } 670 671 heapEntry := &vaultClientHeapEntry{ 672 req: req, 673 next: next, 674 } 675 h.heapMap[req.id] = heapEntry 676 heap.Push(&h.heap, heapEntry) 677 return nil 678 } 679 680 // Update will modify the existing item in the heap with the new data and the 681 // time, and fixes the heap. 682 func (h *vaultClientHeap) Update(req *vaultClientRenewalRequest, next time.Time) error { 683 if entry, ok := h.heapMap[req.id]; ok { 684 entry.req = req 685 entry.next = next 686 heap.Fix(&h.heap, entry.index) 687 return nil 688 } 689 690 return fmt.Errorf("heap doesn't contain %v", req.id) 691 } 692 693 // Remove will remove an identifier from the secondary index and deletes the 694 // corresponding node from the heap. 
695 func (h *vaultClientHeap) Remove(id string) error { 696 if entry, ok := h.heapMap[id]; ok { 697 heap.Remove(&h.heap, entry.index) 698 delete(h.heapMap, id) 699 return nil 700 } 701 702 return fmt.Errorf("heap doesn't contain entry for %v", id) 703 } 704 705 // The heap interface requires the following methods to be implemented. 706 // * Push(x interface{}) // add x as element Len() 707 // * Pop() interface{} // remove and return element Len() - 1. 708 // * sort.Interface 709 // 710 // sort.Interface comprises of the following methods: 711 // * Len() int 712 // * Less(i, j int) bool 713 // * Swap(i, j int) 714 715 // Part of sort.Interface 716 func (h vaultDataHeapImp) Len() int { return len(h) } 717 718 // Part of sort.Interface 719 func (h vaultDataHeapImp) Less(i, j int) bool { 720 // Two zero times should return false. 721 // Otherwise, zero is "greater" than any other time. 722 // (To sort it at the end of the list.) 723 // Sort such that zero times are at the end of the list. 724 iZero, jZero := h[i].next.IsZero(), h[j].next.IsZero() 725 if iZero && jZero { 726 return false 727 } else if iZero { 728 return false 729 } else if jZero { 730 return true 731 } 732 733 return h[i].next.Before(h[j].next) 734 } 735 736 // Part of sort.Interface 737 func (h vaultDataHeapImp) Swap(i, j int) { 738 h[i], h[j] = h[j], h[i] 739 h[i].index = i 740 h[j].index = j 741 } 742 743 // Part of heap.Interface 744 func (h *vaultDataHeapImp) Push(x interface{}) { 745 n := len(*h) 746 entry := x.(*vaultClientHeapEntry) 747 entry.index = n 748 *h = append(*h, entry) 749 } 750 751 // Part of heap.Interface 752 func (h *vaultDataHeapImp) Pop() interface{} { 753 old := *h 754 n := len(old) 755 entry := old[n-1] 756 entry.index = -1 // for safety 757 *h = old[0 : n-1] 758 return entry 759 }