github.com/djenriquez/nomad-1@v0.8.1/client/vaultclient/vaultclient.go (about) 1 package vaultclient 2 3 import ( 4 "container/heap" 5 "fmt" 6 "log" 7 "math/rand" 8 "strings" 9 "sync" 10 "time" 11 12 "github.com/hashicorp/nomad/nomad/structs" 13 "github.com/hashicorp/nomad/nomad/structs/config" 14 vaultapi "github.com/hashicorp/vault/api" 15 ) 16 17 // TokenDeriverFunc takes in an allocation and a set of tasks and derives a 18 // wrapped token for all the tasks, from the nomad server. All the derived 19 // wrapped tokens will be unwrapped using the vault API client. 20 type TokenDeriverFunc func(*structs.Allocation, []string, *vaultapi.Client) (map[string]string, error) 21 22 // The interface which nomad client uses to interact with vault and 23 // periodically renews the tokens and secrets. 24 type VaultClient interface { 25 // Start initiates the renewal loop of tokens and secrets 26 Start() 27 28 // Stop terminates the renewal loop for tokens and secrets 29 Stop() 30 31 // DeriveToken contacts the nomad server and fetches wrapped tokens for 32 // a set of tasks. The wrapped tokens will be unwrapped using vault and 33 // returned. 34 DeriveToken(*structs.Allocation, []string) (map[string]string, error) 35 36 // GetConsulACL fetches the Consul ACL token required for the task 37 GetConsulACL(string, string) (*vaultapi.Secret, error) 38 39 // RenewToken renews a token with the given increment and adds it to 40 // the min-heap for periodic renewal. 41 RenewToken(string, int) (<-chan error, error) 42 43 // StopRenewToken removes the token from the min-heap, stopping its 44 // renewal. 45 StopRenewToken(string) error 46 47 // RenewLease renews a vault secret's lease and adds the lease 48 // identifier to the min-heap for periodic renewal. 49 RenewLease(string, int) (<-chan error, error) 50 51 // StopRenewLease removes a secret's lease ID from the min-heap, 52 // stopping its renewal. 53 StopRenewLease(string) error 54 } 55 56 // Implementation of VaultClient interface to interact with vault and perform 57 // token and lease renewals periodically. 58 type vaultClient struct { 59 // tokenDeriver is a function pointer passed in by the client to derive 60 // tokens by making RPC calls to the nomad server. The wrapped tokens 61 // returned by the nomad server will be unwrapped by this function 62 // using the vault API client. 63 tokenDeriver TokenDeriverFunc 64 65 // running indicates if the renewal loop is active or not 66 running bool 67 68 // client is the API client to interact with vault 69 client *vaultapi.Client 70 71 // updateCh is the channel to notify heap modifications to the renewal 72 // loop 73 updateCh chan struct{} 74 75 // stopCh is the channel to trigger termination of renewal loop 76 stopCh chan struct{} 77 78 // heap is the min-heap to keep track of both tokens and leases 79 heap *vaultClientHeap 80 81 // config is the configuration to connect to vault 82 config *config.VaultConfig 83 84 lock sync.RWMutex 85 logger *log.Logger 86 } 87 88 // vaultClientRenewalRequest is a request object for renewal of both tokens and 89 // secret's leases. 90 type vaultClientRenewalRequest struct { 91 // errCh is the channel into which any renewal error will be sent to 92 errCh chan error 93 94 // id is an identifier which represents either a token or a lease 95 id string 96 97 // increment is the duration for which the token or lease should be 98 // renewed for 99 increment int 100 101 // isToken indicates whether the 'id' field is a token or not 102 isToken bool 103 } 104 105 // Element representing an entry in the renewal heap 106 type vaultClientHeapEntry struct { 107 req *vaultClientRenewalRequest 108 next time.Time 109 index int 110 } 111 112 // Wrapper around the actual heap to provide additional semantics on top of 113 // functions provided by the heap interface. In order to achieve that, an 114 // additional map is placed beside the actual heap. This map can be used to 115 // check if an entry is already present in the heap. 116 type vaultClientHeap struct { 117 heapMap map[string]*vaultClientHeapEntry 118 heap vaultDataHeapImp 119 } 120 121 // Data type of the heap 122 type vaultDataHeapImp []*vaultClientHeapEntry 123 124 // NewVaultClient returns a new vault client from the given config. 125 func NewVaultClient(config *config.VaultConfig, logger *log.Logger, tokenDeriver TokenDeriverFunc) (*vaultClient, error) { 126 if config == nil { 127 return nil, fmt.Errorf("nil vault config") 128 } 129 130 if logger == nil { 131 return nil, fmt.Errorf("nil logger") 132 } 133 134 c := &vaultClient{ 135 config: config, 136 stopCh: make(chan struct{}), 137 // Update channel should be a buffered channel 138 updateCh: make(chan struct{}, 1), 139 heap: newVaultClientHeap(), 140 logger: logger, 141 tokenDeriver: tokenDeriver, 142 } 143 144 if !config.IsEnabled() { 145 return c, nil 146 } 147 148 // Get the Vault API configuration 149 apiConf, err := config.ApiConfig() 150 if err != nil { 151 logger.Printf("[ERR] client.vault: failed to create vault API config: %v", err) 152 return nil, err 153 } 154 155 // Create the Vault API client 156 client, err := vaultapi.NewClient(apiConf) 157 if err != nil { 158 logger.Printf("[ERR] client.vault: failed to create Vault client. Not retrying: %v", err) 159 return nil, err 160 } 161 162 c.client = client 163 164 return c, nil 165 } 166 167 // newVaultClientHeap returns a new vault client heap with both the heap and a 168 // map which is a secondary index for heap elements, both initialized. 169 func newVaultClientHeap() *vaultClientHeap { 170 return &vaultClientHeap{ 171 heapMap: make(map[string]*vaultClientHeapEntry), 172 heap: make(vaultDataHeapImp, 0), 173 } 174 } 175 176 // isTracked returns if a given identifier is already present in the heap and 177 // hence is being renewed. Lock should be held before calling this method. 178 func (c *vaultClient) isTracked(id string) bool { 179 if id == "" { 180 return false 181 } 182 183 _, ok := c.heap.heapMap[id] 184 return ok 185 } 186 187 // Starts the renewal loop of vault client 188 func (c *vaultClient) Start() { 189 if !c.config.IsEnabled() || c.running { 190 return 191 } 192 193 c.lock.Lock() 194 c.running = true 195 c.lock.Unlock() 196 197 go c.run() 198 } 199 200 // Stops the renewal loop of vault client 201 func (c *vaultClient) Stop() { 202 if !c.config.IsEnabled() || !c.running { 203 return 204 } 205 206 c.lock.Lock() 207 defer c.lock.Unlock() 208 209 c.running = false 210 close(c.stopCh) 211 } 212 213 // unlockAndUnset is used to unset the vault token on the client and release the 214 // lock. Helper method for deferring a call that does both. 215 func (c *vaultClient) unlockAndUnset() { 216 c.client.SetToken("") 217 c.lock.Unlock() 218 } 219 220 // DeriveToken takes in an allocation and a set of tasks and for each of the 221 // task, it derives a vault token from nomad server and unwraps it using vault. 222 // The return value is a map containing all the unwrapped tokens indexed by the 223 // task name. 224 func (c *vaultClient) DeriveToken(alloc *structs.Allocation, taskNames []string) (map[string]string, error) { 225 if !c.config.IsEnabled() { 226 return nil, fmt.Errorf("vault client not enabled") 227 } 228 if !c.running { 229 return nil, fmt.Errorf("vault client is not running") 230 } 231 232 c.lock.Lock() 233 defer c.unlockAndUnset() 234 235 // Use the token supplied to interact with vault 236 c.client.SetToken("") 237 238 tokens, err := c.tokenDeriver(alloc, taskNames, c.client) 239 if err != nil { 240 c.logger.Printf("[ERR] client.vault: failed to derive token for allocation %q and tasks %v: %v", alloc.ID, taskNames, err) 241 return nil, err 242 } 243 244 return tokens, nil 245 } 246 247 // GetConsulACL creates a vault API client and reads from vault a consul ACL 248 // token used by the task. 249 func (c *vaultClient) GetConsulACL(token, path string) (*vaultapi.Secret, error) { 250 if !c.config.IsEnabled() { 251 return nil, fmt.Errorf("vault client not enabled") 252 } 253 if token == "" { 254 return nil, fmt.Errorf("missing token") 255 } 256 if path == "" { 257 return nil, fmt.Errorf("missing consul ACL token vault path") 258 } 259 260 c.lock.Lock() 261 defer c.unlockAndUnset() 262 263 // Use the token supplied to interact with vault 264 c.client.SetToken(token) 265 266 // Read the consul ACL token and return the secret directly 267 return c.client.Logical().Read(path) 268 } 269 270 // RenewToken renews the supplied token for a given duration (in seconds) and 271 // adds it to the min-heap so that it is renewed periodically by the renewal 272 // loop. Any error returned during renewal will be written to a buffered 273 // channel and the channel is returned instead of an actual error. This helps 274 // the caller be notified of a renewal failure asynchronously for appropriate 275 // actions to be taken. The caller of this function need not have to close the 276 // error channel. 277 func (c *vaultClient) RenewToken(token string, increment int) (<-chan error, error) { 278 if token == "" { 279 err := fmt.Errorf("missing token") 280 return nil, err 281 } 282 if increment < 1 { 283 err := fmt.Errorf("increment cannot be less than 1") 284 return nil, err 285 } 286 287 // Create a buffered error channel 288 errCh := make(chan error, 1) 289 290 // Create a renewal request and indicate that the identifier in the 291 // request is a token and not a lease 292 renewalReq := &vaultClientRenewalRequest{ 293 errCh: errCh, 294 id: token, 295 isToken: true, 296 increment: increment, 297 } 298 299 // Perform the renewal of the token and send any error to the dedicated 300 // error channel. 301 if err := c.renew(renewalReq); err != nil { 302 c.logger.Printf("[ERR] client.vault: renewal of token failed: %v", err) 303 return nil, err 304 } 305 306 return errCh, nil 307 } 308 309 // RenewLease renews the supplied lease identifier for a supplied duration (in 310 // seconds) and adds it to the min-heap so that it gets renewed periodically by 311 // the renewal loop. Any error returned during renewal will be written to a 312 // buffered channel and the channel is returned instead of an actual error. 313 // This helps the caller be notified of a renewal failure asynchronously for 314 // appropriate actions to be taken. The caller of this function need not have 315 // to close the error channel. 316 func (c *vaultClient) RenewLease(leaseId string, increment int) (<-chan error, error) { 317 if leaseId == "" { 318 err := fmt.Errorf("missing lease ID") 319 return nil, err 320 } 321 322 if increment < 1 { 323 err := fmt.Errorf("increment cannot be less than 1") 324 return nil, err 325 } 326 327 // Create a buffered error channel 328 errCh := make(chan error, 1) 329 330 // Create a renewal request using the supplied lease and duration 331 renewalReq := &vaultClientRenewalRequest{ 332 errCh: errCh, 333 id: leaseId, 334 increment: increment, 335 } 336 337 // Renew the secret and send any error to the dedicated error channel 338 if err := c.renew(renewalReq); err != nil { 339 c.logger.Printf("[ERR] client.vault: renewal of lease failed: %v", err) 340 return nil, err 341 } 342 343 return errCh, nil 344 } 345 346 // renew is a common method to handle renewal of both tokens and secret leases. 347 // It invokes a token renewal or a secret's lease renewal. If renewal is 348 // successful, min-heap is updated based on the duration after which it needs 349 // renewal again. The next renewal time is randomly selected to avoid spikes in 350 // the number of APIs periodically. 351 func (c *vaultClient) renew(req *vaultClientRenewalRequest) error { 352 c.lock.Lock() 353 defer c.lock.Unlock() 354 355 if req == nil { 356 return fmt.Errorf("nil renewal request") 357 } 358 if req.errCh == nil { 359 return fmt.Errorf("renewal request error channel nil") 360 } 361 362 if !c.config.IsEnabled() { 363 close(req.errCh) 364 return fmt.Errorf("vault client not enabled") 365 } 366 if !c.running { 367 close(req.errCh) 368 return fmt.Errorf("vault client is not running") 369 } 370 if req.id == "" { 371 close(req.errCh) 372 return fmt.Errorf("missing id in renewal request") 373 } 374 if req.increment < 1 { 375 close(req.errCh) 376 return fmt.Errorf("increment cannot be less than 1") 377 } 378 379 var renewalErr error 380 leaseDuration := req.increment 381 if req.isToken { 382 // Set the token in the API client to the one that needs 383 // renewal 384 c.client.SetToken(req.id) 385 386 // Renew the token 387 renewResp, err := c.client.Auth().Token().RenewSelf(req.increment) 388 if err != nil { 389 renewalErr = fmt.Errorf("failed to renew the vault token: %v", err) 390 } else if renewResp == nil || renewResp.Auth == nil { 391 renewalErr = fmt.Errorf("failed to renew the vault token") 392 } else { 393 // Don't set this if renewal fails 394 leaseDuration = renewResp.Auth.LeaseDuration 395 } 396 397 // Reset the token in the API client before returning 398 c.client.SetToken("") 399 } else { 400 // Renew the secret 401 renewResp, err := c.client.Sys().Renew(req.id, req.increment) 402 if err != nil { 403 renewalErr = fmt.Errorf("failed to renew vault secret: %v", err) 404 } else if renewResp == nil { 405 renewalErr = fmt.Errorf("failed to renew vault secret") 406 } else { 407 // Don't set this if renewal fails 408 leaseDuration = renewResp.LeaseDuration 409 } 410 } 411 412 duration := leaseDuration / 2 413 switch { 414 case leaseDuration < 30: 415 // Don't bother about introducing randomness if the 416 // leaseDuration is too small. 417 default: 418 // Give a breathing space of 20 seconds 419 min := 10 420 max := leaseDuration - min 421 rand.Seed(time.Now().Unix()) 422 duration = min + rand.Intn(max-min) 423 } 424 425 // Determine the next renewal time 426 next := time.Now().Add(time.Duration(duration) * time.Second) 427 428 fatal := false 429 if renewalErr != nil && 430 (strings.Contains(renewalErr.Error(), "lease not found or lease is not renewable") || 431 strings.Contains(renewalErr.Error(), "lease is not renewable") || 432 strings.Contains(renewalErr.Error(), "token not found") || 433 strings.Contains(renewalErr.Error(), "permission denied")) { 434 fatal = true 435 } else if renewalErr != nil { 436 c.logger.Printf("[DEBUG] client.vault: req.increment: %d, leaseDuration: %d, duration: %d", req.increment, leaseDuration, duration) 437 c.logger.Printf("[ERR] client.vault: renewal of lease or token failed due to a non-fatal error. Retrying at %v: %v", next.String(), renewalErr) 438 } 439 440 if c.isTracked(req.id) { 441 if fatal { 442 // If encountered with an error where in a lease or a 443 // token is not valid at all with vault, and if that 444 // item is tracked by the renewal loop, stop renewing 445 // it by removing the corresponding heap entry. 446 if err := c.heap.Remove(req.id); err != nil { 447 return fmt.Errorf("failed to remove heap entry: %v", err) 448 } 449 450 // Report the fatal error to the client 451 req.errCh <- renewalErr 452 close(req.errCh) 453 454 return renewalErr 455 } 456 457 // If the identifier is already tracked, this indicates a 458 // subsequest renewal. In this case, update the existing 459 // element in the heap with the new renewal time. 460 if err := c.heap.Update(req, next); err != nil { 461 return fmt.Errorf("failed to update heap entry. err: %v", err) 462 } 463 464 // There is no need to signal an update to the renewal loop 465 // here because this case is hit from the renewal loop itself. 466 } else { 467 if fatal { 468 // If encountered with an error where in a lease or a 469 // token is not valid at all with vault, and if that 470 // item is not tracked by renewal loop, don't add it. 471 472 // Report the fatal error to the client 473 req.errCh <- renewalErr 474 close(req.errCh) 475 476 return renewalErr 477 } 478 479 // If the identifier is not already tracked, this is a first 480 // renewal request. In this case, add an entry into the heap 481 // with the next renewal time. 482 if err := c.heap.Push(req, next); err != nil { 483 return fmt.Errorf("failed to push an entry to heap. err: %v", err) 484 } 485 486 // Signal an update for the renewal loop to trigger a fresh 487 // computation for the next best candidate for renewal. 488 if c.running { 489 select { 490 case c.updateCh <- struct{}{}: 491 default: 492 } 493 } 494 } 495 496 return nil 497 } 498 499 // run is the renewal loop which performs the periodic renewals of both the 500 // tokens and the secret leases. 501 func (c *vaultClient) run() { 502 if !c.config.IsEnabled() { 503 return 504 } 505 506 var renewalCh <-chan time.Time 507 for c.config.IsEnabled() && c.running { 508 // Fetches the candidate for next renewal 509 renewalReq, renewalTime := c.nextRenewal() 510 if renewalTime.IsZero() { 511 // If the heap is empty, don't do anything 512 renewalCh = nil 513 } else { 514 now := time.Now() 515 if renewalTime.After(now) { 516 // Compute the duration after which the item 517 // needs renewal and set the renewalCh to fire 518 // at that time. 519 renewalDuration := renewalTime.Sub(time.Now()) 520 renewalCh = time.After(renewalDuration) 521 } else { 522 // If the renewals of multiple items are too 523 // close to each other and by the time the 524 // entry is fetched from heap it might be past 525 // the current time (by a small margin). In 526 // which case, fire immediately. 527 renewalCh = time.After(0) 528 } 529 } 530 531 select { 532 case <-renewalCh: 533 if err := c.renew(renewalReq); err != nil { 534 c.logger.Printf("[ERR] client.vault: renewal of token failed: %v", err) 535 } 536 case <-c.updateCh: 537 continue 538 case <-c.stopCh: 539 c.logger.Printf("[DEBUG] client.vault: stopped") 540 return 541 } 542 } 543 } 544 545 // StopRenewToken removes the item from the heap which represents the given 546 // token. 547 func (c *vaultClient) StopRenewToken(token string) error { 548 return c.stopRenew(token) 549 } 550 551 // StopRenewLease removes the item from the heap which represents the given 552 // lease identifier. 553 func (c *vaultClient) StopRenewLease(leaseId string) error { 554 return c.stopRenew(leaseId) 555 } 556 557 // stopRenew removes the given identifier from the heap and signals the renewal 558 // loop to compute the next best candidate for renewal. 559 func (c *vaultClient) stopRenew(id string) error { 560 c.lock.Lock() 561 defer c.lock.Unlock() 562 563 if !c.isTracked(id) { 564 return nil 565 } 566 567 if err := c.heap.Remove(id); err != nil { 568 return fmt.Errorf("failed to remove heap entry: %v", err) 569 } 570 571 // Signal an update to the renewal loop. 572 if c.running { 573 select { 574 case c.updateCh <- struct{}{}: 575 default: 576 } 577 } 578 579 return nil 580 } 581 582 // nextRenewal returns the root element of the min-heap, which represents the 583 // next element to be renewed and the time at which the renewal needs to be 584 // triggered. 585 func (c *vaultClient) nextRenewal() (*vaultClientRenewalRequest, time.Time) { 586 c.lock.RLock() 587 defer c.lock.RUnlock() 588 589 if c.heap.Length() == 0 { 590 return nil, time.Time{} 591 } 592 593 // Fetches the root element in the min-heap 594 nextEntry := c.heap.Peek() 595 if nextEntry == nil { 596 return nil, time.Time{} 597 } 598 599 return nextEntry.req, nextEntry.next 600 } 601 602 // Additional helper functions on top of interface methods 603 604 // Length returns the number of elements in the heap 605 func (h *vaultClientHeap) Length() int { 606 return len(h.heap) 607 } 608 609 // Returns the root node of the min-heap 610 func (h *vaultClientHeap) Peek() *vaultClientHeapEntry { 611 if len(h.heap) == 0 { 612 return nil 613 } 614 615 return h.heap[0] 616 } 617 618 // Push adds the secondary index and inserts an item into the heap 619 func (h *vaultClientHeap) Push(req *vaultClientRenewalRequest, next time.Time) error { 620 if req == nil { 621 return fmt.Errorf("nil request") 622 } 623 624 if _, ok := h.heapMap[req.id]; ok { 625 return fmt.Errorf("entry %v already exists", req.id) 626 } 627 628 heapEntry := &vaultClientHeapEntry{ 629 req: req, 630 next: next, 631 } 632 h.heapMap[req.id] = heapEntry 633 heap.Push(&h.heap, heapEntry) 634 return nil 635 } 636 637 // Update will modify the existing item in the heap with the new data and the 638 // time, and fixes the heap. 639 func (h *vaultClientHeap) Update(req *vaultClientRenewalRequest, next time.Time) error { 640 if entry, ok := h.heapMap[req.id]; ok { 641 entry.req = req 642 entry.next = next 643 heap.Fix(&h.heap, entry.index) 644 return nil 645 } 646 647 return fmt.Errorf("heap doesn't contain %v", req.id) 648 } 649 650 // Remove will remove an identifier from the secondary index and deletes the 651 // corresponding node from the heap. 652 func (h *vaultClientHeap) Remove(id string) error { 653 if entry, ok := h.heapMap[id]; ok { 654 heap.Remove(&h.heap, entry.index) 655 delete(h.heapMap, id) 656 return nil 657 } 658 659 return fmt.Errorf("heap doesn't contain entry for %v", id) 660 } 661 662 // The heap interface requires the following methods to be implemented. 663 // * Push(x interface{}) // add x as element Len() 664 // * Pop() interface{} // remove and return element Len() - 1. 665 // * sort.Interface 666 // 667 // sort.Interface comprises of the following methods: 668 // * Len() int 669 // * Less(i, j int) bool 670 // * Swap(i, j int) 671 672 // Part of sort.Interface 673 func (h vaultDataHeapImp) Len() int { return len(h) } 674 675 // Part of sort.Interface 676 func (h vaultDataHeapImp) Less(i, j int) bool { 677 // Two zero times should return false. 678 // Otherwise, zero is "greater" than any other time. 679 // (To sort it at the end of the list.) 680 // Sort such that zero times are at the end of the list. 681 iZero, jZero := h[i].next.IsZero(), h[j].next.IsZero() 682 if iZero && jZero { 683 return false 684 } else if iZero { 685 return false 686 } else if jZero { 687 return true 688 } 689 690 return h[i].next.Before(h[j].next) 691 } 692 693 // Part of sort.Interface 694 func (h vaultDataHeapImp) Swap(i, j int) { 695 h[i], h[j] = h[j], h[i] 696 h[i].index = i 697 h[j].index = j 698 } 699 700 // Part of heap.Interface 701 func (h *vaultDataHeapImp) Push(x interface{}) { 702 n := len(*h) 703 entry := x.(*vaultClientHeapEntry) 704 entry.index = n 705 *h = append(*h, entry) 706 } 707 708 // Part of heap.Interface 709 func (h *vaultDataHeapImp) Pop() interface{} { 710 old := *h 711 n := len(old) 712 entry := old[n-1] 713 entry.index = -1 // for safety 714 *h = old[0 : n-1] 715 return entry 716 }