// Source: github.com/hhrutter/nomad@v0.6.0-rc2.0.20170723054333-80c4b03f0705/client/vaultclient/vaultclient.go

package vaultclient

import (
	"container/heap"
	"fmt"
	"log"
	"math/rand"
	"strings"
	"sync"
	"time"

	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/nomad/structs/config"
	vaultapi "github.com/hashicorp/vault/api"
)

// TokenDeriverFunc takes in an allocation and a set of tasks and derives a
// wrapped token for all the tasks, from the nomad server. All the derived
// wrapped tokens will be unwrapped using the vault API client. The returned
// map is indexed by task name.
type TokenDeriverFunc func(*structs.Allocation, []string, *vaultapi.Client) (map[string]string, error)

// VaultClient is the interface which the nomad client uses to interact with
// vault and to periodically renew tokens and secrets.
type VaultClient interface {
	// Start initiates the renewal loop of tokens and secrets
	Start()

	// Stop terminates the renewal loop for tokens and secrets
	Stop()

	// DeriveToken contacts the nomad server and fetches wrapped tokens for
	// a set of tasks. The wrapped tokens will be unwrapped using vault and
	// returned.
	DeriveToken(*structs.Allocation, []string) (map[string]string, error)

	// GetConsulACL fetches the Consul ACL token required for the task. The
	// first argument is the vault token used for the read, the second is
	// the vault path of the Consul ACL secret.
	GetConsulACL(string, string) (*vaultapi.Secret, error)

	// RenewToken renews a token with the given increment (in seconds) and
	// adds it to the min-heap for periodic renewal. Renewal failures are
	// reported asynchronously on the returned channel.
	RenewToken(string, int) (<-chan error, error)

	// StopRenewToken removes the token from the min-heap, stopping its
	// renewal.
	StopRenewToken(string) error

	// RenewLease renews a vault secret's lease and adds the lease
	// identifier to the min-heap for periodic renewal. Renewal failures
	// are reported asynchronously on the returned channel.
	RenewLease(string, int) (<-chan error, error)

	// StopRenewLease removes a secret's lease ID from the min-heap,
	// stopping its renewal.
	StopRenewLease(string) error
}

// vaultClient is the implementation of the VaultClient interface. It
// interacts with vault and performs token and lease renewals periodically.
type vaultClient struct {
	// tokenDeriver is a function pointer passed in by the client to derive
	// tokens by making RPC calls to the nomad server. The wrapped tokens
	// returned by the nomad server will be unwrapped by this function
	// using the vault API client.
	tokenDeriver TokenDeriverFunc

	// running indicates if the renewal loop is active or not
	running bool

	// token is the data of the passed VaultClient token.
	// NOTE(review): nothing in this file appears to read or write this
	// field - confirm whether it is still needed.
	token *tokenData

	// client is the API client to interact with vault
	client *vaultapi.Client

	// updateCh is the channel to notify heap modifications to the renewal
	// loop
	updateCh chan struct{}

	// stopCh is the channel to trigger termination of renewal loop
	stopCh chan struct{}

	// heap is the min-heap to keep track of both tokens and leases
	heap *vaultClientHeap

	// config is the configuration to connect to vault
	config *config.VaultConfig

	// lock serializes access to the renewal heap, the running flag and
	// the token set on the shared API client.
	lock sync.RWMutex

	// logger receives the diagnostic output of the vault client
	logger *log.Logger
}

// tokenData holds the relevant information about the Vault token passed to the
// client.
type tokenData struct {
	CreationTTL int      `mapstructure:"creation_ttl"`
	TTL         int      `mapstructure:"ttl"`
	Renewable   bool     `mapstructure:"renewable"`
	Policies    []string `mapstructure:"policies"`
	Role        string   `mapstructure:"role"`
	Root        bool
}

// vaultClientRenewalRequest is a request object for renewal of both tokens and
// secret's leases.
type vaultClientRenewalRequest struct {
	// errCh is the channel into which any renewal error will be sent to
	errCh chan error

	// id is an identifier which represents either a token or a lease
	id string

	// increment is the duration (in seconds) for which the token or lease
	// should be renewed for
	increment int

	// isToken indicates whether the 'id' field is a token or not
	isToken bool
}

// vaultClientHeapEntry is the element representing an entry in the renewal
// heap.
type vaultClientHeapEntry struct {
	// req is the renewal request this entry stands for
	req *vaultClientRenewalRequest

	// next is the time at which the request is due for its next renewal;
	// the heap is ordered by this value
	next time.Time

	// index is the entry's current position in the heap slice. It is kept
	// up to date by the heap's Swap, Push and Pop methods.
	index int
}

// vaultClientHeap is a wrapper around the actual heap to provide additional
// semantics on top of functions provided by the heap interface. In order to
// achieve that, an additional map is placed beside the actual heap. This map
// can be used to check if an entry is already present in the heap.
type vaultClientHeap struct {
	// heapMap is the secondary index, keyed by the request identifier
	heapMap map[string]*vaultClientHeapEntry

	// heap is the min-heap of entries ordered by next renewal time
	heap vaultDataHeapImp
}

// vaultDataHeapImp is the data type of the heap. It implements
// heap.Interface.
type vaultDataHeapImp []*vaultClientHeapEntry

// NewVaultClient returns a new vault client from the given config.
139 func NewVaultClient(config *config.VaultConfig, logger *log.Logger, tokenDeriver TokenDeriverFunc) (*vaultClient, error) { 140 if config == nil { 141 return nil, fmt.Errorf("nil vault config") 142 } 143 144 if logger == nil { 145 return nil, fmt.Errorf("nil logger") 146 } 147 148 c := &vaultClient{ 149 config: config, 150 stopCh: make(chan struct{}), 151 // Update channel should be a buffered channel 152 updateCh: make(chan struct{}, 1), 153 heap: newVaultClientHeap(), 154 logger: logger, 155 tokenDeriver: tokenDeriver, 156 } 157 158 if !config.IsEnabled() { 159 return c, nil 160 } 161 162 // Get the Vault API configuration 163 apiConf, err := config.ApiConfig() 164 if err != nil { 165 logger.Printf("[ERR] client.vault: failed to create vault API config: %v", err) 166 return nil, err 167 } 168 169 // Create the Vault API client 170 client, err := vaultapi.NewClient(apiConf) 171 if err != nil { 172 logger.Printf("[ERR] client.vault: failed to create Vault client. Not retrying: %v", err) 173 return nil, err 174 } 175 176 c.client = client 177 178 return c, nil 179 } 180 181 // newVaultClientHeap returns a new vault client heap with both the heap and a 182 // map which is a secondary index for heap elements, both initialized. 183 func newVaultClientHeap() *vaultClientHeap { 184 return &vaultClientHeap{ 185 heapMap: make(map[string]*vaultClientHeapEntry), 186 heap: make(vaultDataHeapImp, 0), 187 } 188 } 189 190 // isTracked returns if a given identifier is already present in the heap and 191 // hence is being renewed. Lock should be held before calling this method. 
192 func (c *vaultClient) isTracked(id string) bool { 193 if id == "" { 194 return false 195 } 196 197 _, ok := c.heap.heapMap[id] 198 return ok 199 } 200 201 // Starts the renewal loop of vault client 202 func (c *vaultClient) Start() { 203 if !c.config.IsEnabled() || c.running { 204 return 205 } 206 207 c.lock.Lock() 208 c.running = true 209 c.lock.Unlock() 210 211 go c.run() 212 } 213 214 // Stops the renewal loop of vault client 215 func (c *vaultClient) Stop() { 216 if !c.config.IsEnabled() || !c.running { 217 return 218 } 219 220 c.lock.Lock() 221 defer c.lock.Unlock() 222 223 c.running = false 224 close(c.stopCh) 225 } 226 227 // unlockAndUnset is used to unset the vault token on the client and release the 228 // lock. Helper method for deferring a call that does both. 229 func (c *vaultClient) unlockAndUnset() { 230 c.client.SetToken("") 231 c.lock.Unlock() 232 } 233 234 // DeriveToken takes in an allocation and a set of tasks and for each of the 235 // task, it derives a vault token from nomad server and unwraps it using vault. 236 // The return value is a map containing all the unwrapped tokens indexed by the 237 // task name. 238 func (c *vaultClient) DeriveToken(alloc *structs.Allocation, taskNames []string) (map[string]string, error) { 239 if !c.config.IsEnabled() { 240 return nil, fmt.Errorf("vault client not enabled") 241 } 242 if !c.running { 243 return nil, fmt.Errorf("vault client is not running") 244 } 245 246 c.lock.Lock() 247 defer c.unlockAndUnset() 248 249 // Use the token supplied to interact with vault 250 c.client.SetToken("") 251 252 tokens, err := c.tokenDeriver(alloc, taskNames, c.client) 253 if err != nil { 254 c.logger.Printf("[ERR] client.vault: failed to derive token for allocation %q and tasks %v: %v", alloc.ID, taskNames, err) 255 return nil, err 256 } 257 258 return tokens, nil 259 } 260 261 // GetConsulACL creates a vault API client and reads from vault a consul ACL 262 // token used by the task. 
263 func (c *vaultClient) GetConsulACL(token, path string) (*vaultapi.Secret, error) { 264 if !c.config.IsEnabled() { 265 return nil, fmt.Errorf("vault client not enabled") 266 } 267 if token == "" { 268 return nil, fmt.Errorf("missing token") 269 } 270 if path == "" { 271 return nil, fmt.Errorf("missing consul ACL token vault path") 272 } 273 274 c.lock.Lock() 275 defer c.unlockAndUnset() 276 277 // Use the token supplied to interact with vault 278 c.client.SetToken(token) 279 280 // Read the consul ACL token and return the secret directly 281 return c.client.Logical().Read(path) 282 } 283 284 // RenewToken renews the supplied token for a given duration (in seconds) and 285 // adds it to the min-heap so that it is renewed periodically by the renewal 286 // loop. Any error returned during renewal will be written to a buffered 287 // channel and the channel is returned instead of an actual error. This helps 288 // the caller be notified of a renewal failure asynchronously for appropriate 289 // actions to be taken. The caller of this function need not have to close the 290 // error channel. 291 func (c *vaultClient) RenewToken(token string, increment int) (<-chan error, error) { 292 if token == "" { 293 err := fmt.Errorf("missing token") 294 return nil, err 295 } 296 if increment < 1 { 297 err := fmt.Errorf("increment cannot be less than 1") 298 return nil, err 299 } 300 301 // Create a buffered error channel 302 errCh := make(chan error, 1) 303 304 // Create a renewal request and indicate that the identifier in the 305 // request is a token and not a lease 306 renewalReq := &vaultClientRenewalRequest{ 307 errCh: errCh, 308 id: token, 309 isToken: true, 310 increment: increment, 311 } 312 313 // Perform the renewal of the token and send any error to the dedicated 314 // error channel. 
315 if err := c.renew(renewalReq); err != nil { 316 c.logger.Printf("[ERR] client.vault: renewal of token failed: %v", err) 317 return nil, err 318 } 319 320 return errCh, nil 321 } 322 323 // RenewLease renews the supplied lease identifier for a supplied duration (in 324 // seconds) and adds it to the min-heap so that it gets renewed periodically by 325 // the renewal loop. Any error returned during renewal will be written to a 326 // buffered channel and the channel is returned instead of an actual error. 327 // This helps the caller be notified of a renewal failure asynchronously for 328 // appropriate actions to be taken. The caller of this function need not have 329 // to close the error channel. 330 func (c *vaultClient) RenewLease(leaseId string, increment int) (<-chan error, error) { 331 if leaseId == "" { 332 err := fmt.Errorf("missing lease ID") 333 return nil, err 334 } 335 336 if increment < 1 { 337 err := fmt.Errorf("increment cannot be less than 1") 338 return nil, err 339 } 340 341 // Create a buffered error channel 342 errCh := make(chan error, 1) 343 344 // Create a renewal request using the supplied lease and duration 345 renewalReq := &vaultClientRenewalRequest{ 346 errCh: errCh, 347 id: leaseId, 348 increment: increment, 349 } 350 351 // Renew the secret and send any error to the dedicated error channel 352 if err := c.renew(renewalReq); err != nil { 353 c.logger.Printf("[ERR] client.vault: renewal of lease failed: %v", err) 354 return nil, err 355 } 356 357 return errCh, nil 358 } 359 360 // renew is a common method to handle renewal of both tokens and secret leases. 361 // It invokes a token renewal or a secret's lease renewal. If renewal is 362 // successful, min-heap is updated based on the duration after which it needs 363 // renewal again. The next renewal time is randomly selected to avoid spikes in 364 // the number of APIs periodically. 
365 func (c *vaultClient) renew(req *vaultClientRenewalRequest) error { 366 c.lock.Lock() 367 defer c.lock.Unlock() 368 369 if req == nil { 370 return fmt.Errorf("nil renewal request") 371 } 372 if req.errCh == nil { 373 return fmt.Errorf("renewal request error channel nil") 374 } 375 376 if !c.config.IsEnabled() { 377 close(req.errCh) 378 return fmt.Errorf("vault client not enabled") 379 } 380 if !c.running { 381 close(req.errCh) 382 return fmt.Errorf("vault client is not running") 383 } 384 if req.id == "" { 385 close(req.errCh) 386 return fmt.Errorf("missing id in renewal request") 387 } 388 if req.increment < 1 { 389 close(req.errCh) 390 return fmt.Errorf("increment cannot be less than 1") 391 } 392 393 var renewalErr error 394 leaseDuration := req.increment 395 if req.isToken { 396 // Set the token in the API client to the one that needs 397 // renewal 398 c.client.SetToken(req.id) 399 400 // Renew the token 401 renewResp, err := c.client.Auth().Token().RenewSelf(req.increment) 402 if err != nil { 403 renewalErr = fmt.Errorf("failed to renew the vault token: %v", err) 404 } else if renewResp == nil || renewResp.Auth == nil { 405 renewalErr = fmt.Errorf("failed to renew the vault token") 406 } else { 407 // Don't set this if renewal fails 408 leaseDuration = renewResp.Auth.LeaseDuration 409 } 410 411 // Reset the token in the API client before returning 412 c.client.SetToken("") 413 } else { 414 // Renew the secret 415 renewResp, err := c.client.Sys().Renew(req.id, req.increment) 416 if err != nil { 417 renewalErr = fmt.Errorf("failed to renew vault secret: %v", err) 418 } else if renewResp == nil { 419 renewalErr = fmt.Errorf("failed to renew vault secret") 420 } else { 421 // Don't set this if renewal fails 422 leaseDuration = renewResp.LeaseDuration 423 } 424 } 425 426 duration := leaseDuration / 2 427 switch { 428 case leaseDuration < 30: 429 // Don't bother about introducing randomness if the 430 // leaseDuration is too small. 
431 default: 432 // Give a breathing space of 20 seconds 433 min := 10 434 max := leaseDuration - min 435 rand.Seed(time.Now().Unix()) 436 duration = min + rand.Intn(max-min) 437 } 438 439 // Determine the next renewal time 440 next := time.Now().Add(time.Duration(duration) * time.Second) 441 442 fatal := false 443 if renewalErr != nil && 444 (strings.Contains(renewalErr.Error(), "lease not found or lease is not renewable") || 445 strings.Contains(renewalErr.Error(), "token not found") || 446 strings.Contains(renewalErr.Error(), "permission denied")) { 447 fatal = true 448 } else if renewalErr != nil { 449 c.logger.Printf("[DEBUG] client.vault: req.increment: %d, leaseDuration: %d, duration: %d", req.increment, leaseDuration, duration) 450 c.logger.Printf("[ERR] client.vault: renewal of lease or token failed due to a non-fatal error. Retrying at %v: %v", next.String(), renewalErr) 451 } 452 453 if c.isTracked(req.id) { 454 if fatal { 455 // If encountered with an error where in a lease or a 456 // token is not valid at all with vault, and if that 457 // item is tracked by the renewal loop, stop renewing 458 // it by removing the corresponding heap entry. 459 if err := c.heap.Remove(req.id); err != nil { 460 return fmt.Errorf("failed to remove heap entry: %v", err) 461 } 462 463 // Report the fatal error to the client 464 req.errCh <- renewalErr 465 close(req.errCh) 466 467 return renewalErr 468 } 469 470 // If the identifier is already tracked, this indicates a 471 // subsequest renewal. In this case, update the existing 472 // element in the heap with the new renewal time. 473 if err := c.heap.Update(req, next); err != nil { 474 return fmt.Errorf("failed to update heap entry. err: %v", err) 475 } 476 477 // There is no need to signal an update to the renewal loop 478 // here because this case is hit from the renewal loop itself. 
479 } else { 480 if fatal { 481 // If encountered with an error where in a lease or a 482 // token is not valid at all with vault, and if that 483 // item is not tracked by renewal loop, don't add it. 484 485 // Report the fatal error to the client 486 req.errCh <- renewalErr 487 close(req.errCh) 488 489 return renewalErr 490 } 491 492 // If the identifier is not already tracked, this is a first 493 // renewal request. In this case, add an entry into the heap 494 // with the next renewal time. 495 if err := c.heap.Push(req, next); err != nil { 496 return fmt.Errorf("failed to push an entry to heap. err: %v", err) 497 } 498 499 // Signal an update for the renewal loop to trigger a fresh 500 // computation for the next best candidate for renewal. 501 if c.running { 502 select { 503 case c.updateCh <- struct{}{}: 504 default: 505 } 506 } 507 } 508 509 return nil 510 } 511 512 // run is the renewal loop which performs the periodic renewals of both the 513 // tokens and the secret leases. 514 func (c *vaultClient) run() { 515 if !c.config.IsEnabled() { 516 return 517 } 518 519 var renewalCh <-chan time.Time 520 for c.config.IsEnabled() && c.running { 521 // Fetches the candidate for next renewal 522 renewalReq, renewalTime := c.nextRenewal() 523 if renewalTime.IsZero() { 524 // If the heap is empty, don't do anything 525 renewalCh = nil 526 } else { 527 now := time.Now() 528 if renewalTime.After(now) { 529 // Compute the duration after which the item 530 // needs renewal and set the renewalCh to fire 531 // at that time. 532 renewalDuration := renewalTime.Sub(time.Now()) 533 renewalCh = time.After(renewalDuration) 534 } else { 535 // If the renewals of multiple items are too 536 // close to each other and by the time the 537 // entry is fetched from heap it might be past 538 // the current time (by a small margin). In 539 // which case, fire immediately. 
540 renewalCh = time.After(0) 541 } 542 } 543 544 select { 545 case <-renewalCh: 546 if err := c.renew(renewalReq); err != nil { 547 c.logger.Printf("[ERR] client.vault: renewal of token failed: %v", err) 548 } 549 case <-c.updateCh: 550 continue 551 case <-c.stopCh: 552 c.logger.Printf("[DEBUG] client.vault: stopped") 553 return 554 } 555 } 556 } 557 558 // StopRenewToken removes the item from the heap which represents the given 559 // token. 560 func (c *vaultClient) StopRenewToken(token string) error { 561 return c.stopRenew(token) 562 } 563 564 // StopRenewLease removes the item from the heap which represents the given 565 // lease identifier. 566 func (c *vaultClient) StopRenewLease(leaseId string) error { 567 return c.stopRenew(leaseId) 568 } 569 570 // stopRenew removes the given identifier from the heap and signals the renewal 571 // loop to compute the next best candidate for renewal. 572 func (c *vaultClient) stopRenew(id string) error { 573 c.lock.Lock() 574 defer c.lock.Unlock() 575 576 if !c.isTracked(id) { 577 return nil 578 } 579 580 if err := c.heap.Remove(id); err != nil { 581 return fmt.Errorf("failed to remove heap entry: %v", err) 582 } 583 584 // Signal an update to the renewal loop. 585 if c.running { 586 select { 587 case c.updateCh <- struct{}{}: 588 default: 589 } 590 } 591 592 return nil 593 } 594 595 // nextRenewal returns the root element of the min-heap, which represents the 596 // next element to be renewed and the time at which the renewal needs to be 597 // triggered. 
598 func (c *vaultClient) nextRenewal() (*vaultClientRenewalRequest, time.Time) { 599 c.lock.RLock() 600 defer c.lock.RUnlock() 601 602 if c.heap.Length() == 0 { 603 return nil, time.Time{} 604 } 605 606 // Fetches the root element in the min-heap 607 nextEntry := c.heap.Peek() 608 if nextEntry == nil { 609 return nil, time.Time{} 610 } 611 612 return nextEntry.req, nextEntry.next 613 } 614 615 // Additional helper functions on top of interface methods 616 617 // Length returns the number of elements in the heap 618 func (h *vaultClientHeap) Length() int { 619 return len(h.heap) 620 } 621 622 // Returns the root node of the min-heap 623 func (h *vaultClientHeap) Peek() *vaultClientHeapEntry { 624 if len(h.heap) == 0 { 625 return nil 626 } 627 628 return h.heap[0] 629 } 630 631 // Push adds the secondary index and inserts an item into the heap 632 func (h *vaultClientHeap) Push(req *vaultClientRenewalRequest, next time.Time) error { 633 if req == nil { 634 return fmt.Errorf("nil request") 635 } 636 637 if _, ok := h.heapMap[req.id]; ok { 638 return fmt.Errorf("entry %v already exists", req.id) 639 } 640 641 heapEntry := &vaultClientHeapEntry{ 642 req: req, 643 next: next, 644 } 645 h.heapMap[req.id] = heapEntry 646 heap.Push(&h.heap, heapEntry) 647 return nil 648 } 649 650 // Update will modify the existing item in the heap with the new data and the 651 // time, and fixes the heap. 652 func (h *vaultClientHeap) Update(req *vaultClientRenewalRequest, next time.Time) error { 653 if entry, ok := h.heapMap[req.id]; ok { 654 entry.req = req 655 entry.next = next 656 heap.Fix(&h.heap, entry.index) 657 return nil 658 } 659 660 return fmt.Errorf("heap doesn't contain %v", req.id) 661 } 662 663 // Remove will remove an identifier from the secondary index and deletes the 664 // corresponding node from the heap. 
665 func (h *vaultClientHeap) Remove(id string) error { 666 if entry, ok := h.heapMap[id]; ok { 667 heap.Remove(&h.heap, entry.index) 668 delete(h.heapMap, id) 669 return nil 670 } 671 672 return fmt.Errorf("heap doesn't contain entry for %v", id) 673 } 674 675 // The heap interface requires the following methods to be implemented. 676 // * Push(x interface{}) // add x as element Len() 677 // * Pop() interface{} // remove and return element Len() - 1. 678 // * sort.Interface 679 // 680 // sort.Interface comprises of the following methods: 681 // * Len() int 682 // * Less(i, j int) bool 683 // * Swap(i, j int) 684 685 // Part of sort.Interface 686 func (h vaultDataHeapImp) Len() int { return len(h) } 687 688 // Part of sort.Interface 689 func (h vaultDataHeapImp) Less(i, j int) bool { 690 // Two zero times should return false. 691 // Otherwise, zero is "greater" than any other time. 692 // (To sort it at the end of the list.) 693 // Sort such that zero times are at the end of the list. 694 iZero, jZero := h[i].next.IsZero(), h[j].next.IsZero() 695 if iZero && jZero { 696 return false 697 } else if iZero { 698 return false 699 } else if jZero { 700 return true 701 } 702 703 return h[i].next.Before(h[j].next) 704 } 705 706 // Part of sort.Interface 707 func (h vaultDataHeapImp) Swap(i, j int) { 708 h[i], h[j] = h[j], h[i] 709 h[i].index = i 710 h[j].index = j 711 } 712 713 // Part of heap.Interface 714 func (h *vaultDataHeapImp) Push(x interface{}) { 715 n := len(*h) 716 entry := x.(*vaultClientHeapEntry) 717 entry.index = n 718 *h = append(*h, entry) 719 } 720 721 // Part of heap.Interface 722 func (h *vaultDataHeapImp) Pop() interface{} { 723 old := *h 724 n := len(old) 725 entry := old[n-1] 726 entry.index = -1 // for safety 727 *h = old[0 : n-1] 728 return entry 729 }