github.com/maier/nomad@v0.4.1-0.20161110003312-a9e3d0b8549d/client/vaultclient/vaultclient.go (about) 1 package vaultclient 2 3 import ( 4 "container/heap" 5 "fmt" 6 "log" 7 "math/rand" 8 "strings" 9 "sync" 10 "time" 11 12 "github.com/hashicorp/nomad/nomad/structs" 13 "github.com/hashicorp/nomad/nomad/structs/config" 14 vaultapi "github.com/hashicorp/vault/api" 15 ) 16 17 // TokenDeriverFunc takes in an allocation and a set of tasks and derives a 18 // wrapped token for all the tasks, from the nomad server. All the derived 19 // wrapped tokens will be unwrapped using the vault API client. 20 type TokenDeriverFunc func(*structs.Allocation, []string, *vaultapi.Client) (map[string]string, error) 21 22 // The interface which nomad client uses to interact with vault and 23 // periodically renews the tokens and secrets. 24 type VaultClient interface { 25 // Start initiates the renewal loop of tokens and secrets 26 Start() 27 28 // Stop terminates the renewal loop for tokens and secrets 29 Stop() 30 31 // DeriveToken contacts the nomad server and fetches wrapped tokens for 32 // a set of tasks. The wrapped tokens will be unwrapped using vault and 33 // returned. 34 DeriveToken(*structs.Allocation, []string) (map[string]string, error) 35 36 // GetConsulACL fetches the Consul ACL token required for the task 37 GetConsulACL(string, string) (*vaultapi.Secret, error) 38 39 // RenewToken renews a token with the given increment and adds it to 40 // the min-heap for periodic renewal. 41 RenewToken(string, int) (<-chan error, error) 42 43 // StopRenewToken removes the token from the min-heap, stopping its 44 // renewal. 45 StopRenewToken(string) error 46 47 // RenewLease renews a vault secret's lease and adds the lease 48 // identifier to the min-heap for periodic renewal. 49 RenewLease(string, int) (<-chan error, error) 50 51 // StopRenewLease removes a secret's lease ID from the min-heap, 52 // stopping its renewal. 
53 StopRenewLease(string) error 54 } 55 56 // Implementation of VaultClient interface to interact with vault and perform 57 // token and lease renewals periodically. 58 type vaultClient struct { 59 // tokenDeriver is a function pointer passed in by the client to derive 60 // tokens by making RPC calls to the nomad server. The wrapped tokens 61 // returned by the nomad server will be unwrapped by this function 62 // using the vault API client. 63 tokenDeriver TokenDeriverFunc 64 65 // running indicates if the renewal loop is active or not 66 running bool 67 68 // tokenData is the data of the passed VaultClient token 69 token *tokenData 70 71 // client is the API client to interact with vault 72 client *vaultapi.Client 73 74 // updateCh is the channel to notify heap modifications to the renewal 75 // loop 76 updateCh chan struct{} 77 78 // stopCh is the channel to trigger termination of renewal loop 79 stopCh chan struct{} 80 81 // heap is the min-heap to keep track of both tokens and leases 82 heap *vaultClientHeap 83 84 // config is the configuration to connect to vault 85 config *config.VaultConfig 86 87 lock sync.RWMutex 88 logger *log.Logger 89 } 90 91 // tokenData holds the relevant information about the Vault token passed to the 92 // client. 93 type tokenData struct { 94 CreationTTL int `mapstructure:"creation_ttl"` 95 TTL int `mapstructure:"ttl"` 96 Renewable bool `mapstructure:"renewable"` 97 Policies []string `mapstructure:"policies"` 98 Role string `mapstructure:"role"` 99 Root bool 100 } 101 102 // vaultClientRenewalRequest is a request object for renewal of both tokens and 103 // secret's leases. 
type vaultClientRenewalRequest struct {
	// errCh receives any error produced while renewing this request.
	errCh chan error

	// id identifies either a token or a lease.
	id string

	// increment is the duration for which the token or lease should be
	// renewed.
	increment int

	// isToken reports whether id refers to a token rather than a lease.
	isToken bool
}

// vaultClientHeapEntry is a single element of the renewal heap: a renewal
// request, the time of its next renewal and its position in the heap slice.
type vaultClientHeapEntry struct {
	req   *vaultClientRenewalRequest
	next  time.Time
	index int
}

// vaultClientHeap wraps the actual heap to provide additional semantics on top
// of the functions offered by the heap interface. To achieve that, a map is
// kept beside the heap; it can be used to check whether an entry is already
// present in the heap.
type vaultClientHeap struct {
	heapMap map[string]*vaultClientHeapEntry
	heap    vaultDataHeapImp
}

// vaultDataHeapImp is the underlying data type of the heap.
type vaultDataHeapImp []*vaultClientHeapEntry

// NewVaultClient returns a new vault client from the given config.
139 func NewVaultClient(config *config.VaultConfig, logger *log.Logger, tokenDeriver TokenDeriverFunc) (*vaultClient, error) { 140 if config == nil { 141 return nil, fmt.Errorf("nil vault config") 142 } 143 144 if logger == nil { 145 return nil, fmt.Errorf("nil logger") 146 } 147 148 c := &vaultClient{ 149 config: config, 150 stopCh: make(chan struct{}), 151 // Update channel should be a buffered channel 152 updateCh: make(chan struct{}, 1), 153 heap: newVaultClientHeap(), 154 logger: logger, 155 tokenDeriver: tokenDeriver, 156 } 157 158 if !config.IsEnabled() { 159 return c, nil 160 } 161 162 // Get the Vault API configuration 163 apiConf, err := config.ApiConfig() 164 if err != nil { 165 logger.Printf("[ERR] client.vault: failed to create vault API config: %v", err) 166 return nil, err 167 } 168 169 // Create the Vault API client 170 client, err := vaultapi.NewClient(apiConf) 171 if err != nil { 172 logger.Printf("[ERR] client.vault: failed to create Vault client. Not retrying: %v", err) 173 return nil, err 174 } 175 176 c.client = client 177 178 return c, nil 179 } 180 181 // newVaultClientHeap returns a new vault client heap with both the heap and a 182 // map which is a secondary index for heap elements, both initialized. 183 func newVaultClientHeap() *vaultClientHeap { 184 return &vaultClientHeap{ 185 heapMap: make(map[string]*vaultClientHeapEntry), 186 heap: make(vaultDataHeapImp, 0), 187 } 188 } 189 190 // isTracked returns if a given identifier is already present in the heap and 191 // hence is being renewed. Lock should be held before calling this method. 
192 func (c *vaultClient) isTracked(id string) bool { 193 if id == "" { 194 return false 195 } 196 197 _, ok := c.heap.heapMap[id] 198 return ok 199 } 200 201 // Starts the renewal loop of vault client 202 func (c *vaultClient) Start() { 203 if !c.config.IsEnabled() || c.running { 204 return 205 } 206 207 c.lock.Lock() 208 c.running = true 209 c.lock.Unlock() 210 211 go c.run() 212 } 213 214 // Stops the renewal loop of vault client 215 func (c *vaultClient) Stop() { 216 if !c.config.IsEnabled() || !c.running { 217 return 218 } 219 220 c.lock.Lock() 221 defer c.lock.Unlock() 222 223 c.running = false 224 close(c.stopCh) 225 } 226 227 // DeriveToken takes in an allocation and a set of tasks and for each of the 228 // task, it derives a vault token from nomad server and unwraps it using vault. 229 // The return value is a map containing all the unwrapped tokens indexed by the 230 // task name. 231 func (c *vaultClient) DeriveToken(alloc *structs.Allocation, taskNames []string) (map[string]string, error) { 232 if !c.config.IsEnabled() { 233 return nil, fmt.Errorf("vault client not enabled") 234 } 235 if !c.running { 236 return nil, fmt.Errorf("vault client is not running") 237 } 238 239 return c.tokenDeriver(alloc, taskNames, c.client) 240 } 241 242 // GetConsulACL creates a vault API client and reads from vault a consul ACL 243 // token used by the task. 
244 func (c *vaultClient) GetConsulACL(token, path string) (*vaultapi.Secret, error) { 245 if !c.config.IsEnabled() { 246 return nil, fmt.Errorf("vault client not enabled") 247 } 248 if token == "" { 249 return nil, fmt.Errorf("missing token") 250 } 251 if path == "" { 252 return nil, fmt.Errorf("missing consul ACL token vault path") 253 } 254 255 c.lock.Lock() 256 defer c.lock.Unlock() 257 258 // Use the token supplied to interact with vault 259 c.client.SetToken(token) 260 261 // Reset the token before returning 262 defer c.client.SetToken("") 263 264 // Read the consul ACL token and return the secret directly 265 return c.client.Logical().Read(path) 266 } 267 268 // RenewToken renews the supplied token for a given duration (in seconds) and 269 // adds it to the min-heap so that it is renewed periodically by the renewal 270 // loop. Any error returned during renewal will be written to a buffered 271 // channel and the channel is returned instead of an actual error. This helps 272 // the caller be notified of a renewal failure asynchronously for appropriate 273 // actions to be taken. The caller of this function need not have to close the 274 // error channel. 275 func (c *vaultClient) RenewToken(token string, increment int) (<-chan error, error) { 276 if token == "" { 277 err := fmt.Errorf("missing token") 278 return nil, err 279 } 280 if increment < 1 { 281 err := fmt.Errorf("increment cannot be less than 1") 282 return nil, err 283 } 284 285 // Create a buffered error channel 286 errCh := make(chan error, 1) 287 288 // Create a renewal request and indicate that the identifier in the 289 // request is a token and not a lease 290 renewalReq := &vaultClientRenewalRequest{ 291 errCh: errCh, 292 id: token, 293 isToken: true, 294 increment: increment, 295 } 296 297 // Perform the renewal of the token and send any error to the dedicated 298 // error channel. 
299 if err := c.renew(renewalReq); err != nil { 300 c.logger.Printf("[ERR] client.vault: renewal of token failed: %v", err) 301 return nil, err 302 } 303 304 return errCh, nil 305 } 306 307 // RenewLease renews the supplied lease identifier for a supplied duration (in 308 // seconds) and adds it to the min-heap so that it gets renewed periodically by 309 // the renewal loop. Any error returned during renewal will be written to a 310 // buffered channel and the channel is returned instead of an actual error. 311 // This helps the caller be notified of a renewal failure asynchronously for 312 // appropriate actions to be taken. The caller of this function need not have 313 // to close the error channel. 314 func (c *vaultClient) RenewLease(leaseId string, increment int) (<-chan error, error) { 315 if leaseId == "" { 316 err := fmt.Errorf("missing lease ID") 317 return nil, err 318 } 319 320 if increment < 1 { 321 err := fmt.Errorf("increment cannot be less than 1") 322 return nil, err 323 } 324 325 // Create a buffered error channel 326 errCh := make(chan error, 1) 327 328 // Create a renewal request using the supplied lease and duration 329 renewalReq := &vaultClientRenewalRequest{ 330 errCh: errCh, 331 id: leaseId, 332 increment: increment, 333 } 334 335 // Renew the secret and send any error to the dedicated error channel 336 if err := c.renew(renewalReq); err != nil { 337 c.logger.Printf("[ERR] client.vault: renewal of lease failed: %v", err) 338 return nil, err 339 } 340 341 return errCh, nil 342 } 343 344 // renew is a common method to handle renewal of both tokens and secret leases. 345 // It invokes a token renewal or a secret's lease renewal. If renewal is 346 // successful, min-heap is updated based on the duration after which it needs 347 // renewal again. The next renewal time is randomly selected to avoid spikes in 348 // the number of APIs periodically. 
349 func (c *vaultClient) renew(req *vaultClientRenewalRequest) error { 350 c.lock.Lock() 351 defer c.lock.Unlock() 352 353 if req == nil { 354 return fmt.Errorf("nil renewal request") 355 } 356 if req.errCh == nil { 357 return fmt.Errorf("renewal request error channel nil") 358 } 359 360 if !c.config.IsEnabled() { 361 close(req.errCh) 362 return fmt.Errorf("vault client not enabled") 363 } 364 if !c.running { 365 close(req.errCh) 366 return fmt.Errorf("vault client is not running") 367 } 368 if req.id == "" { 369 close(req.errCh) 370 return fmt.Errorf("missing id in renewal request") 371 } 372 if req.increment < 1 { 373 close(req.errCh) 374 return fmt.Errorf("increment cannot be less than 1") 375 } 376 377 var renewalErr error 378 leaseDuration := req.increment 379 if req.isToken { 380 // Reset the token in the API client before returning 381 defer c.client.SetToken("") 382 383 // Set the token in the API client to the one that needs 384 // renewal 385 c.client.SetToken(req.id) 386 387 // Renew the token 388 renewResp, err := c.client.Auth().Token().RenewSelf(req.increment) 389 if err != nil { 390 renewalErr = fmt.Errorf("failed to renew the vault token: %v", err) 391 } else if renewResp == nil || renewResp.Auth == nil { 392 renewalErr = fmt.Errorf("failed to renew the vault token") 393 } else { 394 // Don't set this if renewal fails 395 leaseDuration = renewResp.Auth.LeaseDuration 396 } 397 } else { 398 // Renew the secret 399 renewResp, err := c.client.Sys().Renew(req.id, req.increment) 400 if err != nil { 401 renewalErr = fmt.Errorf("failed to renew vault secret: %v", err) 402 } else if renewResp == nil { 403 renewalErr = fmt.Errorf("failed to renew vault secret") 404 } else { 405 // Don't set this if renewal fails 406 leaseDuration = renewResp.LeaseDuration 407 } 408 } 409 410 duration := leaseDuration / 2 411 switch { 412 case leaseDuration < 30: 413 // Don't bother about introducing randomness if the 414 // leaseDuration is too small. 
415 default: 416 // Give a breathing space of 20 seconds 417 min := 10 418 max := leaseDuration - min 419 rand.Seed(time.Now().Unix()) 420 duration = min + rand.Intn(max-min) 421 } 422 423 // Determine the next renewal time 424 next := time.Now().Add(time.Duration(duration) * time.Second) 425 426 fatal := false 427 if renewalErr != nil && 428 (strings.Contains(renewalErr.Error(), "lease not found or lease is not renewable") || 429 strings.Contains(renewalErr.Error(), "token not found") || 430 strings.Contains(renewalErr.Error(), "permission denied")) { 431 fatal = true 432 } else if renewalErr != nil { 433 c.logger.Printf("[DEBUG] client.vault: req.increment: %d, leaseDuration: %d, duration: %d", req.increment, leaseDuration, duration) 434 c.logger.Printf("[ERR] client.vault: renewal of lease or token failed due to a non-fatal error. Retrying at %v: %v", next.String(), renewalErr) 435 } 436 437 if c.isTracked(req.id) { 438 if fatal { 439 // If encountered with an error where in a lease or a 440 // token is not valid at all with vault, and if that 441 // item is tracked by the renewal loop, stop renewing 442 // it by removing the corresponding heap entry. 443 if err := c.heap.Remove(req.id); err != nil { 444 return fmt.Errorf("failed to remove heap entry. err: %v", err) 445 } 446 delete(c.heap.heapMap, req.id) 447 448 // Report the fatal error to the client 449 req.errCh <- renewalErr 450 close(req.errCh) 451 452 return renewalErr 453 } 454 455 // If the identifier is already tracked, this indicates a 456 // subsequest renewal. In this case, update the existing 457 // element in the heap with the new renewal time. 458 if err := c.heap.Update(req, next); err != nil { 459 return fmt.Errorf("failed to update heap entry. err: %v", err) 460 } 461 462 // There is no need to signal an update to the renewal loop 463 // here because this case is hit from the renewal loop itself. 
464 } else { 465 if fatal { 466 // If encountered with an error where in a lease or a 467 // token is not valid at all with vault, and if that 468 // item is not tracked by renewal loop, don't add it. 469 470 // Report the fatal error to the client 471 req.errCh <- renewalErr 472 close(req.errCh) 473 474 return renewalErr 475 } 476 477 // If the identifier is not already tracked, this is a first 478 // renewal request. In this case, add an entry into the heap 479 // with the next renewal time. 480 if err := c.heap.Push(req, next); err != nil { 481 return fmt.Errorf("failed to push an entry to heap. err: %v", err) 482 } 483 484 // Signal an update for the renewal loop to trigger a fresh 485 // computation for the next best candidate for renewal. 486 if c.running { 487 select { 488 case c.updateCh <- struct{}{}: 489 default: 490 } 491 } 492 } 493 494 return nil 495 } 496 497 // run is the renewal loop which performs the periodic renewals of both the 498 // tokens and the secret leases. 499 func (c *vaultClient) run() { 500 if !c.config.IsEnabled() { 501 return 502 } 503 504 var renewalCh <-chan time.Time 505 for c.config.IsEnabled() && c.running { 506 // Fetches the candidate for next renewal 507 renewalReq, renewalTime := c.nextRenewal() 508 if renewalTime.IsZero() { 509 // If the heap is empty, don't do anything 510 renewalCh = nil 511 } else { 512 now := time.Now() 513 if renewalTime.After(now) { 514 // Compute the duration after which the item 515 // needs renewal and set the renewalCh to fire 516 // at that time. 517 renewalDuration := renewalTime.Sub(time.Now()) 518 renewalCh = time.After(renewalDuration) 519 } else { 520 // If the renewals of multiple items are too 521 // close to each other and by the time the 522 // entry is fetched from heap it might be past 523 // the current time (by a small margin). In 524 // which case, fire immediately. 
525 renewalCh = time.After(0) 526 } 527 } 528 529 select { 530 case <-renewalCh: 531 if err := c.renew(renewalReq); err != nil { 532 c.logger.Printf("[ERR] client.vault: renewal of token failed: %v", err) 533 } 534 case <-c.updateCh: 535 continue 536 case <-c.stopCh: 537 c.logger.Printf("[DEBUG] client.vault: stopped") 538 return 539 } 540 } 541 } 542 543 // StopRenewToken removes the item from the heap which represents the given 544 // token. 545 func (c *vaultClient) StopRenewToken(token string) error { 546 return c.stopRenew(token) 547 } 548 549 // StopRenewLease removes the item from the heap which represents the given 550 // lease identifier. 551 func (c *vaultClient) StopRenewLease(leaseId string) error { 552 return c.stopRenew(leaseId) 553 } 554 555 // stopRenew removes the given identifier from the heap and signals the renewal 556 // loop to compute the next best candidate for renewal. 557 func (c *vaultClient) stopRenew(id string) error { 558 c.lock.Lock() 559 defer c.lock.Unlock() 560 561 if !c.isTracked(id) { 562 return nil 563 } 564 565 // Remove the identifier from the heap 566 if err := c.heap.Remove(id); err != nil { 567 return fmt.Errorf("failed to remove heap entry: %v", err) 568 } 569 570 // Delete the identifier from the map only after the it is removed from 571 // the heap. Heap's remove method relies on the heap map. 572 delete(c.heap.heapMap, id) 573 574 // Signal an update to the renewal loop. 575 if c.running { 576 select { 577 case c.updateCh <- struct{}{}: 578 default: 579 } 580 } 581 582 return nil 583 } 584 585 // nextRenewal returns the root element of the min-heap, which represents the 586 // next element to be renewed and the time at which the renewal needs to be 587 // triggered. 
588 func (c *vaultClient) nextRenewal() (*vaultClientRenewalRequest, time.Time) { 589 c.lock.RLock() 590 defer c.lock.RUnlock() 591 592 if c.heap.Length() == 0 { 593 return nil, time.Time{} 594 } 595 596 // Fetches the root element in the min-heap 597 nextEntry := c.heap.Peek() 598 if nextEntry == nil { 599 return nil, time.Time{} 600 } 601 602 return nextEntry.req, nextEntry.next 603 } 604 605 // Additional helper functions on top of interface methods 606 607 // Length returns the number of elements in the heap 608 func (h *vaultClientHeap) Length() int { 609 return len(h.heap) 610 } 611 612 // Returns the root node of the min-heap 613 func (h *vaultClientHeap) Peek() *vaultClientHeapEntry { 614 if len(h.heap) == 0 { 615 return nil 616 } 617 618 return h.heap[0] 619 } 620 621 // Push adds the secondary index and inserts an item into the heap 622 func (h *vaultClientHeap) Push(req *vaultClientRenewalRequest, next time.Time) error { 623 if req == nil { 624 return fmt.Errorf("nil request") 625 } 626 627 if _, ok := h.heapMap[req.id]; ok { 628 return fmt.Errorf("entry %v already exists", req.id) 629 } 630 631 heapEntry := &vaultClientHeapEntry{ 632 req: req, 633 next: next, 634 } 635 h.heapMap[req.id] = heapEntry 636 heap.Push(&h.heap, heapEntry) 637 return nil 638 } 639 640 // Update will modify the existing item in the heap with the new data and the 641 // time, and fixes the heap. 642 func (h *vaultClientHeap) Update(req *vaultClientRenewalRequest, next time.Time) error { 643 if entry, ok := h.heapMap[req.id]; ok { 644 entry.req = req 645 entry.next = next 646 heap.Fix(&h.heap, entry.index) 647 return nil 648 } 649 650 return fmt.Errorf("heap doesn't contain %v", req.id) 651 } 652 653 // Remove will remove an identifier from the secondary index and deletes the 654 // corresponding node from the heap. 
655 func (h *vaultClientHeap) Remove(id string) error { 656 if entry, ok := h.heapMap[id]; ok { 657 heap.Remove(&h.heap, entry.index) 658 delete(h.heapMap, id) 659 return nil 660 } 661 662 return fmt.Errorf("heap doesn't contain entry for %v", id) 663 } 664 665 // The heap interface requires the following methods to be implemented. 666 // * Push(x interface{}) // add x as element Len() 667 // * Pop() interface{} // remove and return element Len() - 1. 668 // * sort.Interface 669 // 670 // sort.Interface comprises of the following methods: 671 // * Len() int 672 // * Less(i, j int) bool 673 // * Swap(i, j int) 674 675 // Part of sort.Interface 676 func (h vaultDataHeapImp) Len() int { return len(h) } 677 678 // Part of sort.Interface 679 func (h vaultDataHeapImp) Less(i, j int) bool { 680 // Two zero times should return false. 681 // Otherwise, zero is "greater" than any other time. 682 // (To sort it at the end of the list.) 683 // Sort such that zero times are at the end of the list. 684 iZero, jZero := h[i].next.IsZero(), h[j].next.IsZero() 685 if iZero && jZero { 686 return false 687 } else if iZero { 688 return false 689 } else if jZero { 690 return true 691 } 692 693 return h[i].next.Before(h[j].next) 694 } 695 696 // Part of sort.Interface 697 func (h vaultDataHeapImp) Swap(i, j int) { 698 h[i], h[j] = h[j], h[i] 699 h[i].index = i 700 h[j].index = j 701 } 702 703 // Part of heap.Interface 704 func (h *vaultDataHeapImp) Push(x interface{}) { 705 n := len(*h) 706 entry := x.(*vaultClientHeapEntry) 707 entry.index = n 708 *h = append(*h, entry) 709 } 710 711 // Part of heap.Interface 712 func (h *vaultDataHeapImp) Pop() interface{} { 713 old := *h 714 n := len(old) 715 entry := old[n-1] 716 entry.index = -1 // for safety 717 *h = old[0 : n-1] 718 return entry 719 }