github.com/smithx10/nomad@v0.9.1-rc1/client/vaultclient/vaultclient.go (about) 1 package vaultclient 2 3 import ( 4 "container/heap" 5 "fmt" 6 "math/rand" 7 "net/http" 8 "strings" 9 "sync" 10 "time" 11 12 metrics "github.com/armon/go-metrics" 13 hclog "github.com/hashicorp/go-hclog" 14 "github.com/hashicorp/nomad/nomad/structs" 15 "github.com/hashicorp/nomad/nomad/structs/config" 16 vaultapi "github.com/hashicorp/vault/api" 17 ) 18 19 // TokenDeriverFunc takes in an allocation and a set of tasks and derives a 20 // wrapped token for all the tasks, from the nomad server. All the derived 21 // wrapped tokens will be unwrapped using the vault API client. 22 type TokenDeriverFunc func(*structs.Allocation, []string, *vaultapi.Client) (map[string]string, error) 23 24 // The interface which nomad client uses to interact with vault and 25 // periodically renews the tokens and secrets. 26 type VaultClient interface { 27 // Start initiates the renewal loop of tokens and secrets 28 Start() 29 30 // Stop terminates the renewal loop for tokens and secrets 31 Stop() 32 33 // DeriveToken contacts the nomad server and fetches wrapped tokens for 34 // a set of tasks. The wrapped tokens will be unwrapped using vault and 35 // returned. 36 DeriveToken(*structs.Allocation, []string) (map[string]string, error) 37 38 // GetConsulACL fetches the Consul ACL token required for the task 39 GetConsulACL(string, string) (*vaultapi.Secret, error) 40 41 // RenewToken renews a token with the given increment and adds it to 42 // the min-heap for periodic renewal. 43 RenewToken(string, int) (<-chan error, error) 44 45 // StopRenewToken removes the token from the min-heap, stopping its 46 // renewal. 47 StopRenewToken(string) error 48 49 // RenewLease renews a vault secret's lease and adds the lease 50 // identifier to the min-heap for periodic renewal. 51 RenewLease(string, int) (<-chan error, error) 52 53 // StopRenewLease removes a secret's lease ID from the min-heap, 54 // stopping its renewal. 55 StopRenewLease(string) error 56 } 57 58 // Implementation of VaultClient interface to interact with vault and perform 59 // token and lease renewals periodically. 60 type vaultClient struct { 61 // tokenDeriver is a function pointer passed in by the client to derive 62 // tokens by making RPC calls to the nomad server. The wrapped tokens 63 // returned by the nomad server will be unwrapped by this function 64 // using the vault API client. 65 tokenDeriver TokenDeriverFunc 66 67 // running indicates if the renewal loop is active or not 68 running bool 69 70 // client is the API client to interact with vault 71 client *vaultapi.Client 72 73 // updateCh is the channel to notify heap modifications to the renewal 74 // loop 75 updateCh chan struct{} 76 77 // stopCh is the channel to trigger termination of renewal loop 78 stopCh chan struct{} 79 80 // heap is the min-heap to keep track of both tokens and leases 81 heap *vaultClientHeap 82 83 // config is the configuration to connect to vault 84 config *config.VaultConfig 85 86 lock sync.RWMutex 87 logger hclog.Logger 88 } 89 90 // vaultClientRenewalRequest is a request object for renewal of both tokens and 91 // secret's leases. 92 type vaultClientRenewalRequest struct { 93 // errCh is the channel into which any renewal error will be sent to 94 errCh chan error 95 96 // id is an identifier which represents either a token or a lease 97 id string 98 99 // increment is the duration for which the token or lease should be 100 // renewed for 101 increment int 102 103 // isToken indicates whether the 'id' field is a token or not 104 isToken bool 105 } 106 107 // Element representing an entry in the renewal heap 108 type vaultClientHeapEntry struct { 109 req *vaultClientRenewalRequest 110 next time.Time 111 index int 112 } 113 114 // Wrapper around the actual heap to provide additional semantics on top of 115 // functions provided by the heap interface. In order to achieve that, an 116 // additional map is placed beside the actual heap. This map can be used to 117 // check if an entry is already present in the heap. 118 type vaultClientHeap struct { 119 heapMap map[string]*vaultClientHeapEntry 120 heap vaultDataHeapImp 121 } 122 123 // Data type of the heap 124 type vaultDataHeapImp []*vaultClientHeapEntry 125 126 // NewVaultClient returns a new vault client from the given config. 127 func NewVaultClient(config *config.VaultConfig, logger hclog.Logger, tokenDeriver TokenDeriverFunc) (*vaultClient, error) { 128 if config == nil { 129 return nil, fmt.Errorf("nil vault config") 130 } 131 132 logger = logger.Named("vault") 133 134 c := &vaultClient{ 135 config: config, 136 stopCh: make(chan struct{}), 137 // Update channel should be a buffered channel 138 updateCh: make(chan struct{}, 1), 139 heap: newVaultClientHeap(), 140 logger: logger, 141 tokenDeriver: tokenDeriver, 142 } 143 144 if !config.IsEnabled() { 145 return c, nil 146 } 147 148 // Get the Vault API configuration 149 apiConf, err := config.ApiConfig() 150 if err != nil { 151 logger.Error("error creating vault API config", "error", err) 152 return nil, err 153 } 154 155 // Create the Vault API client 156 client, err := vaultapi.NewClient(apiConf) 157 if err != nil { 158 logger.Error("error creating vault client", "error", err) 159 return nil, err 160 } 161 162 client.SetHeaders(http.Header{ 163 "User-Agent": []string{"hashicorp/nomad"}, 164 }) 165 166 c.client = client 167 168 return c, nil 169 } 170 171 // newVaultClientHeap returns a new vault client heap with both the heap and a 172 // map which is a secondary index for heap elements, both initialized. 173 func newVaultClientHeap() *vaultClientHeap { 174 return &vaultClientHeap{ 175 heapMap: make(map[string]*vaultClientHeapEntry), 176 heap: make(vaultDataHeapImp, 0), 177 } 178 } 179 180 // isTracked returns if a given identifier is already present in the heap and 181 // hence is being renewed. Lock should be held before calling this method. 182 func (c *vaultClient) isTracked(id string) bool { 183 if id == "" { 184 return false 185 } 186 187 _, ok := c.heap.heapMap[id] 188 return ok 189 } 190 191 // Starts the renewal loop of vault client 192 func (c *vaultClient) Start() { 193 if !c.config.IsEnabled() || c.running { 194 return 195 } 196 197 c.lock.Lock() 198 c.running = true 199 c.lock.Unlock() 200 201 go c.run() 202 } 203 204 // Stops the renewal loop of vault client 205 func (c *vaultClient) Stop() { 206 if !c.config.IsEnabled() || !c.running { 207 return 208 } 209 210 c.lock.Lock() 211 defer c.lock.Unlock() 212 213 c.running = false 214 close(c.stopCh) 215 } 216 217 // unlockAndUnset is used to unset the vault token on the client and release the 218 // lock. Helper method for deferring a call that does both. 219 func (c *vaultClient) unlockAndUnset() { 220 c.client.SetToken("") 221 c.lock.Unlock() 222 } 223 224 // DeriveToken takes in an allocation and a set of tasks and for each of the 225 // task, it derives a vault token from nomad server and unwraps it using vault. 226 // The return value is a map containing all the unwrapped tokens indexed by the 227 // task name. 228 func (c *vaultClient) DeriveToken(alloc *structs.Allocation, taskNames []string) (map[string]string, error) { 229 if !c.config.IsEnabled() { 230 return nil, fmt.Errorf("vault client not enabled") 231 } 232 if !c.running { 233 return nil, fmt.Errorf("vault client is not running") 234 } 235 236 c.lock.Lock() 237 defer c.unlockAndUnset() 238 239 // Use the token supplied to interact with vault 240 c.client.SetToken("") 241 242 tokens, err := c.tokenDeriver(alloc, taskNames, c.client) 243 if err != nil { 244 c.logger.Error("error deriving token", "error", err, "alloc_id", alloc.ID, "task_names", taskNames) 245 return nil, err 246 } 247 248 return tokens, nil 249 } 250 251 // GetConsulACL creates a vault API client and reads from vault a consul ACL 252 // token used by the task. 253 func (c *vaultClient) GetConsulACL(token, path string) (*vaultapi.Secret, error) { 254 if !c.config.IsEnabled() { 255 return nil, fmt.Errorf("vault client not enabled") 256 } 257 if token == "" { 258 return nil, fmt.Errorf("missing token") 259 } 260 if path == "" { 261 return nil, fmt.Errorf("missing consul ACL token vault path") 262 } 263 264 c.lock.Lock() 265 defer c.unlockAndUnset() 266 267 // Use the token supplied to interact with vault 268 c.client.SetToken(token) 269 270 // Read the consul ACL token and return the secret directly 271 return c.client.Logical().Read(path) 272 } 273 274 // RenewToken renews the supplied token for a given duration (in seconds) and 275 // adds it to the min-heap so that it is renewed periodically by the renewal 276 // loop. Any error returned during renewal will be written to a buffered 277 // channel and the channel is returned instead of an actual error. This helps 278 // the caller be notified of a renewal failure asynchronously for appropriate 279 // actions to be taken. The caller of this function need not have to close the 280 // error channel. 281 func (c *vaultClient) RenewToken(token string, increment int) (<-chan error, error) { 282 if token == "" { 283 err := fmt.Errorf("missing token") 284 return nil, err 285 } 286 if increment < 1 { 287 err := fmt.Errorf("increment cannot be less than 1") 288 return nil, err 289 } 290 291 // Create a buffered error channel 292 errCh := make(chan error, 1) 293 294 // Create a renewal request and indicate that the identifier in the 295 // request is a token and not a lease 296 renewalReq := &vaultClientRenewalRequest{ 297 errCh: errCh, 298 id: token, 299 isToken: true, 300 increment: increment, 301 } 302 303 // Perform the renewal of the token and send any error to the dedicated 304 // error channel. 305 if err := c.renew(renewalReq); err != nil { 306 c.logger.Error("error during renewal of token", "error", err) 307 metrics.IncrCounter([]string{"client", "vault", "renew_token_failure"}, 1) 308 return nil, err 309 } 310 311 return errCh, nil 312 } 313 314 // RenewLease renews the supplied lease identifier for a supplied duration (in 315 // seconds) and adds it to the min-heap so that it gets renewed periodically by 316 // the renewal loop. Any error returned during renewal will be written to a 317 // buffered channel and the channel is returned instead of an actual error. 318 // This helps the caller be notified of a renewal failure asynchronously for 319 // appropriate actions to be taken. The caller of this function need not have 320 // to close the error channel. 321 func (c *vaultClient) RenewLease(leaseId string, increment int) (<-chan error, error) { 322 if leaseId == "" { 323 err := fmt.Errorf("missing lease ID") 324 return nil, err 325 } 326 327 if increment < 1 { 328 err := fmt.Errorf("increment cannot be less than 1") 329 return nil, err 330 } 331 332 // Create a buffered error channel 333 errCh := make(chan error, 1) 334 335 // Create a renewal request using the supplied lease and duration 336 renewalReq := &vaultClientRenewalRequest{ 337 errCh: errCh, 338 id: leaseId, 339 increment: increment, 340 } 341 342 // Renew the secret and send any error to the dedicated error channel 343 if err := c.renew(renewalReq); err != nil { 344 c.logger.Error("error during renewal of lease", "error", err) 345 metrics.IncrCounter([]string{"client", "vault", "renew_lease_error"}, 1) 346 return nil, err 347 } 348 349 return errCh, nil 350 } 351 352 // renew is a common method to handle renewal of both tokens and secret leases. 353 // It invokes a token renewal or a secret's lease renewal. If renewal is 354 // successful, min-heap is updated based on the duration after which it needs 355 // renewal again. The next renewal time is randomly selected to avoid spikes in 356 // the number of APIs periodically. 357 func (c *vaultClient) renew(req *vaultClientRenewalRequest) error { 358 c.lock.Lock() 359 defer c.lock.Unlock() 360 361 if req == nil { 362 return fmt.Errorf("nil renewal request") 363 } 364 if req.errCh == nil { 365 return fmt.Errorf("renewal request error channel nil") 366 } 367 368 if !c.config.IsEnabled() { 369 close(req.errCh) 370 return fmt.Errorf("vault client not enabled") 371 } 372 if !c.running { 373 close(req.errCh) 374 return fmt.Errorf("vault client is not running") 375 } 376 if req.id == "" { 377 close(req.errCh) 378 return fmt.Errorf("missing id in renewal request") 379 } 380 if req.increment < 1 { 381 close(req.errCh) 382 return fmt.Errorf("increment cannot be less than 1") 383 } 384 385 var renewalErr error 386 leaseDuration := req.increment 387 if req.isToken { 388 // Set the token in the API client to the one that needs 389 // renewal 390 c.client.SetToken(req.id) 391 392 // Renew the token 393 renewResp, err := c.client.Auth().Token().RenewSelf(req.increment) 394 if err != nil { 395 renewalErr = fmt.Errorf("failed to renew the vault token: %v", err) 396 } else if renewResp == nil || renewResp.Auth == nil { 397 renewalErr = fmt.Errorf("failed to renew the vault token") 398 } else { 399 // Don't set this if renewal fails 400 leaseDuration = renewResp.Auth.LeaseDuration 401 } 402 403 // Reset the token in the API client before returning 404 c.client.SetToken("") 405 } else { 406 // Renew the secret 407 renewResp, err := c.client.Sys().Renew(req.id, req.increment) 408 if err != nil { 409 renewalErr = fmt.Errorf("failed to renew vault secret: %v", err) 410 } else if renewResp == nil { 411 renewalErr = fmt.Errorf("failed to renew vault secret") 412 } else { 413 // Don't set this if renewal fails 414 leaseDuration = renewResp.LeaseDuration 415 } 416 } 417 418 duration := leaseDuration / 2 419 switch { 420 case leaseDuration < 30: 421 // Don't bother about introducing randomness if the 422 // leaseDuration is too small. 423 default: 424 // Give a breathing space of 20 seconds 425 min := 10 426 max := leaseDuration - min 427 rand.Seed(time.Now().Unix()) 428 duration = min + rand.Intn(max-min) 429 } 430 431 // Determine the next renewal time 432 next := time.Now().Add(time.Duration(duration) * time.Second) 433 434 fatal := false 435 if renewalErr != nil && 436 (strings.Contains(renewalErr.Error(), "lease not found or lease is not renewable") || 437 strings.Contains(renewalErr.Error(), "lease is not renewable") || 438 strings.Contains(renewalErr.Error(), "token not found") || 439 strings.Contains(renewalErr.Error(), "permission denied")) { 440 fatal = true 441 } else if renewalErr != nil { 442 c.logger.Debug("renewal error details", "req.increment", req.increment, "lease_duration", leaseDuration, "duration", duration) 443 c.logger.Error("error during renewal of lease or token failed due to a non-fatal error; retrying", 444 "error", renewalErr, "period", next) 445 } 446 447 if c.isTracked(req.id) { 448 if fatal { 449 // If encountered with an error where in a lease or a 450 // token is not valid at all with vault, and if that 451 // item is tracked by the renewal loop, stop renewing 452 // it by removing the corresponding heap entry. 453 if err := c.heap.Remove(req.id); err != nil { 454 return fmt.Errorf("failed to remove heap entry: %v", err) 455 } 456 457 // Report the fatal error to the client 458 req.errCh <- renewalErr 459 close(req.errCh) 460 461 return renewalErr 462 } 463 464 // If the identifier is already tracked, this indicates a 465 // subsequest renewal. In this case, update the existing 466 // element in the heap with the new renewal time. 467 if err := c.heap.Update(req, next); err != nil { 468 return fmt.Errorf("failed to update heap entry. err: %v", err) 469 } 470 471 // There is no need to signal an update to the renewal loop 472 // here because this case is hit from the renewal loop itself. 473 } else { 474 if fatal { 475 // If encountered with an error where in a lease or a 476 // token is not valid at all with vault, and if that 477 // item is not tracked by renewal loop, don't add it. 478 479 // Report the fatal error to the client 480 req.errCh <- renewalErr 481 close(req.errCh) 482 483 return renewalErr 484 } 485 486 // If the identifier is not already tracked, this is a first 487 // renewal request. In this case, add an entry into the heap 488 // with the next renewal time. 489 if err := c.heap.Push(req, next); err != nil { 490 return fmt.Errorf("failed to push an entry to heap. err: %v", err) 491 } 492 493 // Signal an update for the renewal loop to trigger a fresh 494 // computation for the next best candidate for renewal. 495 if c.running { 496 select { 497 case c.updateCh <- struct{}{}: 498 default: 499 } 500 } 501 } 502 503 return nil 504 } 505 506 // run is the renewal loop which performs the periodic renewals of both the 507 // tokens and the secret leases. 508 func (c *vaultClient) run() { 509 if !c.config.IsEnabled() { 510 return 511 } 512 513 var renewalCh <-chan time.Time 514 for c.config.IsEnabled() && c.running { 515 // Fetches the candidate for next renewal 516 renewalReq, renewalTime := c.nextRenewal() 517 if renewalTime.IsZero() { 518 // If the heap is empty, don't do anything 519 renewalCh = nil 520 } else { 521 now := time.Now() 522 if renewalTime.After(now) { 523 // Compute the duration after which the item 524 // needs renewal and set the renewalCh to fire 525 // at that time. 526 renewalDuration := renewalTime.Sub(time.Now()) 527 renewalCh = time.After(renewalDuration) 528 } else { 529 // If the renewals of multiple items are too 530 // close to each other and by the time the 531 // entry is fetched from heap it might be past 532 // the current time (by a small margin). In 533 // which case, fire immediately. 534 renewalCh = time.After(0) 535 } 536 } 537 538 select { 539 case <-renewalCh: 540 if err := c.renew(renewalReq); err != nil { 541 c.logger.Error("error renewing token", "error", err) 542 metrics.IncrCounter([]string{"client", "vault", "renew_token_error"}, 1) 543 } 544 case <-c.updateCh: 545 continue 546 case <-c.stopCh: 547 c.logger.Debug("stopped") 548 return 549 } 550 } 551 } 552 553 // StopRenewToken removes the item from the heap which represents the given 554 // token. 555 func (c *vaultClient) StopRenewToken(token string) error { 556 return c.stopRenew(token) 557 } 558 559 // StopRenewLease removes the item from the heap which represents the given 560 // lease identifier. 561 func (c *vaultClient) StopRenewLease(leaseId string) error { 562 return c.stopRenew(leaseId) 563 } 564 565 // stopRenew removes the given identifier from the heap and signals the renewal 566 // loop to compute the next best candidate for renewal. 567 func (c *vaultClient) stopRenew(id string) error { 568 c.lock.Lock() 569 defer c.lock.Unlock() 570 571 if !c.isTracked(id) { 572 return nil 573 } 574 575 if err := c.heap.Remove(id); err != nil { 576 return fmt.Errorf("failed to remove heap entry: %v", err) 577 } 578 579 // Signal an update to the renewal loop. 580 if c.running { 581 select { 582 case c.updateCh <- struct{}{}: 583 default: 584 } 585 } 586 587 return nil 588 } 589 590 // nextRenewal returns the root element of the min-heap, which represents the 591 // next element to be renewed and the time at which the renewal needs to be 592 // triggered. 593 func (c *vaultClient) nextRenewal() (*vaultClientRenewalRequest, time.Time) { 594 c.lock.RLock() 595 defer c.lock.RUnlock() 596 597 if c.heap.Length() == 0 { 598 return nil, time.Time{} 599 } 600 601 // Fetches the root element in the min-heap 602 nextEntry := c.heap.Peek() 603 if nextEntry == nil { 604 return nil, time.Time{} 605 } 606 607 return nextEntry.req, nextEntry.next 608 } 609 610 // Additional helper functions on top of interface methods 611 612 // Length returns the number of elements in the heap 613 func (h *vaultClientHeap) Length() int { 614 return len(h.heap) 615 } 616 617 // Returns the root node of the min-heap 618 func (h *vaultClientHeap) Peek() *vaultClientHeapEntry { 619 if len(h.heap) == 0 { 620 return nil 621 } 622 623 return h.heap[0] 624 } 625 626 // Push adds the secondary index and inserts an item into the heap 627 func (h *vaultClientHeap) Push(req *vaultClientRenewalRequest, next time.Time) error { 628 if req == nil { 629 return fmt.Errorf("nil request") 630 } 631 632 if _, ok := h.heapMap[req.id]; ok { 633 return fmt.Errorf("entry %v already exists", req.id) 634 } 635 636 heapEntry := &vaultClientHeapEntry{ 637 req: req, 638 next: next, 639 } 640 h.heapMap[req.id] = heapEntry 641 heap.Push(&h.heap, heapEntry) 642 return nil 643 } 644 645 // Update will modify the existing item in the heap with the new data and the 646 // time, and fixes the heap. 647 func (h *vaultClientHeap) Update(req *vaultClientRenewalRequest, next time.Time) error { 648 if entry, ok := h.heapMap[req.id]; ok { 649 entry.req = req 650 entry.next = next 651 heap.Fix(&h.heap, entry.index) 652 return nil 653 } 654 655 return fmt.Errorf("heap doesn't contain %v", req.id) 656 } 657 658 // Remove will remove an identifier from the secondary index and deletes the 659 // corresponding node from the heap. 660 func (h *vaultClientHeap) Remove(id string) error { 661 if entry, ok := h.heapMap[id]; ok { 662 heap.Remove(&h.heap, entry.index) 663 delete(h.heapMap, id) 664 return nil 665 } 666 667 return fmt.Errorf("heap doesn't contain entry for %v", id) 668 } 669 670 // The heap interface requires the following methods to be implemented. 671 // * Push(x interface{}) // add x as element Len() 672 // * Pop() interface{} // remove and return element Len() - 1. 673 // * sort.Interface 674 // 675 // sort.Interface comprises of the following methods: 676 // * Len() int 677 // * Less(i, j int) bool 678 // * Swap(i, j int) 679 680 // Part of sort.Interface 681 func (h vaultDataHeapImp) Len() int { return len(h) } 682 683 // Part of sort.Interface 684 func (h vaultDataHeapImp) Less(i, j int) bool { 685 // Two zero times should return false. 686 // Otherwise, zero is "greater" than any other time. 687 // (To sort it at the end of the list.) 688 // Sort such that zero times are at the end of the list. 689 iZero, jZero := h[i].next.IsZero(), h[j].next.IsZero() 690 if iZero && jZero { 691 return false 692 } else if iZero { 693 return false 694 } else if jZero { 695 return true 696 } 697 698 return h[i].next.Before(h[j].next) 699 } 700 701 // Part of sort.Interface 702 func (h vaultDataHeapImp) Swap(i, j int) { 703 h[i], h[j] = h[j], h[i] 704 h[i].index = i 705 h[j].index = j 706 } 707 708 // Part of heap.Interface 709 func (h *vaultDataHeapImp) Push(x interface{}) { 710 n := len(*h) 711 entry := x.(*vaultClientHeapEntry) 712 entry.index = n 713 *h = append(*h, entry) 714 } 715 716 // Part of heap.Interface 717 func (h *vaultDataHeapImp) Pop() interface{} { 718 old := *h 719 n := len(old) 720 entry := old[n-1] 721 entry.index = -1 // for safety 722 *h = old[0 : n-1] 723 return entry 724 }