github.com/iqoqo/nomad@v0.11.3-0.20200911112621-d7021c74d101/client/vaultclient/vaultclient.go (about) 1 package vaultclient 2 3 import ( 4 "container/heap" 5 "fmt" 6 "math/rand" 7 "net/http" 8 "strings" 9 "sync" 10 "time" 11 12 metrics "github.com/armon/go-metrics" 13 hclog "github.com/hashicorp/go-hclog" 14 "github.com/hashicorp/nomad/nomad/structs" 15 "github.com/hashicorp/nomad/nomad/structs/config" 16 vaultapi "github.com/hashicorp/vault/api" 17 ) 18 19 // TokenDeriverFunc takes in an allocation and a set of tasks and derives a 20 // wrapped token for all the tasks, from the nomad server. All the derived 21 // wrapped tokens will be unwrapped using the vault API client. 22 type TokenDeriverFunc func(*structs.Allocation, []string, *vaultapi.Client) (map[string]string, error) 23 24 // The interface which nomad client uses to interact with vault and 25 // periodically renews the tokens and secrets. 26 type VaultClient interface { 27 // Start initiates the renewal loop of tokens and secrets 28 Start() 29 30 // Stop terminates the renewal loop for tokens and secrets 31 Stop() 32 33 // DeriveToken contacts the nomad server and fetches wrapped tokens for 34 // a set of tasks. The wrapped tokens will be unwrapped using vault and 35 // returned. 36 DeriveToken(*structs.Allocation, []string) (map[string]string, error) 37 38 // GetConsulACL fetches the Consul ACL token required for the task 39 GetConsulACL(string, string) (*vaultapi.Secret, error) 40 41 // RenewToken renews a token with the given increment and adds it to 42 // the min-heap for periodic renewal. 43 RenewToken(string, int) (<-chan error, error) 44 45 // StopRenewToken removes the token from the min-heap, stopping its 46 // renewal. 47 StopRenewToken(string) error 48 } 49 50 // Implementation of VaultClient interface to interact with vault and perform 51 // token and lease renewals periodically. 
52 type vaultClient struct { 53 // tokenDeriver is a function pointer passed in by the client to derive 54 // tokens by making RPC calls to the nomad server. The wrapped tokens 55 // returned by the nomad server will be unwrapped by this function 56 // using the vault API client. 57 tokenDeriver TokenDeriverFunc 58 59 // running indicates if the renewal loop is active or not 60 running bool 61 62 // client is the API client to interact with vault 63 client *vaultapi.Client 64 65 // updateCh is the channel to notify heap modifications to the renewal 66 // loop 67 updateCh chan struct{} 68 69 // stopCh is the channel to trigger termination of renewal loop 70 stopCh chan struct{} 71 72 // heap is the min-heap to keep track of both tokens and leases 73 heap *vaultClientHeap 74 75 // config is the configuration to connect to vault 76 config *config.VaultConfig 77 78 lock sync.RWMutex 79 logger hclog.Logger 80 } 81 82 // vaultClientRenewalRequest is a request object for renewal of both tokens and 83 // secret's leases. 84 type vaultClientRenewalRequest struct { 85 // errCh is the channel into which any renewal error will be sent to 86 errCh chan error 87 88 // id is an identifier which represents either a token or a lease 89 id string 90 91 // increment is the duration for which the token or lease should be 92 // renewed for 93 increment int 94 95 // isToken indicates whether the 'id' field is a token or not 96 isToken bool 97 } 98 99 // Element representing an entry in the renewal heap 100 type vaultClientHeapEntry struct { 101 req *vaultClientRenewalRequest 102 next time.Time 103 index int 104 } 105 106 // Wrapper around the actual heap to provide additional semantics on top of 107 // functions provided by the heap interface. In order to achieve that, an 108 // additional map is placed beside the actual heap. This map can be used to 109 // check if an entry is already present in the heap. 
110 type vaultClientHeap struct { 111 heapMap map[string]*vaultClientHeapEntry 112 heap vaultDataHeapImp 113 } 114 115 // Data type of the heap 116 type vaultDataHeapImp []*vaultClientHeapEntry 117 118 // NewVaultClient returns a new vault client from the given config. 119 func NewVaultClient(config *config.VaultConfig, logger hclog.Logger, tokenDeriver TokenDeriverFunc) (*vaultClient, error) { 120 if config == nil { 121 return nil, fmt.Errorf("nil vault config") 122 } 123 124 logger = logger.Named("vault") 125 126 c := &vaultClient{ 127 config: config, 128 stopCh: make(chan struct{}), 129 // Update channel should be a buffered channel 130 updateCh: make(chan struct{}, 1), 131 heap: newVaultClientHeap(), 132 logger: logger, 133 tokenDeriver: tokenDeriver, 134 } 135 136 if !config.IsEnabled() { 137 return c, nil 138 } 139 140 // Get the Vault API configuration 141 apiConf, err := config.ApiConfig() 142 if err != nil { 143 logger.Error("error creating vault API config", "error", err) 144 return nil, err 145 } 146 147 // Create the Vault API client 148 client, err := vaultapi.NewClient(apiConf) 149 if err != nil { 150 logger.Error("error creating vault client", "error", err) 151 return nil, err 152 } 153 154 client.SetHeaders(http.Header{ 155 "User-Agent": []string{"hashicorp/nomad"}, 156 }) 157 158 // SetHeaders above will replace all headers, make this call second 159 if config.Namespace != "" { 160 logger.Debug("configuring Vault namespace", "namespace", config.Namespace) 161 client.SetNamespace(config.Namespace) 162 } 163 164 c.client = client 165 166 return c, nil 167 } 168 169 // newVaultClientHeap returns a new vault client heap with both the heap and a 170 // map which is a secondary index for heap elements, both initialized. 
171 func newVaultClientHeap() *vaultClientHeap { 172 return &vaultClientHeap{ 173 heapMap: make(map[string]*vaultClientHeapEntry), 174 heap: make(vaultDataHeapImp, 0), 175 } 176 } 177 178 // isTracked returns if a given identifier is already present in the heap and 179 // hence is being renewed. Lock should be held before calling this method. 180 func (c *vaultClient) isTracked(id string) bool { 181 if id == "" { 182 return false 183 } 184 185 _, ok := c.heap.heapMap[id] 186 return ok 187 } 188 189 // isRunning returns true if the client is running. 190 func (c *vaultClient) isRunning() bool { 191 c.lock.RLock() 192 defer c.lock.RUnlock() 193 return c.running 194 } 195 196 // Starts the renewal loop of vault client 197 func (c *vaultClient) Start() { 198 c.lock.Lock() 199 defer c.lock.Unlock() 200 201 if !c.config.IsEnabled() || c.running { 202 return 203 } 204 205 c.running = true 206 207 go c.run() 208 } 209 210 // Stops the renewal loop of vault client 211 func (c *vaultClient) Stop() { 212 c.lock.Lock() 213 defer c.lock.Unlock() 214 215 if !c.config.IsEnabled() || !c.running { 216 return 217 } 218 219 c.running = false 220 close(c.stopCh) 221 } 222 223 // unlockAndUnset is used to unset the vault token on the client and release the 224 // lock. Helper method for deferring a call that does both. 225 func (c *vaultClient) unlockAndUnset() { 226 c.client.SetToken("") 227 c.lock.Unlock() 228 } 229 230 // DeriveToken takes in an allocation and a set of tasks and for each of the 231 // task, it derives a vault token from nomad server and unwraps it using vault. 232 // The return value is a map containing all the unwrapped tokens indexed by the 233 // task name. 
234 func (c *vaultClient) DeriveToken(alloc *structs.Allocation, taskNames []string) (map[string]string, error) { 235 if !c.config.IsEnabled() { 236 return nil, fmt.Errorf("vault client not enabled") 237 } 238 if !c.isRunning() { 239 return nil, fmt.Errorf("vault client is not running") 240 } 241 242 c.lock.Lock() 243 defer c.unlockAndUnset() 244 245 // Use the token supplied to interact with vault 246 c.client.SetToken("") 247 248 tokens, err := c.tokenDeriver(alloc, taskNames, c.client) 249 if err != nil { 250 c.logger.Error("error deriving token", "error", err, "alloc_id", alloc.ID, "task_names", taskNames) 251 return nil, err 252 } 253 254 return tokens, nil 255 } 256 257 // GetConsulACL creates a vault API client and reads from vault a consul ACL 258 // token used by the task. 259 func (c *vaultClient) GetConsulACL(token, path string) (*vaultapi.Secret, error) { 260 if !c.config.IsEnabled() { 261 return nil, fmt.Errorf("vault client not enabled") 262 } 263 if token == "" { 264 return nil, fmt.Errorf("missing token") 265 } 266 if path == "" { 267 return nil, fmt.Errorf("missing consul ACL token vault path") 268 } 269 270 c.lock.Lock() 271 defer c.unlockAndUnset() 272 273 // Use the token supplied to interact with vault 274 c.client.SetToken(token) 275 276 // Read the consul ACL token and return the secret directly 277 return c.client.Logical().Read(path) 278 } 279 280 // RenewToken renews the supplied token for a given duration (in seconds) and 281 // adds it to the min-heap so that it is renewed periodically by the renewal 282 // loop. Any error returned during renewal will be written to a buffered 283 // channel and the channel is returned instead of an actual error. This helps 284 // the caller be notified of a renewal failure asynchronously for appropriate 285 // actions to be taken. The caller of this function need not have to close the 286 // error channel. 
287 func (c *vaultClient) RenewToken(token string, increment int) (<-chan error, error) { 288 if token == "" { 289 err := fmt.Errorf("missing token") 290 return nil, err 291 } 292 if increment < 1 { 293 err := fmt.Errorf("increment cannot be less than 1") 294 return nil, err 295 } 296 297 // Create a buffered error channel 298 errCh := make(chan error, 1) 299 300 // Create a renewal request and indicate that the identifier in the 301 // request is a token and not a lease 302 renewalReq := &vaultClientRenewalRequest{ 303 errCh: errCh, 304 id: token, 305 isToken: true, 306 increment: increment, 307 } 308 309 // Perform the renewal of the token and send any error to the dedicated 310 // error channel. 311 if err := c.renew(renewalReq); err != nil { 312 c.logger.Error("error during renewal of token", "error", err) 313 metrics.IncrCounter([]string{"client", "vault", "renew_token_failure"}, 1) 314 return nil, err 315 } 316 317 return errCh, nil 318 } 319 320 // renew is a common method to handle renewal of both tokens and secret leases. 321 // It invokes a token renewal or a secret's lease renewal. If renewal is 322 // successful, min-heap is updated based on the duration after which it needs 323 // renewal again. The next renewal time is randomly selected to avoid spikes in 324 // the number of APIs periodically. 
325 func (c *vaultClient) renew(req *vaultClientRenewalRequest) error { 326 c.lock.Lock() 327 defer c.lock.Unlock() 328 329 if req == nil { 330 return fmt.Errorf("nil renewal request") 331 } 332 if req.errCh == nil { 333 return fmt.Errorf("renewal request error channel nil") 334 } 335 336 if !c.config.IsEnabled() { 337 close(req.errCh) 338 return fmt.Errorf("vault client not enabled") 339 } 340 if !c.running { 341 close(req.errCh) 342 return fmt.Errorf("vault client is not running") 343 } 344 if req.id == "" { 345 close(req.errCh) 346 return fmt.Errorf("missing id in renewal request") 347 } 348 if req.increment < 1 { 349 close(req.errCh) 350 return fmt.Errorf("increment cannot be less than 1") 351 } 352 353 var renewalErr error 354 leaseDuration := req.increment 355 if req.isToken { 356 // Set the token in the API client to the one that needs 357 // renewal 358 c.client.SetToken(req.id) 359 360 // Renew the token 361 renewResp, err := c.client.Auth().Token().RenewSelf(req.increment) 362 if err != nil { 363 renewalErr = fmt.Errorf("failed to renew the vault token: %v", err) 364 } else if renewResp == nil || renewResp.Auth == nil { 365 renewalErr = fmt.Errorf("failed to renew the vault token") 366 } else { 367 // Don't set this if renewal fails 368 leaseDuration = renewResp.Auth.LeaseDuration 369 } 370 371 // Reset the token in the API client before returning 372 c.client.SetToken("") 373 } else { 374 // Renew the secret 375 renewResp, err := c.client.Sys().Renew(req.id, req.increment) 376 if err != nil { 377 renewalErr = fmt.Errorf("failed to renew vault secret: %v", err) 378 } else if renewResp == nil { 379 renewalErr = fmt.Errorf("failed to renew vault secret") 380 } else { 381 // Don't set this if renewal fails 382 leaseDuration = renewResp.LeaseDuration 383 } 384 } 385 386 // Determine the next renewal time 387 renewalDuration := renewalTime(rand.Intn, leaseDuration) 388 next := time.Now().Add(renewalDuration) 389 390 fatal := false 391 if renewalErr != nil && 
392 (strings.Contains(renewalErr.Error(), "lease not found or lease is not renewable") || 393 strings.Contains(renewalErr.Error(), "lease is not renewable") || 394 strings.Contains(renewalErr.Error(), "token not found") || 395 strings.Contains(renewalErr.Error(), "permission denied")) { 396 fatal = true 397 } else if renewalErr != nil { 398 c.logger.Debug("renewal error details", "req.increment", req.increment, "lease_duration", leaseDuration, "renewal_duration", renewalDuration) 399 c.logger.Error("error during renewal of lease or token failed due to a non-fatal error; retrying", 400 "error", renewalErr, "period", next) 401 } 402 403 if c.isTracked(req.id) { 404 if fatal { 405 // If encountered with an error where in a lease or a 406 // token is not valid at all with vault, and if that 407 // item is tracked by the renewal loop, stop renewing 408 // it by removing the corresponding heap entry. 409 if err := c.heap.Remove(req.id); err != nil { 410 return fmt.Errorf("failed to remove heap entry: %v", err) 411 } 412 413 // Report the fatal error to the client 414 req.errCh <- renewalErr 415 close(req.errCh) 416 417 return renewalErr 418 } 419 420 // If the identifier is already tracked, this indicates a 421 // subsequest renewal. In this case, update the existing 422 // element in the heap with the new renewal time. 423 if err := c.heap.Update(req, next); err != nil { 424 return fmt.Errorf("failed to update heap entry. err: %v", err) 425 } 426 427 // There is no need to signal an update to the renewal loop 428 // here because this case is hit from the renewal loop itself. 429 } else { 430 if fatal { 431 // If encountered with an error where in a lease or a 432 // token is not valid at all with vault, and if that 433 // item is not tracked by renewal loop, don't add it. 
434 435 // Report the fatal error to the client 436 req.errCh <- renewalErr 437 close(req.errCh) 438 439 return renewalErr 440 } 441 442 // If the identifier is not already tracked, this is a first 443 // renewal request. In this case, add an entry into the heap 444 // with the next renewal time. 445 if err := c.heap.Push(req, next); err != nil { 446 return fmt.Errorf("failed to push an entry to heap. err: %v", err) 447 } 448 449 // Signal an update for the renewal loop to trigger a fresh 450 // computation for the next best candidate for renewal. 451 if c.running { 452 select { 453 case c.updateCh <- struct{}{}: 454 default: 455 } 456 } 457 } 458 459 return nil 460 } 461 462 // run is the renewal loop which performs the periodic renewals of both the 463 // tokens and the secret leases. 464 func (c *vaultClient) run() { 465 if !c.config.IsEnabled() { 466 return 467 } 468 469 var renewalCh <-chan time.Time 470 for c.config.IsEnabled() && c.isRunning() { 471 // Fetches the candidate for next renewal 472 renewalReq, renewalTime := c.nextRenewal() 473 if renewalTime.IsZero() { 474 // If the heap is empty, don't do anything 475 renewalCh = nil 476 } else { 477 now := time.Now() 478 if renewalTime.After(now) { 479 // Compute the duration after which the item 480 // needs renewal and set the renewalCh to fire 481 // at that time. 482 renewalDuration := renewalTime.Sub(time.Now()) 483 renewalCh = time.After(renewalDuration) 484 } else { 485 // If the renewals of multiple items are too 486 // close to each other and by the time the 487 // entry is fetched from heap it might be past 488 // the current time (by a small margin). In 489 // which case, fire immediately. 
490 renewalCh = time.After(0) 491 } 492 } 493 494 select { 495 case <-renewalCh: 496 if err := c.renew(renewalReq); err != nil { 497 c.logger.Error("error renewing token", "error", err) 498 metrics.IncrCounter([]string{"client", "vault", "renew_token_error"}, 1) 499 } 500 case <-c.updateCh: 501 continue 502 case <-c.stopCh: 503 c.logger.Debug("stopped") 504 return 505 } 506 } 507 } 508 509 // StopRenewToken removes the item from the heap which represents the given 510 // token. 511 func (c *vaultClient) StopRenewToken(token string) error { 512 return c.stopRenew(token) 513 } 514 515 // stopRenew removes the given identifier from the heap and signals the renewal 516 // loop to compute the next best candidate for renewal. 517 func (c *vaultClient) stopRenew(id string) error { 518 c.lock.Lock() 519 defer c.lock.Unlock() 520 521 if !c.isTracked(id) { 522 return nil 523 } 524 525 if err := c.heap.Remove(id); err != nil { 526 return fmt.Errorf("failed to remove heap entry: %v", err) 527 } 528 529 // Signal an update to the renewal loop. 530 if c.running { 531 select { 532 case c.updateCh <- struct{}{}: 533 default: 534 } 535 } 536 537 return nil 538 } 539 540 // nextRenewal returns the root element of the min-heap, which represents the 541 // next element to be renewed and the time at which the renewal needs to be 542 // triggered. 
543 func (c *vaultClient) nextRenewal() (*vaultClientRenewalRequest, time.Time) { 544 c.lock.RLock() 545 defer c.lock.RUnlock() 546 547 if c.heap.Length() == 0 { 548 return nil, time.Time{} 549 } 550 551 // Fetches the root element in the min-heap 552 nextEntry := c.heap.Peek() 553 if nextEntry == nil { 554 return nil, time.Time{} 555 } 556 557 return nextEntry.req, nextEntry.next 558 } 559 560 // Additional helper functions on top of interface methods 561 562 // Length returns the number of elements in the heap 563 func (h *vaultClientHeap) Length() int { 564 return len(h.heap) 565 } 566 567 // Returns the root node of the min-heap 568 func (h *vaultClientHeap) Peek() *vaultClientHeapEntry { 569 if len(h.heap) == 0 { 570 return nil 571 } 572 573 return h.heap[0] 574 } 575 576 // Push adds the secondary index and inserts an item into the heap 577 func (h *vaultClientHeap) Push(req *vaultClientRenewalRequest, next time.Time) error { 578 if req == nil { 579 return fmt.Errorf("nil request") 580 } 581 582 if _, ok := h.heapMap[req.id]; ok { 583 return fmt.Errorf("entry %v already exists", req.id) 584 } 585 586 heapEntry := &vaultClientHeapEntry{ 587 req: req, 588 next: next, 589 } 590 h.heapMap[req.id] = heapEntry 591 heap.Push(&h.heap, heapEntry) 592 return nil 593 } 594 595 // Update will modify the existing item in the heap with the new data and the 596 // time, and fixes the heap. 597 func (h *vaultClientHeap) Update(req *vaultClientRenewalRequest, next time.Time) error { 598 if entry, ok := h.heapMap[req.id]; ok { 599 entry.req = req 600 entry.next = next 601 heap.Fix(&h.heap, entry.index) 602 return nil 603 } 604 605 return fmt.Errorf("heap doesn't contain %v", req.id) 606 } 607 608 // Remove will remove an identifier from the secondary index and deletes the 609 // corresponding node from the heap. 
610 func (h *vaultClientHeap) Remove(id string) error { 611 if entry, ok := h.heapMap[id]; ok { 612 heap.Remove(&h.heap, entry.index) 613 delete(h.heapMap, id) 614 return nil 615 } 616 617 return fmt.Errorf("heap doesn't contain entry for %v", id) 618 } 619 620 // The heap interface requires the following methods to be implemented. 621 // * Push(x interface{}) // add x as element Len() 622 // * Pop() interface{} // remove and return element Len() - 1. 623 // * sort.Interface 624 // 625 // sort.Interface comprises of the following methods: 626 // * Len() int 627 // * Less(i, j int) bool 628 // * Swap(i, j int) 629 630 // Part of sort.Interface 631 func (h vaultDataHeapImp) Len() int { return len(h) } 632 633 // Part of sort.Interface 634 func (h vaultDataHeapImp) Less(i, j int) bool { 635 // Two zero times should return false. 636 // Otherwise, zero is "greater" than any other time. 637 // (To sort it at the end of the list.) 638 // Sort such that zero times are at the end of the list. 639 iZero, jZero := h[i].next.IsZero(), h[j].next.IsZero() 640 if iZero && jZero { 641 return false 642 } else if iZero { 643 return false 644 } else if jZero { 645 return true 646 } 647 648 return h[i].next.Before(h[j].next) 649 } 650 651 // Part of sort.Interface 652 func (h vaultDataHeapImp) Swap(i, j int) { 653 h[i], h[j] = h[j], h[i] 654 h[i].index = i 655 h[j].index = j 656 } 657 658 // Part of heap.Interface 659 func (h *vaultDataHeapImp) Push(x interface{}) { 660 n := len(*h) 661 entry := x.(*vaultClientHeapEntry) 662 entry.index = n 663 *h = append(*h, entry) 664 } 665 666 // Part of heap.Interface 667 func (h *vaultDataHeapImp) Pop() interface{} { 668 old := *h 669 n := len(old) 670 entry := old[n-1] 671 entry.index = -1 // for safety 672 *h = old[0 : n-1] 673 return entry 674 } 675 676 // randIntn is the function in math/rand needed by renewalTime. A type is used 677 // to ease deterministic testing. 
type randIntn func(int) int

// renewalTime computes how long to wait before renewing something whose
// lease lasts leaseDuration seconds. dice supplies the randomness used for
// jitter.
//
// The base wait is half the lease duration, which leaves time for latency
// and retries. When that half is at least 30 seconds, the wait is shifted by
// dice(20)-10 seconds. Leases shorter than one minute get no jitter and dice
// is not called for them.
func renewalTime(dice randIntn, leaseDuration int) time.Duration {
	const (
		// cutoff is the smallest half-lease, in seconds, that is worth
		// randomizing.
		cutoff = 30
		// jitter is the amount +/- to vary the renewal time.
		jitter = 10
	)

	wait := leaseDuration / 2
	if wait >= cutoff {
		wait += dice(jitter*2) - jitter
	}

	return time.Duration(wait) * time.Second
}