github.com/jmitchell/nomad@v0.1.3-0.20151007230021-7ab84c2862d8/nomad/state/state_store.go (about) 1 package state 2 3 import ( 4 "fmt" 5 "io" 6 "log" 7 "sync" 8 9 "github.com/hashicorp/go-memdb" 10 "github.com/hashicorp/nomad/nomad/structs" 11 ) 12 13 // The StateStore is responsible for maintaining all the Nomad 14 // state. It is manipulated by the FSM which maintains consistency 15 // through the use of Raft. The goals of the StateStore are to provide 16 // high concurrency for read operations without blocking writes, and 17 // to provide write availability in the face of reads. EVERY object 18 // returned as a result of a read against the state store should be 19 // considered a constant and NEVER modified in place. 20 type StateStore struct { 21 logger *log.Logger 22 db *memdb.MemDB 23 watch *stateWatch 24 } 25 26 // StateSnapshot is used to provide a point-in-time snapshot 27 type StateSnapshot struct { 28 StateStore 29 } 30 31 // StateRestore is used to optimize the performance when 32 // restoring state by only using a single large transaction 33 // instead of thousands of sub transactions 34 type StateRestore struct { 35 txn *memdb.Txn 36 watch *stateWatch 37 allocNodes map[string]struct{} 38 } 39 40 // Abort is used to abort the restore operation 41 func (s *StateRestore) Abort() { 42 s.txn.Abort() 43 } 44 45 // Commit is used to commit the restore operation 46 func (s *StateRestore) Commit() { 47 s.txn.Defer(func() { s.watch.notifyAllocs(s.allocNodes) }) 48 s.txn.Commit() 49 } 50 51 // IndexEntry is used with the "index" table 52 // for managing the latest Raft index affecting a table. 53 type IndexEntry struct { 54 Key string 55 Value uint64 56 } 57 58 // stateWatch holds shared state for watching updates. This is 59 // outside of StateStore so it can be shared with snapshots. 60 type stateWatch struct { 61 allocs map[string]*NotifyGroup 62 allocLock sync.Mutex 63 } 64 65 // NewStateStore is used to create a new state store 66 func NewStateStore(logOutput io.Writer) (*StateStore, error) { 67 // Create the MemDB 68 db, err := memdb.NewMemDB(stateStoreSchema()) 69 if err != nil { 70 return nil, fmt.Errorf("state store setup failed: %v", err) 71 } 72 73 // Create the watch entry 74 watch := &stateWatch{ 75 allocs: make(map[string]*NotifyGroup), 76 } 77 78 // Create the state store 79 s := &StateStore{ 80 logger: log.New(logOutput, "", log.LstdFlags), 81 db: db, 82 watch: watch, 83 } 84 return s, nil 85 } 86 87 // Snapshot is used to create a point in time snapshot. Because 88 // we use MemDB, we just need to snapshot the state of the underlying 89 // database. 90 func (s *StateStore) Snapshot() (*StateSnapshot, error) { 91 snap := &StateSnapshot{ 92 StateStore: StateStore{ 93 logger: s.logger, 94 db: s.db.Snapshot(), 95 watch: s.watch, 96 }, 97 } 98 return snap, nil 99 } 100 101 // Restore is used to optimize the efficiency of rebuilding 102 // state by minimizing the number of transactions and checking 103 // overhead. 104 func (s *StateStore) Restore() (*StateRestore, error) { 105 txn := s.db.Txn(true) 106 r := &StateRestore{ 107 txn: txn, 108 watch: s.watch, 109 allocNodes: make(map[string]struct{}), 110 } 111 return r, nil 112 } 113 114 // WatchAllocs is used to subscribe a channel to changes in allocations for a node 115 func (s *StateStore) WatchAllocs(node string, notify chan struct{}) { 116 s.watch.allocLock.Lock() 117 defer s.watch.allocLock.Unlock() 118 119 // Check for an existing notify group 120 if grp, ok := s.watch.allocs[node]; ok { 121 grp.Wait(notify) 122 return 123 } 124 125 // Create new notify group 126 grp := &NotifyGroup{} 127 grp.Wait(notify) 128 s.watch.allocs[node] = grp 129 } 130 131 // StopWatchAllocs is used to unsubscribe a channel from changes in allocations 132 func (s *StateStore) StopWatchAllocs(node string, notify chan struct{}) { 133 s.watch.allocLock.Lock() 134 defer s.watch.allocLock.Unlock() 135 136 // Check for an existing notify group 137 if grp, ok := s.watch.allocs[node]; ok { 138 grp.Clear(notify) 139 if grp.Empty() { 140 delete(s.watch.allocs, node) 141 } 142 } 143 } 144 145 // notifyAllocs is used to notify any node alloc listeners of a change 146 func (w *stateWatch) notifyAllocs(nodes map[string]struct{}) { 147 w.allocLock.Lock() 148 defer w.allocLock.Unlock() 149 150 for node := range nodes { 151 if grp, ok := w.allocs[node]; ok { 152 grp.Notify() 153 delete(w.allocs, node) 154 } 155 } 156 } 157 158 // UpsertNode is used to register a node or update a node definition 159 // This is assumed to be triggered by the client, so we retain the value 160 // of drain which is set by the scheduler. 161 func (s *StateStore) UpsertNode(index uint64, node *structs.Node) error { 162 txn := s.db.Txn(true) 163 defer txn.Abort() 164 165 // Check if the node already exists 166 existing, err := txn.First("nodes", "id", node.ID) 167 if err != nil { 168 return fmt.Errorf("node lookup failed: %v", err) 169 } 170 171 // Setup the indexes correctly 172 if existing != nil { 173 exist := existing.(*structs.Node) 174 node.CreateIndex = exist.CreateIndex 175 node.ModifyIndex = index 176 node.Drain = exist.Drain // Retain the drain mode 177 } else { 178 node.CreateIndex = index 179 node.ModifyIndex = index 180 } 181 182 // Insert the node 183 if err := txn.Insert("nodes", node); err != nil { 184 return fmt.Errorf("node insert failed: %v", err) 185 } 186 if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil { 187 return fmt.Errorf("index update failed: %v", err) 188 } 189 190 txn.Commit() 191 return nil 192 } 193 194 // DeleteNode is used to deregister a node 195 func (s *StateStore) DeleteNode(index uint64, nodeID string) error { 196 txn := s.db.Txn(true) 197 defer txn.Abort() 198 199 // Lookup the node 200 existing, err := txn.First("nodes", "id", nodeID) 201 if err != nil { 202 return fmt.Errorf("node lookup failed: %v", err) 203 } 204 if existing == nil { 205 return fmt.Errorf("node not found") 206 } 207 208 // Delete the node 209 if err := txn.Delete("nodes", existing); err != nil { 210 return fmt.Errorf("node delete failed: %v", err) 211 } 212 if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil { 213 return fmt.Errorf("index update failed: %v", err) 214 } 215 216 txn.Commit() 217 return nil 218 } 219 220 // UpdateNodeStatus is used to update the status of a node 221 func (s *StateStore) UpdateNodeStatus(index uint64, nodeID, status string) error { 222 txn := s.db.Txn(true) 223 defer txn.Abort() 224 225 // Lookup the node 226 existing, err := txn.First("nodes", "id", nodeID) 227 if err != nil { 228 return fmt.Errorf("node lookup failed: %v", err) 229 } 230 if existing == nil { 231 return fmt.Errorf("node not found") 232 } 233 234 // Copy the existing node 235 existingNode := existing.(*structs.Node) 236 copyNode := new(structs.Node) 237 *copyNode = *existingNode 238 239 // Update the status in the copy 240 copyNode.Status = status 241 copyNode.ModifyIndex = index 242 243 // Insert the node 244 if err := txn.Insert("nodes", copyNode); err != nil { 245 return fmt.Errorf("node update failed: %v", err) 246 } 247 if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil { 248 return fmt.Errorf("index update failed: %v", err) 249 } 250 251 txn.Commit() 252 return nil 253 } 254 255 // UpdateNodeDrain is used to update the drain of a node 256 func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, drain bool) error { 257 txn := s.db.Txn(true) 258 defer txn.Abort() 259 260 // Lookup the node 261 existing, err := txn.First("nodes", "id", nodeID) 262 if err != nil { 263 return fmt.Errorf("node lookup failed: %v", err) 264 } 265 if existing == nil { 266 return fmt.Errorf("node not found") 267 } 268 269 // Copy the existing node 270 existingNode := existing.(*structs.Node) 271 copyNode := new(structs.Node) 272 *copyNode = *existingNode 273 274 // Update the drain in the copy 275 copyNode.Drain = drain 276 copyNode.ModifyIndex = index 277 278 // Insert the node 279 if err := txn.Insert("nodes", copyNode); err != nil { 280 return fmt.Errorf("node update failed: %v", err) 281 } 282 if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil { 283 return fmt.Errorf("index update failed: %v", err) 284 } 285 286 txn.Commit() 287 return nil 288 } 289 290 // NodeByID is used to lookup a node by ID 291 func (s *StateStore) NodeByID(nodeID string) (*structs.Node, error) { 292 txn := s.db.Txn(false) 293 294 existing, err := txn.First("nodes", "id", nodeID) 295 if err != nil { 296 return nil, fmt.Errorf("node lookup failed: %v", err) 297 } 298 299 if existing != nil { 300 return existing.(*structs.Node), nil 301 } 302 return nil, nil 303 } 304 305 // Nodes returns an iterator over all the nodes 306 func (s *StateStore) Nodes() (memdb.ResultIterator, error) { 307 txn := s.db.Txn(false) 308 309 // Walk the entire nodes table 310 iter, err := txn.Get("nodes", "id") 311 if err != nil { 312 return nil, err 313 } 314 return iter, nil 315 } 316 317 // UpsertJob is used to register a job or update a job definition 318 func (s *StateStore) UpsertJob(index uint64, job *structs.Job) error { 319 txn := s.db.Txn(true) 320 defer txn.Abort() 321 322 // Check if the job already exists 323 existing, err := txn.First("jobs", "id", job.ID) 324 if err != nil { 325 return fmt.Errorf("job lookup failed: %v", err) 326 } 327 328 // Setup the indexes correctly 329 if existing != nil { 330 job.CreateIndex = existing.(*structs.Job).CreateIndex 331 job.ModifyIndex = index 332 } else { 333 job.CreateIndex = index 334 job.ModifyIndex = index 335 } 336 337 // Insert the job 338 if err := txn.Insert("jobs", job); err != nil { 339 return fmt.Errorf("job insert failed: %v", err) 340 } 341 if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil { 342 return fmt.Errorf("index update failed: %v", err) 343 } 344 345 txn.Commit() 346 return nil 347 } 348 349 // DeleteJob is used to deregister a job 350 func (s *StateStore) DeleteJob(index uint64, jobID string) error { 351 txn := s.db.Txn(true) 352 defer txn.Abort() 353 354 // Lookup the node 355 existing, err := txn.First("jobs", "id", jobID) 356 if err != nil { 357 return fmt.Errorf("job lookup failed: %v", err) 358 } 359 if existing == nil { 360 return fmt.Errorf("job not found") 361 } 362 363 // Delete the node 364 if err := txn.Delete("jobs", existing); err != nil { 365 return fmt.Errorf("job delete failed: %v", err) 366 } 367 if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil { 368 return fmt.Errorf("index update failed: %v", err) 369 } 370 371 txn.Commit() 372 return nil 373 } 374 375 // JobByID is used to lookup a job by its ID 376 func (s *StateStore) JobByID(id string) (*structs.Job, error) { 377 txn := s.db.Txn(false) 378 379 existing, err := txn.First("jobs", "id", id) 380 if err != nil { 381 return nil, fmt.Errorf("job lookup failed: %v", err) 382 } 383 384 if existing != nil { 385 return existing.(*structs.Job), nil 386 } 387 return nil, nil 388 } 389 390 // Jobs returns an iterator over all the jobs 391 func (s *StateStore) Jobs() (memdb.ResultIterator, error) { 392 txn := s.db.Txn(false) 393 394 // Walk the entire jobs table 395 iter, err := txn.Get("jobs", "id") 396 if err != nil { 397 return nil, err 398 } 399 return iter, nil 400 } 401 402 // UpsertEvaluation is used to upsert an evaluation 403 func (s *StateStore) UpsertEvals(index uint64, evals []*structs.Evaluation) error { 404 txn := s.db.Txn(true) 405 defer txn.Abort() 406 407 // Do a nested upsert 408 for _, eval := range evals { 409 if err := s.nestedUpsertEval(txn, index, eval); err != nil { 410 return err 411 } 412 } 413 414 txn.Commit() 415 return nil 416 } 417 418 // nestedUpsertEvaluation is used to nest an evaluation upsert within a transaction 419 func (s *StateStore) nestedUpsertEval(txn *memdb.Txn, index uint64, eval *structs.Evaluation) error { 420 // Lookup the evaluation 421 existing, err := txn.First("evals", "id", eval.ID) 422 if err != nil { 423 return fmt.Errorf("eval lookup failed: %v", err) 424 } 425 426 // Update the indexes 427 if existing != nil { 428 eval.CreateIndex = existing.(*structs.Evaluation).CreateIndex 429 eval.ModifyIndex = index 430 } else { 431 eval.CreateIndex = index 432 eval.ModifyIndex = index 433 } 434 435 // Insert the eval 436 if err := txn.Insert("evals", eval); err != nil { 437 return fmt.Errorf("eval insert failed: %v", err) 438 } 439 if err := txn.Insert("index", &IndexEntry{"evals", index}); err != nil { 440 return fmt.Errorf("index update failed: %v", err) 441 } 442 return nil 443 } 444 445 // DeleteEval is used to delete an evaluation 446 func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) error { 447 txn := s.db.Txn(true) 448 defer txn.Abort() 449 nodes := make(map[string]struct{}) 450 451 for _, eval := range evals { 452 existing, err := txn.First("evals", "id", eval) 453 if err != nil { 454 return fmt.Errorf("eval lookup failed: %v", err) 455 } 456 if existing == nil { 457 continue 458 } 459 if err := txn.Delete("evals", existing); err != nil { 460 return fmt.Errorf("eval delete failed: %v", err) 461 } 462 } 463 464 for _, alloc := range allocs { 465 existing, err := txn.First("allocs", "id", alloc) 466 if err != nil { 467 return fmt.Errorf("alloc lookup failed: %v", err) 468 } 469 if existing == nil { 470 continue 471 } 472 nodes[existing.(*structs.Allocation).NodeID] = struct{}{} 473 if err := txn.Delete("allocs", existing); err != nil { 474 return fmt.Errorf("alloc delete failed: %v", err) 475 } 476 } 477 478 // Update the indexes 479 if err := txn.Insert("index", &IndexEntry{"evals", index}); err != nil { 480 return fmt.Errorf("index update failed: %v", err) 481 } 482 if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { 483 return fmt.Errorf("index update failed: %v", err) 484 } 485 txn.Defer(func() { s.watch.notifyAllocs(nodes) }) 486 txn.Commit() 487 return nil 488 } 489 490 // EvalByID is used to lookup an eval by its ID 491 func (s *StateStore) EvalByID(id string) (*structs.Evaluation, error) { 492 txn := s.db.Txn(false) 493 494 existing, err := txn.First("evals", "id", id) 495 if err != nil { 496 return nil, fmt.Errorf("eval lookup failed: %v", err) 497 } 498 499 if existing != nil { 500 return existing.(*structs.Evaluation), nil 501 } 502 return nil, nil 503 } 504 505 // EvalsByJob returns all the evaluations by job id 506 func (s *StateStore) EvalsByJob(jobID string) ([]*structs.Evaluation, error) { 507 txn := s.db.Txn(false) 508 509 // Get an iterator over the node allocations 510 iter, err := txn.Get("evals", "job", jobID) 511 if err != nil { 512 return nil, err 513 } 514 515 var out []*structs.Evaluation 516 for { 517 raw := iter.Next() 518 if raw == nil { 519 break 520 } 521 out = append(out, raw.(*structs.Evaluation)) 522 } 523 return out, nil 524 } 525 526 // Evals returns an iterator over all the evaluations 527 func (s *StateStore) Evals() (memdb.ResultIterator, error) { 528 txn := s.db.Txn(false) 529 530 // Walk the entire table 531 iter, err := txn.Get("evals", "id") 532 if err != nil { 533 return nil, err 534 } 535 return iter, nil 536 } 537 538 // UpdateAllocFromClient is used to update an allocation based on input 539 // from a client. While the schedulers are the authority on the allocation for 540 // most things, some updates are authoritative from the client. Specifically, 541 // the desired state comes from the schedulers, while the actual state comes 542 // from clients. 543 func (s *StateStore) UpdateAllocFromClient(index uint64, alloc *structs.Allocation) error { 544 txn := s.db.Txn(true) 545 defer txn.Abort() 546 547 // Look for existing alloc 548 existing, err := txn.First("allocs", "id", alloc.ID) 549 if err != nil { 550 return fmt.Errorf("alloc lookup failed: %v", err) 551 } 552 553 // Nothing to do if this does not exist 554 if existing == nil { 555 return nil 556 } 557 exist := existing.(*structs.Allocation) 558 559 // Copy everything from the existing allocation 560 copyAlloc := new(structs.Allocation) 561 *copyAlloc = *exist 562 563 // Pull in anything the client is the authority on 564 copyAlloc.ClientStatus = alloc.ClientStatus 565 copyAlloc.ClientDescription = alloc.ClientDescription 566 567 // Update the modify index 568 copyAlloc.ModifyIndex = index 569 570 // Update the allocation 571 if err := txn.Insert("allocs", copyAlloc); err != nil { 572 return fmt.Errorf("alloc insert failed: %v", err) 573 } 574 575 // Update the indexes 576 if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { 577 return fmt.Errorf("index update failed: %v", err) 578 } 579 580 nodes := map[string]struct{}{alloc.NodeID: struct{}{}} 581 txn.Defer(func() { s.watch.notifyAllocs(nodes) }) 582 txn.Commit() 583 return nil 584 } 585 586 // UpsertAllocs is used to evict a set of allocations 587 // and allocate new ones at the same time. 588 func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) error { 589 txn := s.db.Txn(true) 590 defer txn.Abort() 591 nodes := make(map[string]struct{}) 592 593 // Handle the allocations 594 for _, alloc := range allocs { 595 existing, err := txn.First("allocs", "id", alloc.ID) 596 if err != nil { 597 return fmt.Errorf("alloc lookup failed: %v", err) 598 } 599 600 if existing == nil { 601 alloc.CreateIndex = index 602 alloc.ModifyIndex = index 603 } else { 604 exist := existing.(*structs.Allocation) 605 alloc.CreateIndex = exist.CreateIndex 606 alloc.ModifyIndex = index 607 alloc.ClientStatus = exist.ClientStatus 608 alloc.ClientDescription = exist.ClientDescription 609 } 610 nodes[alloc.NodeID] = struct{}{} 611 if err := txn.Insert("allocs", alloc); err != nil { 612 return fmt.Errorf("alloc insert failed: %v", err) 613 } 614 } 615 616 // Update the indexes 617 if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { 618 return fmt.Errorf("index update failed: %v", err) 619 } 620 621 txn.Defer(func() { s.watch.notifyAllocs(nodes) }) 622 txn.Commit() 623 return nil 624 } 625 626 // AllocByID is used to lookup an allocation by its ID 627 func (s *StateStore) AllocByID(id string) (*structs.Allocation, error) { 628 txn := s.db.Txn(false) 629 630 existing, err := txn.First("allocs", "id", id) 631 if err != nil { 632 return nil, fmt.Errorf("alloc lookup failed: %v", err) 633 } 634 635 if existing != nil { 636 return existing.(*structs.Allocation), nil 637 } 638 return nil, nil 639 } 640 641 // AllocsByNode returns all the allocations by node 642 func (s *StateStore) AllocsByNode(node string) ([]*structs.Allocation, error) { 643 txn := s.db.Txn(false) 644 645 // Get an iterator over the node allocations 646 iter, err := txn.Get("allocs", "node", node) 647 if err != nil { 648 return nil, err 649 } 650 651 var out []*structs.Allocation 652 for { 653 raw := iter.Next() 654 if raw == nil { 655 break 656 } 657 out = append(out, raw.(*structs.Allocation)) 658 } 659 return out, nil 660 } 661 662 // AllocsByJob returns all the allocations by job id 663 func (s *StateStore) AllocsByJob(jobID string) ([]*structs.Allocation, error) { 664 txn := s.db.Txn(false) 665 666 // Get an iterator over the node allocations 667 iter, err := txn.Get("allocs", "job", jobID) 668 if err != nil { 669 return nil, err 670 } 671 672 var out []*structs.Allocation 673 for { 674 raw := iter.Next() 675 if raw == nil { 676 break 677 } 678 out = append(out, raw.(*structs.Allocation)) 679 } 680 return out, nil 681 } 682 683 // AllocsByEval returns all the allocations by eval id 684 func (s *StateStore) AllocsByEval(evalID string) ([]*structs.Allocation, error) { 685 txn := s.db.Txn(false) 686 687 // Get an iterator over the eval allocations 688 iter, err := txn.Get("allocs", "eval", evalID) 689 if err != nil { 690 return nil, err 691 } 692 693 var out []*structs.Allocation 694 for { 695 raw := iter.Next() 696 if raw == nil { 697 break 698 } 699 out = append(out, raw.(*structs.Allocation)) 700 } 701 return out, nil 702 } 703 704 // Allocs returns an iterator over all the evaluations 705 func (s *StateStore) Allocs() (memdb.ResultIterator, error) { 706 txn := s.db.Txn(false) 707 708 // Walk the entire table 709 iter, err := txn.Get("allocs", "id") 710 if err != nil { 711 return nil, err 712 } 713 return iter, nil 714 } 715 716 // Index finds the matching index value 717 func (s *StateStore) Index(name string) (uint64, error) { 718 txn := s.db.Txn(false) 719 720 // Lookup the first matching index 721 out, err := txn.First("index", "id", name) 722 if err != nil { 723 return 0, err 724 } 725 if out == nil { 726 return 0, nil 727 } 728 return out.(*IndexEntry).Value, nil 729 } 730 731 // Indexes returns an iterator over all the indexes 732 func (s *StateStore) Indexes() (memdb.ResultIterator, error) { 733 txn := s.db.Txn(false) 734 735 // Walk the entire nodes table 736 iter, err := txn.Get("index", "id") 737 if err != nil { 738 return nil, err 739 } 740 return iter, nil 741 } 742 743 // NodeRestore is used to restore a node 744 func (r *StateRestore) NodeRestore(node *structs.Node) error { 745 if err := r.txn.Insert("nodes", node); err != nil { 746 return fmt.Errorf("node insert failed: %v", err) 747 } 748 return nil 749 } 750 751 // JobRestore is used to restore a job 752 func (r *StateRestore) JobRestore(job *structs.Job) error { 753 if err := r.txn.Insert("jobs", job); err != nil { 754 return fmt.Errorf("job insert failed: %v", err) 755 } 756 return nil 757 } 758 759 // EvalRestore is used to restore an evaluation 760 func (r *StateRestore) EvalRestore(eval *structs.Evaluation) error { 761 if err := r.txn.Insert("evals", eval); err != nil { 762 return fmt.Errorf("eval insert failed: %v", err) 763 } 764 return nil 765 } 766 767 // AllocRestore is used to restore an allocation 768 func (r *StateRestore) AllocRestore(alloc *structs.Allocation) error { 769 r.allocNodes[alloc.NodeID] = struct{}{} 770 if err := r.txn.Insert("allocs", alloc); err != nil { 771 return fmt.Errorf("alloc insert failed: %v", err) 772 } 773 return nil 774 } 775 776 // IndexRestore is used to restore an index 777 func (r *StateRestore) IndexRestore(idx *IndexEntry) error { 778 if err := r.txn.Insert("index", idx); err != nil { 779 return fmt.Errorf("index insert failed: %v", err) 780 } 781 return nil 782 }