github.com/dkerwin/nomad@v0.3.3-0.20160525181927-74554135514b/nomad/state/state_store.go (about) 1 package state 2 3 import ( 4 "fmt" 5 "io" 6 "log" 7 "sync" 8 9 "github.com/hashicorp/go-memdb" 10 "github.com/hashicorp/nomad/nomad/structs" 11 "github.com/hashicorp/nomad/nomad/watch" 12 ) 13 14 // IndexEntry is used with the "index" table 15 // for managing the latest Raft index affecting a table. 16 type IndexEntry struct { 17 Key string 18 Value uint64 19 } 20 21 // The StateStore is responsible for maintaining all the Nomad 22 // state. It is manipulated by the FSM which maintains consistency 23 // through the use of Raft. The goals of the StateStore are to provide 24 // high concurrency for read operations without blocking writes, and 25 // to provide write availability in the face of reads. EVERY object 26 // returned as a result of a read against the state store should be 27 // considered a constant and NEVER modified in place. 28 type StateStore struct { 29 logger *log.Logger 30 db *memdb.MemDB 31 watch *stateWatch 32 } 33 34 // NewStateStore is used to create a new state store 35 func NewStateStore(logOutput io.Writer) (*StateStore, error) { 36 // Create the MemDB 37 db, err := memdb.NewMemDB(stateStoreSchema()) 38 if err != nil { 39 return nil, fmt.Errorf("state store setup failed: %v", err) 40 } 41 42 // Create the state store 43 s := &StateStore{ 44 logger: log.New(logOutput, "", log.LstdFlags), 45 db: db, 46 watch: newStateWatch(), 47 } 48 return s, nil 49 } 50 51 // Snapshot is used to create a point in time snapshot. Because 52 // we use MemDB, we just need to snapshot the state of the underlying 53 // database. 54 func (s *StateStore) Snapshot() (*StateSnapshot, error) { 55 snap := &StateSnapshot{ 56 StateStore: StateStore{ 57 logger: s.logger, 58 db: s.db.Snapshot(), 59 watch: s.watch, 60 }, 61 } 62 return snap, nil 63 } 64 65 // Restore is used to optimize the efficiency of rebuilding 66 // state by minimizing the number of transactions and checking 67 // overhead. 68 func (s *StateStore) Restore() (*StateRestore, error) { 69 txn := s.db.Txn(true) 70 r := &StateRestore{ 71 txn: txn, 72 watch: s.watch, 73 items: watch.NewItems(), 74 } 75 return r, nil 76 } 77 78 // Watch subscribes a channel to a set of watch items. 79 func (s *StateStore) Watch(items watch.Items, notify chan struct{}) { 80 s.watch.watch(items, notify) 81 } 82 83 // StopWatch unsubscribes a channel from a set of watch items. 84 func (s *StateStore) StopWatch(items watch.Items, notify chan struct{}) { 85 s.watch.stopWatch(items, notify) 86 } 87 88 // UpsertNode is used to register a node or update a node definition 89 // This is assumed to be triggered by the client, so we retain the value 90 // of drain which is set by the scheduler. 91 func (s *StateStore) UpsertNode(index uint64, node *structs.Node) error { 92 txn := s.db.Txn(true) 93 defer txn.Abort() 94 95 watcher := watch.NewItems() 96 watcher.Add(watch.Item{Table: "nodes"}) 97 watcher.Add(watch.Item{Node: node.ID}) 98 99 // Check if the node already exists 100 existing, err := txn.First("nodes", "id", node.ID) 101 if err != nil { 102 return fmt.Errorf("node lookup failed: %v", err) 103 } 104 105 // Setup the indexes correctly 106 if existing != nil { 107 exist := existing.(*structs.Node) 108 node.CreateIndex = exist.CreateIndex 109 node.ModifyIndex = index 110 node.Drain = exist.Drain // Retain the drain mode 111 } else { 112 node.CreateIndex = index 113 node.ModifyIndex = index 114 } 115 116 // Insert the node 117 if err := txn.Insert("nodes", node); err != nil { 118 return fmt.Errorf("node insert failed: %v", err) 119 } 120 if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil { 121 return fmt.Errorf("index update failed: %v", err) 122 } 123 124 txn.Defer(func() { s.watch.notify(watcher) }) 125 txn.Commit() 126 return nil 127 } 128 129 // DeleteNode is used to deregister a node 130 func (s *StateStore) DeleteNode(index uint64, nodeID string) error { 131 txn := s.db.Txn(true) 132 defer txn.Abort() 133 134 // Lookup the node 135 existing, err := txn.First("nodes", "id", nodeID) 136 if err != nil { 137 return fmt.Errorf("node lookup failed: %v", err) 138 } 139 if existing == nil { 140 return fmt.Errorf("node not found") 141 } 142 143 watcher := watch.NewItems() 144 watcher.Add(watch.Item{Table: "nodes"}) 145 watcher.Add(watch.Item{Node: nodeID}) 146 147 // Delete the node 148 if err := txn.Delete("nodes", existing); err != nil { 149 return fmt.Errorf("node delete failed: %v", err) 150 } 151 if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil { 152 return fmt.Errorf("index update failed: %v", err) 153 } 154 155 txn.Defer(func() { s.watch.notify(watcher) }) 156 txn.Commit() 157 return nil 158 } 159 160 // UpdateNodeStatus is used to update the status of a node 161 func (s *StateStore) UpdateNodeStatus(index uint64, nodeID, status string) error { 162 txn := s.db.Txn(true) 163 defer txn.Abort() 164 165 watcher := watch.NewItems() 166 watcher.Add(watch.Item{Table: "nodes"}) 167 watcher.Add(watch.Item{Node: nodeID}) 168 169 // Lookup the node 170 existing, err := txn.First("nodes", "id", nodeID) 171 if err != nil { 172 return fmt.Errorf("node lookup failed: %v", err) 173 } 174 if existing == nil { 175 return fmt.Errorf("node not found") 176 } 177 178 // Copy the existing node 179 existingNode := existing.(*structs.Node) 180 copyNode := new(structs.Node) 181 *copyNode = *existingNode 182 183 // Update the status in the copy 184 copyNode.Status = status 185 copyNode.ModifyIndex = index 186 187 // Insert the node 188 if err := txn.Insert("nodes", copyNode); err != nil { 189 return fmt.Errorf("node update failed: %v", err) 190 } 191 if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil { 192 return fmt.Errorf("index update failed: %v", err) 193 } 194 195 txn.Defer(func() { s.watch.notify(watcher) }) 196 txn.Commit() 197 return nil 198 } 199 200 // UpdateNodeDrain is used to update the drain of a node 201 func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, drain bool) error { 202 txn := s.db.Txn(true) 203 defer txn.Abort() 204 205 watcher := watch.NewItems() 206 watcher.Add(watch.Item{Table: "nodes"}) 207 watcher.Add(watch.Item{Node: nodeID}) 208 209 // Lookup the node 210 existing, err := txn.First("nodes", "id", nodeID) 211 if err != nil { 212 return fmt.Errorf("node lookup failed: %v", err) 213 } 214 if existing == nil { 215 return fmt.Errorf("node not found") 216 } 217 218 // Copy the existing node 219 existingNode := existing.(*structs.Node) 220 copyNode := new(structs.Node) 221 *copyNode = *existingNode 222 223 // Update the drain in the copy 224 copyNode.Drain = drain 225 copyNode.ModifyIndex = index 226 227 // Insert the node 228 if err := txn.Insert("nodes", copyNode); err != nil { 229 return fmt.Errorf("node update failed: %v", err) 230 } 231 if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil { 232 return fmt.Errorf("index update failed: %v", err) 233 } 234 235 txn.Defer(func() { s.watch.notify(watcher) }) 236 txn.Commit() 237 return nil 238 } 239 240 // NodeByID is used to lookup a node by ID 241 func (s *StateStore) NodeByID(nodeID string) (*structs.Node, error) { 242 txn := s.db.Txn(false) 243 244 existing, err := txn.First("nodes", "id", nodeID) 245 if err != nil { 246 return nil, fmt.Errorf("node lookup failed: %v", err) 247 } 248 249 if existing != nil { 250 return existing.(*structs.Node), nil 251 } 252 return nil, nil 253 } 254 255 // NodesByIDPrefix is used to lookup nodes by prefix 256 func (s *StateStore) NodesByIDPrefix(nodeID string) (memdb.ResultIterator, error) { 257 txn := s.db.Txn(false) 258 259 iter, err := txn.Get("nodes", "id_prefix", nodeID) 260 if err != nil { 261 return nil, fmt.Errorf("node lookup failed: %v", err) 262 } 263 264 return iter, nil 265 } 266 267 // Nodes returns an iterator over all the nodes 268 func (s *StateStore) Nodes() (memdb.ResultIterator, error) { 269 txn := s.db.Txn(false) 270 271 // Walk the entire nodes table 272 iter, err := txn.Get("nodes", "id") 273 if err != nil { 274 return nil, err 275 } 276 return iter, nil 277 } 278 279 // UpsertJob is used to register a job or update a job definition 280 func (s *StateStore) UpsertJob(index uint64, job *structs.Job) error { 281 txn := s.db.Txn(true) 282 defer txn.Abort() 283 284 watcher := watch.NewItems() 285 watcher.Add(watch.Item{Table: "jobs"}) 286 watcher.Add(watch.Item{Job: job.ID}) 287 288 // Check if the job already exists 289 existing, err := txn.First("jobs", "id", job.ID) 290 if err != nil { 291 return fmt.Errorf("job lookup failed: %v", err) 292 } 293 294 // Setup the indexes correctly 295 if existing != nil { 296 job.CreateIndex = existing.(*structs.Job).CreateIndex 297 job.ModifyIndex = index 298 job.JobModifyIndex = index 299 300 // Compute the job status 301 var err error 302 job.Status, err = s.getJobStatus(txn, job, false) 303 if err != nil { 304 return fmt.Errorf("setting job status for %q failed: %v", job.ID, err) 305 } 306 } else { 307 job.CreateIndex = index 308 job.ModifyIndex = index 309 job.JobModifyIndex = index 310 311 // If we are inserting the job for the first time, we don't need to 312 // calculate the jobs status as it is known. 313 if job.IsPeriodic() { 314 job.Status = structs.JobStatusRunning 315 } else { 316 job.Status = structs.JobStatusPending 317 } 318 } 319 320 // Insert the job 321 if err := txn.Insert("jobs", job); err != nil { 322 return fmt.Errorf("job insert failed: %v", err) 323 } 324 if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil { 325 return fmt.Errorf("index update failed: %v", err) 326 } 327 328 txn.Defer(func() { s.watch.notify(watcher) }) 329 txn.Commit() 330 return nil 331 } 332 333 // DeleteJob is used to deregister a job 334 func (s *StateStore) DeleteJob(index uint64, jobID string) error { 335 txn := s.db.Txn(true) 336 defer txn.Abort() 337 338 // Lookup the node 339 existing, err := txn.First("jobs", "id", jobID) 340 if err != nil { 341 return fmt.Errorf("job lookup failed: %v", err) 342 } 343 if existing == nil { 344 return fmt.Errorf("job not found") 345 } 346 347 watcher := watch.NewItems() 348 watcher.Add(watch.Item{Table: "jobs"}) 349 watcher.Add(watch.Item{Job: jobID}) 350 351 // Delete the node 352 if err := txn.Delete("jobs", existing); err != nil { 353 return fmt.Errorf("job delete failed: %v", err) 354 } 355 if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil { 356 return fmt.Errorf("index update failed: %v", err) 357 } 358 359 txn.Defer(func() { s.watch.notify(watcher) }) 360 txn.Commit() 361 return nil 362 } 363 364 // JobByID is used to lookup a job by its ID 365 func (s *StateStore) JobByID(id string) (*structs.Job, error) { 366 txn := s.db.Txn(false) 367 368 existing, err := txn.First("jobs", "id", id) 369 if err != nil { 370 return nil, fmt.Errorf("job lookup failed: %v", err) 371 } 372 373 if existing != nil { 374 return existing.(*structs.Job), nil 375 } 376 return nil, nil 377 } 378 379 // JobsByIDPrefix is used to lookup a job by prefix 380 func (s *StateStore) JobsByIDPrefix(id string) (memdb.ResultIterator, error) { 381 txn := s.db.Txn(false) 382 383 iter, err := txn.Get("jobs", "id_prefix", id) 384 if err != nil { 385 return nil, fmt.Errorf("job lookup failed: %v", err) 386 } 387 388 return iter, nil 389 } 390 391 // Jobs returns an iterator over all the jobs 392 func (s *StateStore) Jobs() (memdb.ResultIterator, error) { 393 txn := s.db.Txn(false) 394 395 // Walk the entire jobs table 396 iter, err := txn.Get("jobs", "id") 397 if err != nil { 398 return nil, err 399 } 400 return iter, nil 401 } 402 403 // JobsByPeriodic returns an iterator over all the periodic or non-periodic jobs. 404 func (s *StateStore) JobsByPeriodic(periodic bool) (memdb.ResultIterator, error) { 405 txn := s.db.Txn(false) 406 407 iter, err := txn.Get("jobs", "periodic", periodic) 408 if err != nil { 409 return nil, err 410 } 411 return iter, nil 412 } 413 414 // JobsByScheduler returns an iterator over all the jobs with the specific 415 // scheduler type. 416 func (s *StateStore) JobsByScheduler(schedulerType string) (memdb.ResultIterator, error) { 417 txn := s.db.Txn(false) 418 419 // Return an iterator for jobs with the specific type. 420 iter, err := txn.Get("jobs", "type", schedulerType) 421 if err != nil { 422 return nil, err 423 } 424 return iter, nil 425 } 426 427 // JobsByGC returns an iterator over all jobs eligible or uneligible for garbage 428 // collection. 429 func (s *StateStore) JobsByGC(gc bool) (memdb.ResultIterator, error) { 430 txn := s.db.Txn(false) 431 432 iter, err := txn.Get("jobs", "gc", gc) 433 if err != nil { 434 return nil, err 435 } 436 return iter, nil 437 } 438 439 // UpsertPeriodicLaunch is used to register a launch or update it. 440 func (s *StateStore) UpsertPeriodicLaunch(index uint64, launch *structs.PeriodicLaunch) error { 441 txn := s.db.Txn(true) 442 defer txn.Abort() 443 444 watcher := watch.NewItems() 445 watcher.Add(watch.Item{Table: "periodic_launch"}) 446 watcher.Add(watch.Item{Job: launch.ID}) 447 448 // Check if the job already exists 449 existing, err := txn.First("periodic_launch", "id", launch.ID) 450 if err != nil { 451 return fmt.Errorf("periodic launch lookup failed: %v", err) 452 } 453 454 // Setup the indexes correctly 455 if existing != nil { 456 launch.CreateIndex = existing.(*structs.PeriodicLaunch).CreateIndex 457 launch.ModifyIndex = index 458 } else { 459 launch.CreateIndex = index 460 launch.ModifyIndex = index 461 } 462 463 // Insert the job 464 if err := txn.Insert("periodic_launch", launch); err != nil { 465 return fmt.Errorf("launch insert failed: %v", err) 466 } 467 if err := txn.Insert("index", &IndexEntry{"periodic_launch", index}); err != nil { 468 return fmt.Errorf("index update failed: %v", err) 469 } 470 471 txn.Defer(func() { s.watch.notify(watcher) }) 472 txn.Commit() 473 return nil 474 } 475 476 // DeletePeriodicLaunch is used to delete the periodic launch 477 func (s *StateStore) DeletePeriodicLaunch(index uint64, jobID string) error { 478 txn := s.db.Txn(true) 479 defer txn.Abort() 480 481 // Lookup the launch 482 existing, err := txn.First("periodic_launch", "id", jobID) 483 if err != nil { 484 return fmt.Errorf("launch lookup failed: %v", err) 485 } 486 if existing == nil { 487 return fmt.Errorf("launch not found") 488 } 489 490 watcher := watch.NewItems() 491 watcher.Add(watch.Item{Table: "periodic_launch"}) 492 watcher.Add(watch.Item{Job: jobID}) 493 494 // Delete the launch 495 if err := txn.Delete("periodic_launch", existing); err != nil { 496 return fmt.Errorf("launch delete failed: %v", err) 497 } 498 if err := txn.Insert("index", &IndexEntry{"periodic_launch", index}); err != nil { 499 return fmt.Errorf("index update failed: %v", err) 500 } 501 502 txn.Defer(func() { s.watch.notify(watcher) }) 503 txn.Commit() 504 return nil 505 } 506 507 // PeriodicLaunchByID is used to lookup a periodic launch by the periodic job 508 // ID. 509 func (s *StateStore) PeriodicLaunchByID(id string) (*structs.PeriodicLaunch, error) { 510 txn := s.db.Txn(false) 511 512 existing, err := txn.First("periodic_launch", "id", id) 513 if err != nil { 514 return nil, fmt.Errorf("periodic launch lookup failed: %v", err) 515 } 516 517 if existing != nil { 518 return existing.(*structs.PeriodicLaunch), nil 519 } 520 return nil, nil 521 } 522 523 // PeriodicLaunches returns an iterator over all the periodic launches 524 func (s *StateStore) PeriodicLaunches() (memdb.ResultIterator, error) { 525 txn := s.db.Txn(false) 526 527 // Walk the entire table 528 iter, err := txn.Get("periodic_launch", "id") 529 if err != nil { 530 return nil, err 531 } 532 return iter, nil 533 } 534 535 // UpsertEvaluation is used to upsert an evaluation 536 func (s *StateStore) UpsertEvals(index uint64, evals []*structs.Evaluation) error { 537 txn := s.db.Txn(true) 538 defer txn.Abort() 539 540 watcher := watch.NewItems() 541 watcher.Add(watch.Item{Table: "evals"}) 542 543 // Do a nested upsert 544 jobs := make(map[string]string, len(evals)) 545 for _, eval := range evals { 546 watcher.Add(watch.Item{Eval: eval.ID}) 547 if err := s.nestedUpsertEval(txn, index, eval); err != nil { 548 return err 549 } 550 551 jobs[eval.JobID] = "" 552 } 553 554 // Set the job's status 555 if err := s.setJobStatuses(index, watcher, txn, jobs, false); err != nil { 556 return fmt.Errorf("setting job status failed: %v", err) 557 } 558 559 txn.Defer(func() { s.watch.notify(watcher) }) 560 txn.Commit() 561 return nil 562 } 563 564 // nestedUpsertEvaluation is used to nest an evaluation upsert within a transaction 565 func (s *StateStore) nestedUpsertEval(txn *memdb.Txn, index uint64, eval *structs.Evaluation) error { 566 // Lookup the evaluation 567 existing, err := txn.First("evals", "id", eval.ID) 568 if err != nil { 569 return fmt.Errorf("eval lookup failed: %v", err) 570 } 571 572 // Update the indexes 573 if existing != nil { 574 eval.CreateIndex = existing.(*structs.Evaluation).CreateIndex 575 eval.ModifyIndex = index 576 } else { 577 eval.CreateIndex = index 578 eval.ModifyIndex = index 579 } 580 581 // Insert the eval 582 if err := txn.Insert("evals", eval); err != nil { 583 return fmt.Errorf("eval insert failed: %v", err) 584 } 585 if err := txn.Insert("index", &IndexEntry{"evals", index}); err != nil { 586 return fmt.Errorf("index update failed: %v", err) 587 } 588 return nil 589 } 590 591 // DeleteEval is used to delete an evaluation 592 func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) error { 593 txn := s.db.Txn(true) 594 defer txn.Abort() 595 watcher := watch.NewItems() 596 watcher.Add(watch.Item{Table: "evals"}) 597 watcher.Add(watch.Item{Table: "allocs"}) 598 599 jobs := make(map[string]string, len(evals)) 600 for _, eval := range evals { 601 existing, err := txn.First("evals", "id", eval) 602 if err != nil { 603 return fmt.Errorf("eval lookup failed: %v", err) 604 } 605 if existing == nil { 606 continue 607 } 608 if err := txn.Delete("evals", existing); err != nil { 609 return fmt.Errorf("eval delete failed: %v", err) 610 } 611 watcher.Add(watch.Item{Eval: eval}) 612 jobs[existing.(*structs.Evaluation).JobID] = "" 613 } 614 615 for _, alloc := range allocs { 616 existing, err := txn.First("allocs", "id", alloc) 617 if err != nil { 618 return fmt.Errorf("alloc lookup failed: %v", err) 619 } 620 if existing == nil { 621 continue 622 } 623 if err := txn.Delete("allocs", existing); err != nil { 624 return fmt.Errorf("alloc delete failed: %v", err) 625 } 626 realAlloc := existing.(*structs.Allocation) 627 watcher.Add(watch.Item{Alloc: realAlloc.ID}) 628 watcher.Add(watch.Item{AllocEval: realAlloc.EvalID}) 629 watcher.Add(watch.Item{AllocJob: realAlloc.JobID}) 630 watcher.Add(watch.Item{AllocNode: realAlloc.NodeID}) 631 } 632 633 // Update the indexes 634 if err := txn.Insert("index", &IndexEntry{"evals", index}); err != nil { 635 return fmt.Errorf("index update failed: %v", err) 636 } 637 if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { 638 return fmt.Errorf("index update failed: %v", err) 639 } 640 641 // Set the job's status 642 if err := s.setJobStatuses(index, watcher, txn, jobs, true); err != nil { 643 return fmt.Errorf("setting job status failed: %v", err) 644 } 645 646 txn.Defer(func() { s.watch.notify(watcher) }) 647 txn.Commit() 648 return nil 649 } 650 651 // EvalByID is used to lookup an eval by its ID 652 func (s *StateStore) EvalByID(id string) (*structs.Evaluation, error) { 653 txn := s.db.Txn(false) 654 655 existing, err := txn.First("evals", "id", id) 656 if err != nil { 657 return nil, fmt.Errorf("eval lookup failed: %v", err) 658 } 659 660 if existing != nil { 661 return existing.(*structs.Evaluation), nil 662 } 663 return nil, nil 664 } 665 666 // EvalsByIDPrefix is used to lookup evaluations by prefix 667 func (s *StateStore) EvalsByIDPrefix(id string) (memdb.ResultIterator, error) { 668 txn := s.db.Txn(false) 669 670 iter, err := txn.Get("evals", "id_prefix", id) 671 if err != nil { 672 return nil, fmt.Errorf("eval lookup failed: %v", err) 673 } 674 675 return iter, nil 676 } 677 678 // EvalsByJob returns all the evaluations by job id 679 func (s *StateStore) EvalsByJob(jobID string) ([]*structs.Evaluation, error) { 680 txn := s.db.Txn(false) 681 682 // Get an iterator over the node allocations 683 iter, err := txn.Get("evals", "job", jobID) 684 if err != nil { 685 return nil, err 686 } 687 688 var out []*structs.Evaluation 689 for { 690 raw := iter.Next() 691 if raw == nil { 692 break 693 } 694 out = append(out, raw.(*structs.Evaluation)) 695 } 696 return out, nil 697 } 698 699 // Evals returns an iterator over all the evaluations 700 func (s *StateStore) Evals() (memdb.ResultIterator, error) { 701 txn := s.db.Txn(false) 702 703 // Walk the entire table 704 iter, err := txn.Get("evals", "id") 705 if err != nil { 706 return nil, err 707 } 708 return iter, nil 709 } 710 711 // UpdateAllocFromClient is used to update an allocation based on input 712 // from a client. While the schedulers are the authority on the allocation for 713 // most things, some updates are authoritative from the client. Specifically, 714 // the desired state comes from the schedulers, while the actual state comes 715 // from clients. 716 func (s *StateStore) UpdateAllocsFromClient(index uint64, allocs []*structs.Allocation) error { 717 txn := s.db.Txn(true) 718 defer txn.Abort() 719 720 // Setup the watcher 721 watcher := watch.NewItems() 722 watcher.Add(watch.Item{Table: "allocs"}) 723 724 // Handle each of the updated allocations 725 for _, alloc := range allocs { 726 if err := s.nestedUpdateAllocFromClient(txn, watcher, index, alloc); err != nil { 727 return err 728 } 729 } 730 731 // Update the indexes 732 if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { 733 return fmt.Errorf("index update failed: %v", err) 734 } 735 736 txn.Defer(func() { s.watch.notify(watcher) }) 737 txn.Commit() 738 return nil 739 } 740 741 // nestedUpdateAllocFromClient is used to nest an update of an allocation with client status 742 func (s *StateStore) nestedUpdateAllocFromClient(txn *memdb.Txn, watcher watch.Items, index uint64, alloc *structs.Allocation) error { 743 // Look for existing alloc 744 existing, err := txn.First("allocs", "id", alloc.ID) 745 if err != nil { 746 return fmt.Errorf("alloc lookup failed: %v", err) 747 } 748 749 // Nothing to do if this does not exist 750 if existing == nil { 751 return nil 752 } 753 exist := existing.(*structs.Allocation) 754 755 // Trigger the watcher 756 watcher.Add(watch.Item{Alloc: alloc.ID}) 757 watcher.Add(watch.Item{AllocEval: exist.EvalID}) 758 watcher.Add(watch.Item{AllocJob: exist.JobID}) 759 watcher.Add(watch.Item{AllocNode: exist.NodeID}) 760 761 // Copy everything from the existing allocation 762 copyAlloc := new(structs.Allocation) 763 *copyAlloc = *exist 764 765 // Pull in anything the client is the authority on 766 copyAlloc.ClientStatus = alloc.ClientStatus 767 copyAlloc.ClientDescription = alloc.ClientDescription 768 copyAlloc.TaskStates = alloc.TaskStates 769 770 // Update the modify index 771 copyAlloc.ModifyIndex = index 772 773 // Update the allocation 774 if err := txn.Insert("allocs", copyAlloc); err != nil { 775 return fmt.Errorf("alloc insert failed: %v", err) 776 } 777 778 // Set the job's status 779 forceStatus := "" 780 if !copyAlloc.TerminalStatus() { 781 forceStatus = structs.JobStatusRunning 782 } 783 jobs := map[string]string{exist.JobID: forceStatus} 784 if err := s.setJobStatuses(index, watcher, txn, jobs, false); err != nil { 785 return fmt.Errorf("setting job status failed: %v", err) 786 } 787 return nil 788 } 789 790 // UpsertAllocs is used to evict a set of allocations 791 // and allocate new ones at the same time. 792 func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) error { 793 txn := s.db.Txn(true) 794 defer txn.Abort() 795 796 watcher := watch.NewItems() 797 watcher.Add(watch.Item{Table: "allocs"}) 798 799 // Handle the allocations 800 jobs := make(map[string]string, 1) 801 for _, alloc := range allocs { 802 existing, err := txn.First("allocs", "id", alloc.ID) 803 if err != nil { 804 return fmt.Errorf("alloc lookup failed: %v", err) 805 } 806 807 if existing == nil { 808 alloc.CreateIndex = index 809 alloc.ModifyIndex = index 810 alloc.AllocModifyIndex = index 811 } else { 812 exist := existing.(*structs.Allocation) 813 alloc.CreateIndex = exist.CreateIndex 814 alloc.ModifyIndex = index 815 alloc.AllocModifyIndex = index 816 alloc.ClientStatus = exist.ClientStatus 817 alloc.ClientDescription = exist.ClientDescription 818 } 819 if err := txn.Insert("allocs", alloc); err != nil { 820 return fmt.Errorf("alloc insert failed: %v", err) 821 } 822 823 // If the allocation is running, force the job to running status. 824 forceStatus := "" 825 if !alloc.TerminalStatus() { 826 forceStatus = structs.JobStatusRunning 827 } 828 jobs[alloc.JobID] = forceStatus 829 830 watcher.Add(watch.Item{Alloc: alloc.ID}) 831 watcher.Add(watch.Item{AllocEval: alloc.EvalID}) 832 watcher.Add(watch.Item{AllocJob: alloc.JobID}) 833 watcher.Add(watch.Item{AllocNode: alloc.NodeID}) 834 } 835 836 // Update the indexes 837 if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { 838 return fmt.Errorf("index update failed: %v", err) 839 } 840 841 // Set the job's status 842 if err := s.setJobStatuses(index, watcher, txn, jobs, false); err != nil { 843 return fmt.Errorf("setting job status failed: %v", err) 844 } 845 846 txn.Defer(func() { s.watch.notify(watcher) }) 847 txn.Commit() 848 return nil 849 } 850 851 // AllocByID is used to lookup an allocation by its ID 852 func (s *StateStore) AllocByID(id string) (*structs.Allocation, error) { 853 txn := s.db.Txn(false) 854 855 existing, err := txn.First("allocs", "id", id) 856 if err != nil { 857 return nil, fmt.Errorf("alloc lookup failed: %v", err) 858 } 859 860 if existing != nil { 861 return existing.(*structs.Allocation), nil 862 } 863 return nil, nil 864 } 865 866 // AllocsByIDPrefix is used to lookup allocs by prefix 867 func (s *StateStore) AllocsByIDPrefix(id string) (memdb.ResultIterator, error) { 868 txn := s.db.Txn(false) 869 870 iter, err := txn.Get("allocs", "id_prefix", id) 871 if err != nil { 872 return nil, fmt.Errorf("alloc lookup failed: %v", err) 873 } 874 875 return iter, nil 876 } 877 878 // AllocsByNode returns all the allocations by node 879 func (s *StateStore) AllocsByNode(node string) ([]*structs.Allocation, error) { 880 txn := s.db.Txn(false) 881 882 // Get an iterator over the node allocations, using only the 883 // node prefix which ignores the terminal status 884 iter, err := txn.Get("allocs", "node_prefix", node) 885 if err != nil { 886 return nil, err 887 } 888 889 var out []*structs.Allocation 890 for { 891 raw := iter.Next() 892 if raw == nil { 893 break 894 } 895 out = append(out, raw.(*structs.Allocation)) 896 } 897 return out, nil 898 } 899 900 // AllocsByNode returns all the allocations by node and terminal status 901 func (s *StateStore) AllocsByNodeTerminal(node string, terminal bool) ([]*structs.Allocation, error) { 902 txn := s.db.Txn(false) 903 904 // Get an iterator over the node allocations 905 iter, err := txn.Get("allocs", "node", node, terminal) 906 if err != nil { 907 return nil, err 908 } 909 910 var out []*structs.Allocation 911 for { 912 raw := iter.Next() 913 if raw == nil { 914 break 915 } 916 out = append(out, raw.(*structs.Allocation)) 917 } 918 return out, nil 919 } 920 921 // AllocsByJob returns all the allocations by job id 922 func (s *StateStore) AllocsByJob(jobID string) ([]*structs.Allocation, error) { 923 txn := s.db.Txn(false) 924 925 // Get an iterator over the node allocations 926 iter, err := txn.Get("allocs", "job", jobID) 927 if err != nil { 928 return nil, err 929 } 930 931 var out []*structs.Allocation 932 for { 933 raw := iter.Next() 934 if raw == nil { 935 break 936 } 937 out = append(out, raw.(*structs.Allocation)) 938 } 939 return out, nil 940 } 941 942 // AllocsByEval returns all the allocations by eval id 943 func (s *StateStore) AllocsByEval(evalID string) ([]*structs.Allocation, error) { 944 txn := s.db.Txn(false) 945 946 // Get an iterator over the eval allocations 947 iter, err := txn.Get("allocs", "eval", evalID) 948 if err != nil { 949 return nil, err 950 } 951 952 var out []*structs.Allocation 953 for { 954 raw := iter.Next() 955 if raw == nil { 956 break 957 } 958 out = append(out, raw.(*structs.Allocation)) 959 } 960 return out, nil 961 } 962 963 // Allocs returns an iterator over all the evaluations 964 func (s *StateStore) Allocs() (memdb.ResultIterator, error) { 965 txn := s.db.Txn(false) 966 967 // Walk the entire table 968 iter, err := txn.Get("allocs", "id") 969 if err != nil { 970 return nil, err 971 } 972 return iter, nil 973 } 974 975 // LastIndex returns the greatest index value for all indexes 976 func (s *StateStore) LatestIndex() (uint64, error) { 977 indexes, err := s.Indexes() 978 if err != nil { 979 return 0, err 980 } 981 982 var max uint64 = 0 983 for { 984 raw := indexes.Next() 985 if raw == nil { 986 break 987 } 988 989 // Prepare the request struct 990 idx := raw.(*IndexEntry) 991 992 // Determine the max 993 if idx.Value > max { 994 max = idx.Value 995 } 996 } 997 998 return max, nil 999 } 1000 1001 // Index finds the matching index value 1002 func (s *StateStore) Index(name string) (uint64, error) { 1003 txn := s.db.Txn(false) 1004 1005 // Lookup the first matching index 1006 out, err := txn.First("index", "id", name) 1007 if err != nil { 1008 return 0, err 1009 } 1010 if out == nil { 1011 return 0, nil 1012 } 1013 return out.(*IndexEntry).Value, nil 1014 } 1015 1016 // Indexes returns an iterator over all the indexes 1017 func (s *StateStore) Indexes() (memdb.ResultIterator, error) { 1018 txn := s.db.Txn(false) 1019 1020 // Walk the entire nodes table 1021 iter, err := txn.Get("index", "id") 1022 if err != nil { 1023 return nil, err 1024 } 1025 return iter, nil 1026 } 1027 1028 // setJobStatuses is a helper for calling setJobStatus on multiple jobs by ID. 1029 // It takes a map of job IDs to an optional forceStatus string. It returns an 1030 // error if the job doesn't exist or setJobStatus fails. 1031 func (s *StateStore) setJobStatuses(index uint64, watcher watch.Items, txn *memdb.Txn, 1032 jobs map[string]string, evalDelete bool) error { 1033 for job, forceStatus := range jobs { 1034 existing, err := txn.First("jobs", "id", job) 1035 if err != nil { 1036 return fmt.Errorf("job lookup failed: %v", err) 1037 } 1038 1039 if existing == nil { 1040 continue 1041 } 1042 1043 if err := s.setJobStatus(index, watcher, txn, existing.(*structs.Job), evalDelete, forceStatus); err != nil { 1044 return err 1045 } 1046 } 1047 1048 return nil 1049 } 1050 1051 // setJobStatus sets the status of the job by looking up associated evaluations 1052 // and allocations. evalDelete should be set to true if setJobStatus is being 1053 // called because an evaluation is being deleted (potentially because of garbage 1054 // collection). If forceStatus is non-empty, the job's status will be set to the 1055 // passed status. 1056 func (s *StateStore) setJobStatus(index uint64, watcher watch.Items, txn *memdb.Txn, 1057 job *structs.Job, evalDelete bool, forceStatus string) error { 1058 1059 // Capture the current status so we can check if there is a change 1060 oldStatus := job.Status 1061 newStatus := forceStatus 1062 1063 // If forceStatus is not set, compute the jobs status. 1064 if forceStatus == "" { 1065 var err error 1066 newStatus, err = s.getJobStatus(txn, job, evalDelete) 1067 if err != nil { 1068 return err 1069 } 1070 } 1071 1072 // Fast-path if nothing has changed. 1073 if oldStatus == newStatus { 1074 return nil 1075 } 1076 1077 // The job has changed, so add to watcher. 1078 watcher.Add(watch.Item{Table: "jobs"}) 1079 watcher.Add(watch.Item{Job: job.ID}) 1080 1081 // Copy and update the existing job 1082 updated := job.Copy() 1083 updated.Status = newStatus 1084 updated.ModifyIndex = index 1085 1086 // Insert the job 1087 if err := txn.Insert("jobs", updated); err != nil { 1088 return fmt.Errorf("job insert failed: %v", err) 1089 } 1090 if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil { 1091 return fmt.Errorf("index update failed: %v", err) 1092 } 1093 return nil 1094 } 1095 1096 func (s *StateStore) getJobStatus(txn *memdb.Txn, job *structs.Job, evalDelete bool) (string, error) { 1097 allocs, err := txn.Get("allocs", "job", job.ID) 1098 if err != nil { 1099 return "", err 1100 } 1101 1102 // If there is a non-terminal allocation, the job is running. 1103 hasAlloc := false 1104 for alloc := allocs.Next(); alloc != nil; alloc = allocs.Next() { 1105 hasAlloc = true 1106 if !alloc.(*structs.Allocation).TerminalStatus() { 1107 return structs.JobStatusRunning, nil 1108 } 1109 } 1110 1111 evals, err := txn.Get("evals", "job", job.ID) 1112 if err != nil { 1113 return "", err 1114 } 1115 1116 hasEval := false 1117 for eval := evals.Next(); eval != nil; eval = evals.Next() { 1118 hasEval = true 1119 if !eval.(*structs.Evaluation).TerminalStatus() { 1120 return structs.JobStatusPending, nil 1121 } 1122 } 1123 1124 // The job is dead if all the allocations and evals are terminal or if there 1125 // are no evals because of garbage collection. 1126 if evalDelete || hasEval || hasAlloc { 1127 return structs.JobStatusDead, nil 1128 } 1129 1130 // If there are no allocations or evaluations it is a new job. If the job is 1131 // periodic, we mark it as running as it will never have an 1132 // allocation/evaluation against it. 1133 if job.IsPeriodic() { 1134 return structs.JobStatusRunning, nil 1135 } 1136 return structs.JobStatusPending, nil 1137 } 1138 1139 // StateSnapshot is used to provide a point-in-time snapshot 1140 type StateSnapshot struct { 1141 StateStore 1142 } 1143 1144 // StateRestore is used to optimize the performance when 1145 // restoring state by only using a single large transaction 1146 // instead of thousands of sub transactions 1147 type StateRestore struct { 1148 txn *memdb.Txn 1149 watch *stateWatch 1150 items watch.Items 1151 } 1152 1153 // Abort is used to abort the restore operation 1154 func (s *StateRestore) Abort() { 1155 s.txn.Abort() 1156 } 1157 1158 // Commit is used to commit the restore operation 1159 func (s *StateRestore) Commit() { 1160 s.txn.Defer(func() { s.watch.notify(s.items) }) 1161 s.txn.Commit() 1162 } 1163 1164 // NodeRestore is used to restore a node 1165 func (r *StateRestore) NodeRestore(node *structs.Node) error { 1166 r.items.Add(watch.Item{Table: "nodes"}) 1167 r.items.Add(watch.Item{Node: node.ID}) 1168 if err := r.txn.Insert("nodes", node); err != nil { 1169 return fmt.Errorf("node insert failed: %v", err) 1170 } 1171 return nil 1172 } 1173 1174 // JobRestore is used to restore a job 1175 func (r *StateRestore) JobRestore(job *structs.Job) error { 1176 r.items.Add(watch.Item{Table: "jobs"}) 1177 r.items.Add(watch.Item{Job: job.ID}) 1178 if err := r.txn.Insert("jobs", job); err != nil { 1179 return fmt.Errorf("job insert failed: %v", err) 1180 } 1181 return nil 1182 } 1183 1184 // EvalRestore is used to restore an evaluation 1185 func (r *StateRestore) EvalRestore(eval *structs.Evaluation) error { 1186 r.items.Add(watch.Item{Table: "evals"}) 1187 r.items.Add(watch.Item{Eval: eval.ID}) 1188 if err := r.txn.Insert("evals", eval); err != nil { 1189 return fmt.Errorf("eval insert failed: %v", err) 1190 } 1191 return nil 1192 } 1193 1194 // AllocRestore is used to restore an allocation 1195 func (r *StateRestore) AllocRestore(alloc *structs.Allocation) error { 1196 r.items.Add(watch.Item{Table: "allocs"}) 1197 r.items.Add(watch.Item{Alloc: alloc.ID}) 1198 r.items.Add(watch.Item{AllocEval: alloc.EvalID}) 1199 r.items.Add(watch.Item{AllocJob: alloc.JobID}) 1200 r.items.Add(watch.Item{AllocNode: alloc.NodeID}) 1201 if err := r.txn.Insert("allocs", alloc); err != nil { 1202 return fmt.Errorf("alloc insert failed: %v", err) 1203 } 1204 return nil 1205 } 1206 1207 // IndexRestore is used to restore an index 1208 func (r *StateRestore) IndexRestore(idx *IndexEntry) error { 1209 if err := r.txn.Insert("index", idx); err != nil { 1210 return fmt.Errorf("index insert failed: %v", err) 1211 } 1212 return nil 1213 } 1214 1215 // PeriodicLaunchRestore is used to restore a periodic launch. 1216 func (r *StateRestore) PeriodicLaunchRestore(launch *structs.PeriodicLaunch) error { 1217 r.items.Add(watch.Item{Table: "periodic_launch"}) 1218 r.items.Add(watch.Item{Job: launch.ID}) 1219 if err := r.txn.Insert("periodic_launch", launch); err != nil { 1220 return fmt.Errorf("periodic launch insert failed: %v", err) 1221 } 1222 return nil 1223 } 1224 1225 // stateWatch holds shared state for watching updates. This is 1226 // outside of StateStore so it can be shared with snapshots. 1227 type stateWatch struct { 1228 items map[watch.Item]*NotifyGroup 1229 l sync.Mutex 1230 } 1231 1232 // newStateWatch creates a new stateWatch for change notification. 1233 func newStateWatch() *stateWatch { 1234 return &stateWatch{ 1235 items: make(map[watch.Item]*NotifyGroup), 1236 } 1237 } 1238 1239 // watch subscribes a channel to the given watch items. 1240 func (w *stateWatch) watch(items watch.Items, ch chan struct{}) { 1241 w.l.Lock() 1242 defer w.l.Unlock() 1243 1244 for item, _ := range items { 1245 grp, ok := w.items[item] 1246 if !ok { 1247 grp = new(NotifyGroup) 1248 w.items[item] = grp 1249 } 1250 grp.Wait(ch) 1251 } 1252 } 1253 1254 // stopWatch unsubscribes a channel from the given watch items. 1255 func (w *stateWatch) stopWatch(items watch.Items, ch chan struct{}) { 1256 w.l.Lock() 1257 defer w.l.Unlock() 1258 1259 for item, _ := range items { 1260 if grp, ok := w.items[item]; ok { 1261 grp.Clear(ch) 1262 if grp.Empty() { 1263 delete(w.items, item) 1264 } 1265 } 1266 } 1267 } 1268 1269 // notify is used to fire notifications on the given watch items. 1270 func (w *stateWatch) notify(items watch.Items) { 1271 w.l.Lock() 1272 defer w.l.Unlock() 1273 1274 for wi, _ := range items { 1275 if grp, ok := w.items[wi]; ok { 1276 grp.Notify() 1277 } 1278 } 1279 }