github.com/maier/nomad@v0.4.1-0.20161110003312-a9e3d0b8549d/nomad/state/state_store.go (about) 1 package state 2 3 import ( 4 "fmt" 5 "io" 6 "log" 7 "sync" 8 9 "github.com/hashicorp/go-memdb" 10 "github.com/hashicorp/nomad/nomad/structs" 11 "github.com/hashicorp/nomad/nomad/watch" 12 ) 13 14 // IndexEntry is used with the "index" table 15 // for managing the latest Raft index affecting a table. 16 type IndexEntry struct { 17 Key string 18 Value uint64 19 } 20 21 // The StateStore is responsible for maintaining all the Nomad 22 // state. It is manipulated by the FSM which maintains consistency 23 // through the use of Raft. The goals of the StateStore are to provide 24 // high concurrency for read operations without blocking writes, and 25 // to provide write availability in the face of reads. EVERY object 26 // returned as a result of a read against the state store should be 27 // considered a constant and NEVER modified in place. 28 type StateStore struct { 29 logger *log.Logger 30 db *memdb.MemDB 31 watch *stateWatch 32 } 33 34 // NewStateStore is used to create a new state store 35 func NewStateStore(logOutput io.Writer) (*StateStore, error) { 36 // Create the MemDB 37 db, err := memdb.NewMemDB(stateStoreSchema()) 38 if err != nil { 39 return nil, fmt.Errorf("state store setup failed: %v", err) 40 } 41 42 // Create the state store 43 s := &StateStore{ 44 logger: log.New(logOutput, "", log.LstdFlags), 45 db: db, 46 watch: newStateWatch(), 47 } 48 return s, nil 49 } 50 51 // Snapshot is used to create a point in time snapshot. Because 52 // we use MemDB, we just need to snapshot the state of the underlying 53 // database. 54 func (s *StateStore) Snapshot() (*StateSnapshot, error) { 55 snap := &StateSnapshot{ 56 StateStore: StateStore{ 57 logger: s.logger, 58 db: s.db.Snapshot(), 59 watch: s.watch, 60 }, 61 } 62 return snap, nil 63 } 64 65 // Restore is used to optimize the efficiency of rebuilding 66 // state by minimizing the number of transactions and checking 67 // overhead. 68 func (s *StateStore) Restore() (*StateRestore, error) { 69 txn := s.db.Txn(true) 70 r := &StateRestore{ 71 txn: txn, 72 watch: s.watch, 73 items: watch.NewItems(), 74 } 75 return r, nil 76 } 77 78 // Watch subscribes a channel to a set of watch items. 79 func (s *StateStore) Watch(items watch.Items, notify chan struct{}) { 80 s.watch.watch(items, notify) 81 } 82 83 // StopWatch unsubscribes a channel from a set of watch items. 84 func (s *StateStore) StopWatch(items watch.Items, notify chan struct{}) { 85 s.watch.stopWatch(items, notify) 86 } 87 88 // UpsertJobSummary upserts a job summary into the state store. 89 func (s *StateStore) UpsertJobSummary(index uint64, jobSummary *structs.JobSummary) error { 90 txn := s.db.Txn(true) 91 defer txn.Abort() 92 93 // Update the index 94 if err := txn.Insert("job_summary", *jobSummary); err != nil { 95 return err 96 } 97 98 // Update the indexes table for job summary 99 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 100 return fmt.Errorf("index update failed: %v", err) 101 } 102 103 txn.Commit() 104 return nil 105 } 106 107 // DeleteJobSummary deletes the job summary with the given ID. This is for 108 // testing purposes only. 109 func (s *StateStore) DeleteJobSummary(index uint64, id string) error { 110 txn := s.db.Txn(true) 111 defer txn.Abort() 112 113 // Delete the job summary 114 if _, err := txn.DeleteAll("job_summary", "id", id); err != nil { 115 return fmt.Errorf("deleting job summary failed: %v", err) 116 } 117 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 118 return fmt.Errorf("index update failed: %v", err) 119 } 120 txn.Commit() 121 return nil 122 } 123 124 // UpsertNode is used to register a node or update a node definition 125 // This is assumed to be triggered by the client, so we retain the value 126 // of drain which is set by the scheduler. 127 func (s *StateStore) UpsertNode(index uint64, node *structs.Node) error { 128 txn := s.db.Txn(true) 129 defer txn.Abort() 130 131 watcher := watch.NewItems() 132 watcher.Add(watch.Item{Table: "nodes"}) 133 watcher.Add(watch.Item{Node: node.ID}) 134 135 // Check if the node already exists 136 existing, err := txn.First("nodes", "id", node.ID) 137 if err != nil { 138 return fmt.Errorf("node lookup failed: %v", err) 139 } 140 141 // Setup the indexes correctly 142 if existing != nil { 143 exist := existing.(*structs.Node) 144 node.CreateIndex = exist.CreateIndex 145 node.ModifyIndex = index 146 node.Drain = exist.Drain // Retain the drain mode 147 } else { 148 node.CreateIndex = index 149 node.ModifyIndex = index 150 } 151 152 // Insert the node 153 if err := txn.Insert("nodes", node); err != nil { 154 return fmt.Errorf("node insert failed: %v", err) 155 } 156 if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil { 157 return fmt.Errorf("index update failed: %v", err) 158 } 159 160 txn.Defer(func() { s.watch.notify(watcher) }) 161 txn.Commit() 162 return nil 163 } 164 165 // DeleteNode is used to deregister a node 166 func (s *StateStore) DeleteNode(index uint64, nodeID string) error { 167 txn := s.db.Txn(true) 168 defer txn.Abort() 169 170 // Lookup the node 171 existing, err := txn.First("nodes", "id", nodeID) 172 if err != nil { 173 return fmt.Errorf("node lookup failed: %v", err) 174 } 175 if existing == nil { 176 return fmt.Errorf("node not found") 177 } 178 179 watcher := watch.NewItems() 180 watcher.Add(watch.Item{Table: "nodes"}) 181 watcher.Add(watch.Item{Node: nodeID}) 182 183 // Delete the node 184 if err := txn.Delete("nodes", existing); err != nil { 185 return fmt.Errorf("node delete failed: %v", err) 186 } 187 if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil { 188 return fmt.Errorf("index update failed: %v", err) 189 } 190 191 txn.Defer(func() { s.watch.notify(watcher) }) 192 txn.Commit() 193 return nil 194 } 195 196 // UpdateNodeStatus is used to update the status of a node 197 func (s *StateStore) UpdateNodeStatus(index uint64, nodeID, status string) error { 198 txn := s.db.Txn(true) 199 defer txn.Abort() 200 201 watcher := watch.NewItems() 202 watcher.Add(watch.Item{Table: "nodes"}) 203 watcher.Add(watch.Item{Node: nodeID}) 204 205 // Lookup the node 206 existing, err := txn.First("nodes", "id", nodeID) 207 if err != nil { 208 return fmt.Errorf("node lookup failed: %v", err) 209 } 210 if existing == nil { 211 return fmt.Errorf("node not found") 212 } 213 214 // Copy the existing node 215 existingNode := existing.(*structs.Node) 216 copyNode := new(structs.Node) 217 *copyNode = *existingNode 218 219 // Update the status in the copy 220 copyNode.Status = status 221 copyNode.ModifyIndex = index 222 223 // Insert the node 224 if err := txn.Insert("nodes", copyNode); err != nil { 225 return fmt.Errorf("node update failed: %v", err) 226 } 227 if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil { 228 return fmt.Errorf("index update failed: %v", err) 229 } 230 231 txn.Defer(func() { s.watch.notify(watcher) }) 232 txn.Commit() 233 return nil 234 } 235 236 // UpdateNodeDrain is used to update the drain of a node 237 func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, drain bool) error { 238 txn := s.db.Txn(true) 239 defer txn.Abort() 240 241 watcher := watch.NewItems() 242 watcher.Add(watch.Item{Table: "nodes"}) 243 watcher.Add(watch.Item{Node: nodeID}) 244 245 // Lookup the node 246 existing, err := txn.First("nodes", "id", nodeID) 247 if err != nil { 248 return fmt.Errorf("node lookup failed: %v", err) 249 } 250 if existing == nil { 251 return fmt.Errorf("node not found") 252 } 253 254 // Copy the existing node 255 existingNode := existing.(*structs.Node) 256 copyNode := new(structs.Node) 257 *copyNode = *existingNode 258 259 // Update the drain in the copy 260 copyNode.Drain = drain 261 copyNode.ModifyIndex = index 262 263 // Insert the node 264 if err := txn.Insert("nodes", copyNode); err != nil { 265 return fmt.Errorf("node update failed: %v", err) 266 } 267 if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil { 268 return fmt.Errorf("index update failed: %v", err) 269 } 270 271 txn.Defer(func() { s.watch.notify(watcher) }) 272 txn.Commit() 273 return nil 274 } 275 276 // NodeByID is used to lookup a node by ID 277 func (s *StateStore) NodeByID(nodeID string) (*structs.Node, error) { 278 txn := s.db.Txn(false) 279 280 existing, err := txn.First("nodes", "id", nodeID) 281 if err != nil { 282 return nil, fmt.Errorf("node lookup failed: %v", err) 283 } 284 285 if existing != nil { 286 return existing.(*structs.Node), nil 287 } 288 return nil, nil 289 } 290 291 // NodesByIDPrefix is used to lookup nodes by prefix 292 func (s *StateStore) NodesByIDPrefix(nodeID string) (memdb.ResultIterator, error) { 293 txn := s.db.Txn(false) 294 295 iter, err := txn.Get("nodes", "id_prefix", nodeID) 296 if err != nil { 297 return nil, fmt.Errorf("node lookup failed: %v", err) 298 } 299 300 return iter, nil 301 } 302 303 // Nodes returns an iterator over all the nodes 304 func (s *StateStore) Nodes() (memdb.ResultIterator, error) { 305 txn := s.db.Txn(false) 306 307 // Walk the entire nodes table 308 iter, err := txn.Get("nodes", "id") 309 if err != nil { 310 return nil, err 311 } 312 return iter, nil 313 } 314 315 // UpsertJob is used to register a job or update a job definition 316 func (s *StateStore) UpsertJob(index uint64, job *structs.Job) error { 317 txn := s.db.Txn(true) 318 defer txn.Abort() 319 320 watcher := watch.NewItems() 321 watcher.Add(watch.Item{Table: "jobs"}) 322 watcher.Add(watch.Item{Job: job.ID}) 323 324 // Check if the job already exists 325 existing, err := txn.First("jobs", "id", job.ID) 326 if err != nil { 327 return fmt.Errorf("job lookup failed: %v", err) 328 } 329 330 // Setup the indexes correctly 331 if existing != nil { 332 job.CreateIndex = existing.(*structs.Job).CreateIndex 333 job.ModifyIndex = index 334 job.JobModifyIndex = index 335 336 // Compute the job status 337 var err error 338 job.Status, err = s.getJobStatus(txn, job, false) 339 if err != nil { 340 return fmt.Errorf("setting job status for %q failed: %v", job.ID, err) 341 } 342 } else { 343 job.CreateIndex = index 344 job.ModifyIndex = index 345 job.JobModifyIndex = index 346 347 // If we are inserting the job for the first time, we don't need to 348 // calculate the jobs status as it is known. 349 if job.IsPeriodic() { 350 job.Status = structs.JobStatusRunning 351 } else { 352 job.Status = structs.JobStatusPending 353 } 354 } 355 356 if err := s.updateSummaryWithJob(index, job, watcher, txn); err != nil { 357 return fmt.Errorf("unable to create job summary: %v", err) 358 } 359 360 // Create the EphemeralDisk if it's nil by adding up DiskMB from task resources. 361 // COMPAT 0.4.1 -> 0.5 362 s.addEphemeralDiskToTaskGroups(job) 363 364 // Insert the job 365 if err := txn.Insert("jobs", job); err != nil { 366 return fmt.Errorf("job insert failed: %v", err) 367 } 368 if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil { 369 return fmt.Errorf("index update failed: %v", err) 370 } 371 372 txn.Defer(func() { s.watch.notify(watcher) }) 373 txn.Commit() 374 return nil 375 } 376 377 // DeleteJob is used to deregister a job 378 func (s *StateStore) DeleteJob(index uint64, jobID string) error { 379 txn := s.db.Txn(true) 380 defer txn.Abort() 381 382 // Lookup the node 383 existing, err := txn.First("jobs", "id", jobID) 384 if err != nil { 385 return fmt.Errorf("job lookup failed: %v", err) 386 } 387 if existing == nil { 388 return fmt.Errorf("job not found") 389 } 390 391 watcher := watch.NewItems() 392 watcher.Add(watch.Item{Table: "jobs"}) 393 watcher.Add(watch.Item{Job: jobID}) 394 watcher.Add(watch.Item{Table: "job_summary"}) 395 watcher.Add(watch.Item{JobSummary: jobID}) 396 397 // Delete the node 398 if err := txn.Delete("jobs", existing); err != nil { 399 return fmt.Errorf("job delete failed: %v", err) 400 } 401 if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil { 402 return fmt.Errorf("index update failed: %v", err) 403 } 404 405 // Delete the job summary 406 if _, err = txn.DeleteAll("job_summary", "id", jobID); err != nil { 407 return fmt.Errorf("deleing job summary failed: %v", err) 408 } 409 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 410 return fmt.Errorf("index update failed: %v", err) 411 } 412 413 txn.Defer(func() { s.watch.notify(watcher) }) 414 txn.Commit() 415 return nil 416 } 417 418 // JobByID is used to lookup a job by its ID 419 func (s *StateStore) JobByID(id string) (*structs.Job, error) { 420 txn := s.db.Txn(false) 421 422 existing, err := txn.First("jobs", "id", id) 423 if err != nil { 424 return nil, fmt.Errorf("job lookup failed: %v", err) 425 } 426 427 if existing != nil { 428 return existing.(*structs.Job), nil 429 } 430 return nil, nil 431 } 432 433 // JobsByIDPrefix is used to lookup a job by prefix 434 func (s *StateStore) JobsByIDPrefix(id string) (memdb.ResultIterator, error) { 435 txn := s.db.Txn(false) 436 437 iter, err := txn.Get("jobs", "id_prefix", id) 438 if err != nil { 439 return nil, fmt.Errorf("job lookup failed: %v", err) 440 } 441 442 return iter, nil 443 } 444 445 // Jobs returns an iterator over all the jobs 446 func (s *StateStore) Jobs() (memdb.ResultIterator, error) { 447 txn := s.db.Txn(false) 448 449 // Walk the entire jobs table 450 iter, err := txn.Get("jobs", "id") 451 if err != nil { 452 return nil, err 453 } 454 return iter, nil 455 } 456 457 // JobsByPeriodic returns an iterator over all the periodic or non-periodic jobs. 458 func (s *StateStore) JobsByPeriodic(periodic bool) (memdb.ResultIterator, error) { 459 txn := s.db.Txn(false) 460 461 iter, err := txn.Get("jobs", "periodic", periodic) 462 if err != nil { 463 return nil, err 464 } 465 return iter, nil 466 } 467 468 // JobsByScheduler returns an iterator over all the jobs with the specific 469 // scheduler type. 470 func (s *StateStore) JobsByScheduler(schedulerType string) (memdb.ResultIterator, error) { 471 txn := s.db.Txn(false) 472 473 // Return an iterator for jobs with the specific type. 474 iter, err := txn.Get("jobs", "type", schedulerType) 475 if err != nil { 476 return nil, err 477 } 478 return iter, nil 479 } 480 481 // JobsByGC returns an iterator over all jobs eligible or uneligible for garbage 482 // collection. 483 func (s *StateStore) JobsByGC(gc bool) (memdb.ResultIterator, error) { 484 txn := s.db.Txn(false) 485 486 iter, err := txn.Get("jobs", "gc", gc) 487 if err != nil { 488 return nil, err 489 } 490 return iter, nil 491 } 492 493 // JobSummary returns a job summary object which matches a specific id. 494 func (s *StateStore) JobSummaryByID(jobID string) (*structs.JobSummary, error) { 495 txn := s.db.Txn(false) 496 497 existing, err := txn.First("job_summary", "id", jobID) 498 if err != nil { 499 return nil, err 500 } 501 if existing != nil { 502 summary := existing.(structs.JobSummary) 503 return summary.Copy(), nil 504 } 505 506 return nil, nil 507 } 508 509 // JobSummaries walks the entire job summary table and returns all the job 510 // summary objects 511 func (s *StateStore) JobSummaries() (memdb.ResultIterator, error) { 512 txn := s.db.Txn(false) 513 514 iter, err := txn.Get("job_summary", "id") 515 if err != nil { 516 return nil, err 517 } 518 return iter, nil 519 } 520 521 // JobSummaryByPrefix is used to look up Job Summary by id prefix 522 func (s *StateStore) JobSummaryByPrefix(id string) (memdb.ResultIterator, error) { 523 txn := s.db.Txn(false) 524 525 iter, err := txn.Get("job_summary", "id_prefix", id) 526 if err != nil { 527 return nil, fmt.Errorf("eval lookup failed: %v", err) 528 } 529 530 return iter, nil 531 } 532 533 // UpsertPeriodicLaunch is used to register a launch or update it. 534 func (s *StateStore) UpsertPeriodicLaunch(index uint64, launch *structs.PeriodicLaunch) error { 535 txn := s.db.Txn(true) 536 defer txn.Abort() 537 538 watcher := watch.NewItems() 539 watcher.Add(watch.Item{Table: "periodic_launch"}) 540 watcher.Add(watch.Item{Job: launch.ID}) 541 542 // Check if the job already exists 543 existing, err := txn.First("periodic_launch", "id", launch.ID) 544 if err != nil { 545 return fmt.Errorf("periodic launch lookup failed: %v", err) 546 } 547 548 // Setup the indexes correctly 549 if existing != nil { 550 launch.CreateIndex = existing.(*structs.PeriodicLaunch).CreateIndex 551 launch.ModifyIndex = index 552 } else { 553 launch.CreateIndex = index 554 launch.ModifyIndex = index 555 } 556 557 // Insert the job 558 if err := txn.Insert("periodic_launch", launch); err != nil { 559 return fmt.Errorf("launch insert failed: %v", err) 560 } 561 if err := txn.Insert("index", &IndexEntry{"periodic_launch", index}); err != nil { 562 return fmt.Errorf("index update failed: %v", err) 563 } 564 565 txn.Defer(func() { s.watch.notify(watcher) }) 566 txn.Commit() 567 return nil 568 } 569 570 // DeletePeriodicLaunch is used to delete the periodic launch 571 func (s *StateStore) DeletePeriodicLaunch(index uint64, jobID string) error { 572 txn := s.db.Txn(true) 573 defer txn.Abort() 574 575 // Lookup the launch 576 existing, err := txn.First("periodic_launch", "id", jobID) 577 if err != nil { 578 return fmt.Errorf("launch lookup failed: %v", err) 579 } 580 if existing == nil { 581 return fmt.Errorf("launch not found") 582 } 583 584 watcher := watch.NewItems() 585 watcher.Add(watch.Item{Table: "periodic_launch"}) 586 watcher.Add(watch.Item{Job: jobID}) 587 588 // Delete the launch 589 if err := txn.Delete("periodic_launch", existing); err != nil { 590 return fmt.Errorf("launch delete failed: %v", err) 591 } 592 if err := txn.Insert("index", &IndexEntry{"periodic_launch", index}); err != nil { 593 return fmt.Errorf("index update failed: %v", err) 594 } 595 596 txn.Defer(func() { s.watch.notify(watcher) }) 597 txn.Commit() 598 return nil 599 } 600 601 // PeriodicLaunchByID is used to lookup a periodic launch by the periodic job 602 // ID. 603 func (s *StateStore) PeriodicLaunchByID(id string) (*structs.PeriodicLaunch, error) { 604 txn := s.db.Txn(false) 605 606 existing, err := txn.First("periodic_launch", "id", id) 607 if err != nil { 608 return nil, fmt.Errorf("periodic launch lookup failed: %v", err) 609 } 610 611 if existing != nil { 612 return existing.(*structs.PeriodicLaunch), nil 613 } 614 return nil, nil 615 } 616 617 // PeriodicLaunches returns an iterator over all the periodic launches 618 func (s *StateStore) PeriodicLaunches() (memdb.ResultIterator, error) { 619 txn := s.db.Txn(false) 620 621 // Walk the entire table 622 iter, err := txn.Get("periodic_launch", "id") 623 if err != nil { 624 return nil, err 625 } 626 return iter, nil 627 } 628 629 // UpsertEvaluation is used to upsert an evaluation 630 func (s *StateStore) UpsertEvals(index uint64, evals []*structs.Evaluation) error { 631 txn := s.db.Txn(true) 632 defer txn.Abort() 633 634 watcher := watch.NewItems() 635 watcher.Add(watch.Item{Table: "evals"}) 636 637 // Do a nested upsert 638 jobs := make(map[string]string, len(evals)) 639 for _, eval := range evals { 640 watcher.Add(watch.Item{Eval: eval.ID}) 641 watcher.Add(watch.Item{EvalJob: eval.JobID}) 642 if err := s.nestedUpsertEval(txn, index, eval); err != nil { 643 return err 644 } 645 646 jobs[eval.JobID] = "" 647 } 648 649 // Set the job's status 650 if err := s.setJobStatuses(index, watcher, txn, jobs, false); err != nil { 651 return fmt.Errorf("setting job status failed: %v", err) 652 } 653 654 txn.Defer(func() { s.watch.notify(watcher) }) 655 txn.Commit() 656 return nil 657 } 658 659 // nestedUpsertEvaluation is used to nest an evaluation upsert within a transaction 660 func (s *StateStore) nestedUpsertEval(txn *memdb.Txn, index uint64, eval *structs.Evaluation) error { 661 // Lookup the evaluation 662 existing, err := txn.First("evals", "id", eval.ID) 663 if err != nil { 664 return fmt.Errorf("eval lookup failed: %v", err) 665 } 666 667 // Update the indexes 668 if existing != nil { 669 eval.CreateIndex = existing.(*structs.Evaluation).CreateIndex 670 eval.ModifyIndex = index 671 } else { 672 eval.CreateIndex = index 673 eval.ModifyIndex = index 674 } 675 676 // Update the job summary 677 summaryRaw, err := txn.First("job_summary", "id", eval.JobID) 678 if err != nil { 679 return fmt.Errorf("job summary lookup failed: %v", err) 680 } 681 if summaryRaw != nil { 682 js := summaryRaw.(structs.JobSummary) 683 var hasSummaryChanged bool 684 for tg, num := range eval.QueuedAllocations { 685 if summary, ok := js.Summary[tg]; ok { 686 if summary.Queued != num { 687 summary.Queued = num 688 js.Summary[tg] = summary 689 hasSummaryChanged = true 690 } 691 } else { 692 s.logger.Printf("[ERR] state_store: unable to update queued for job %q and task group %q", eval.JobID, tg) 693 } 694 } 695 696 // Insert the job summary 697 if hasSummaryChanged { 698 js.ModifyIndex = index 699 if err := txn.Insert("job_summary", js); err != nil { 700 return fmt.Errorf("job summary insert failed: %v", err) 701 } 702 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 703 return fmt.Errorf("index update failed: %v", err) 704 } 705 } 706 } 707 708 // Insert the eval 709 if err := txn.Insert("evals", eval); err != nil { 710 return fmt.Errorf("eval insert failed: %v", err) 711 } 712 if err := txn.Insert("index", &IndexEntry{"evals", index}); err != nil { 713 return fmt.Errorf("index update failed: %v", err) 714 } 715 return nil 716 } 717 718 // DeleteEval is used to delete an evaluation 719 func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) error { 720 txn := s.db.Txn(true) 721 defer txn.Abort() 722 watcher := watch.NewItems() 723 watcher.Add(watch.Item{Table: "evals"}) 724 watcher.Add(watch.Item{Table: "allocs"}) 725 726 jobs := make(map[string]string, len(evals)) 727 for _, eval := range evals { 728 existing, err := txn.First("evals", "id", eval) 729 if err != nil { 730 return fmt.Errorf("eval lookup failed: %v", err) 731 } 732 if existing == nil { 733 continue 734 } 735 if err := txn.Delete("evals", existing); err != nil { 736 return fmt.Errorf("eval delete failed: %v", err) 737 } 738 jobID := existing.(*structs.Evaluation).JobID 739 watcher.Add(watch.Item{Eval: eval}) 740 watcher.Add(watch.Item{EvalJob: jobID}) 741 jobs[jobID] = "" 742 } 743 744 for _, alloc := range allocs { 745 existing, err := txn.First("allocs", "id", alloc) 746 if err != nil { 747 return fmt.Errorf("alloc lookup failed: %v", err) 748 } 749 if existing == nil { 750 continue 751 } 752 if err := txn.Delete("allocs", existing); err != nil { 753 return fmt.Errorf("alloc delete failed: %v", err) 754 } 755 realAlloc := existing.(*structs.Allocation) 756 watcher.Add(watch.Item{Alloc: realAlloc.ID}) 757 watcher.Add(watch.Item{AllocEval: realAlloc.EvalID}) 758 watcher.Add(watch.Item{AllocJob: realAlloc.JobID}) 759 watcher.Add(watch.Item{AllocNode: realAlloc.NodeID}) 760 } 761 762 // Update the indexes 763 if err := txn.Insert("index", &IndexEntry{"evals", index}); err != nil { 764 return fmt.Errorf("index update failed: %v", err) 765 } 766 if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { 767 return fmt.Errorf("index update failed: %v", err) 768 } 769 770 // Set the job's status 771 if err := s.setJobStatuses(index, watcher, txn, jobs, true); err != nil { 772 return fmt.Errorf("setting job status failed: %v", err) 773 } 774 775 txn.Defer(func() { s.watch.notify(watcher) }) 776 txn.Commit() 777 return nil 778 } 779 780 // EvalByID is used to lookup an eval by its ID 781 func (s *StateStore) EvalByID(id string) (*structs.Evaluation, error) { 782 txn := s.db.Txn(false) 783 784 existing, err := txn.First("evals", "id", id) 785 if err != nil { 786 return nil, fmt.Errorf("eval lookup failed: %v", err) 787 } 788 789 if existing != nil { 790 return existing.(*structs.Evaluation), nil 791 } 792 return nil, nil 793 } 794 795 // EvalsByIDPrefix is used to lookup evaluations by prefix 796 func (s *StateStore) EvalsByIDPrefix(id string) (memdb.ResultIterator, error) { 797 txn := s.db.Txn(false) 798 799 iter, err := txn.Get("evals", "id_prefix", id) 800 if err != nil { 801 return nil, fmt.Errorf("eval lookup failed: %v", err) 802 } 803 804 return iter, nil 805 } 806 807 // EvalsByJob returns all the evaluations by job id 808 func (s *StateStore) EvalsByJob(jobID string) ([]*structs.Evaluation, error) { 809 txn := s.db.Txn(false) 810 811 // Get an iterator over the node allocations 812 iter, err := txn.Get("evals", "job", jobID) 813 if err != nil { 814 return nil, err 815 } 816 817 var out []*structs.Evaluation 818 for { 819 raw := iter.Next() 820 if raw == nil { 821 break 822 } 823 out = append(out, raw.(*structs.Evaluation)) 824 } 825 return out, nil 826 } 827 828 // Evals returns an iterator over all the evaluations 829 func (s *StateStore) Evals() (memdb.ResultIterator, error) { 830 txn := s.db.Txn(false) 831 832 // Walk the entire table 833 iter, err := txn.Get("evals", "id") 834 if err != nil { 835 return nil, err 836 } 837 return iter, nil 838 } 839 840 // UpdateAllocsFromClient is used to update an allocation based on input 841 842 // from a client. While the schedulers are the authority on the allocation for 843 // most things, some updates are authoritative from the client. Specifically, 844 // the desired state comes from the schedulers, while the actual state comes 845 // from clients. 846 func (s *StateStore) UpdateAllocsFromClient(index uint64, allocs []*structs.Allocation) error { 847 txn := s.db.Txn(true) 848 defer txn.Abort() 849 850 // Setup the watcher 851 watcher := watch.NewItems() 852 watcher.Add(watch.Item{Table: "allocs"}) 853 854 // Handle each of the updated allocations 855 for _, alloc := range allocs { 856 if err := s.nestedUpdateAllocFromClient(txn, watcher, index, alloc); err != nil { 857 return err 858 } 859 } 860 861 // Update the indexes 862 if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { 863 return fmt.Errorf("index update failed: %v", err) 864 } 865 866 txn.Defer(func() { s.watch.notify(watcher) }) 867 txn.Commit() 868 return nil 869 } 870 871 // nestedUpdateAllocFromClient is used to nest an update of an allocation with client status 872 func (s *StateStore) nestedUpdateAllocFromClient(txn *memdb.Txn, watcher watch.Items, index uint64, alloc *structs.Allocation) error { 873 // Look for existing alloc 874 existing, err := txn.First("allocs", "id", alloc.ID) 875 if err != nil { 876 return fmt.Errorf("alloc lookup failed: %v", err) 877 } 878 879 // Nothing to do if this does not exist 880 if existing == nil { 881 return nil 882 } 883 exist := existing.(*structs.Allocation) 884 // Trigger the watcher 885 watcher.Add(watch.Item{Alloc: alloc.ID}) 886 watcher.Add(watch.Item{AllocEval: exist.EvalID}) 887 watcher.Add(watch.Item{AllocJob: exist.JobID}) 888 watcher.Add(watch.Item{AllocNode: exist.NodeID}) 889 890 // Copy everything from the existing allocation 891 copyAlloc := new(structs.Allocation) 892 *copyAlloc = *exist 893 894 // Pull in anything the client is the authority on 895 copyAlloc.ClientStatus = alloc.ClientStatus 896 copyAlloc.ClientDescription = alloc.ClientDescription 897 copyAlloc.TaskStates = alloc.TaskStates 898 899 // Update the modify index 900 copyAlloc.ModifyIndex = index 901 902 if err := s.updateSummaryWithAlloc(index, copyAlloc, exist, watcher, txn); err != nil { 903 return fmt.Errorf("error updating job summary: %v", err) 904 } 905 906 // Update the allocation 907 if err := txn.Insert("allocs", copyAlloc); err != nil { 908 return fmt.Errorf("alloc insert failed: %v", err) 909 } 910 911 // Set the job's status 912 forceStatus := "" 913 if !copyAlloc.TerminalStatus() { 914 forceStatus = structs.JobStatusRunning 915 } 916 jobs := map[string]string{exist.JobID: forceStatus} 917 if err := s.setJobStatuses(index, watcher, txn, jobs, false); err != nil { 918 return fmt.Errorf("setting job status failed: %v", err) 919 } 920 return nil 921 } 922 923 // UpsertAllocs is used to evict a set of allocations 924 // and allocate new ones at the same time. 925 func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) error { 926 txn := s.db.Txn(true) 927 defer txn.Abort() 928 929 watcher := watch.NewItems() 930 watcher.Add(watch.Item{Table: "allocs"}) 931 932 // Handle the allocations 933 jobs := make(map[string]string, 1) 934 for _, alloc := range allocs { 935 existing, err := txn.First("allocs", "id", alloc.ID) 936 if err != nil { 937 return fmt.Errorf("alloc lookup failed: %v", err) 938 } 939 exist, _ := existing.(*structs.Allocation) 940 941 if exist == nil { 942 alloc.CreateIndex = index 943 alloc.ModifyIndex = index 944 alloc.AllocModifyIndex = index 945 } else { 946 alloc.CreateIndex = exist.CreateIndex 947 alloc.ModifyIndex = index 948 alloc.AllocModifyIndex = index 949 950 // If the scheduler is marking this allocation as lost we do not 951 // want to reuse the status of the existing allocation. 952 if alloc.ClientStatus != structs.AllocClientStatusLost { 953 alloc.ClientStatus = exist.ClientStatus 954 alloc.ClientDescription = exist.ClientDescription 955 } 956 957 // The job has been denormalized so re-attach the original job 958 if alloc.Job == nil { 959 alloc.Job = exist.Job 960 } 961 } 962 963 if err := s.updateSummaryWithAlloc(index, alloc, exist, watcher, txn); err != nil { 964 return fmt.Errorf("error updating job summary: %v", err) 965 } 966 967 // Create the EphemeralDisk if it's nil by adding up DiskMB from task resources. 968 // COMPAT 0.4.1 -> 0.5 969 if alloc.Job != nil { 970 s.addEphemeralDiskToTaskGroups(alloc.Job) 971 } 972 973 if err := txn.Insert("allocs", alloc); err != nil { 974 return fmt.Errorf("alloc insert failed: %v", err) 975 } 976 977 // If the allocation is running, force the job to running status. 978 forceStatus := "" 979 if !alloc.TerminalStatus() { 980 forceStatus = structs.JobStatusRunning 981 } 982 jobs[alloc.JobID] = forceStatus 983 984 watcher.Add(watch.Item{Alloc: alloc.ID}) 985 watcher.Add(watch.Item{AllocEval: alloc.EvalID}) 986 watcher.Add(watch.Item{AllocJob: alloc.JobID}) 987 watcher.Add(watch.Item{AllocNode: alloc.NodeID}) 988 } 989 990 // Update the indexes 991 if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { 992 return fmt.Errorf("index update failed: %v", err) 993 } 994 995 // Set the job's status 996 if err := s.setJobStatuses(index, watcher, txn, jobs, false); err != nil { 997 return fmt.Errorf("setting job status failed: %v", err) 998 } 999 1000 txn.Defer(func() { s.watch.notify(watcher) }) 1001 txn.Commit() 1002 return nil 1003 } 1004 1005 // AllocByID is used to lookup an allocation by its ID 1006 func (s *StateStore) AllocByID(id string) (*structs.Allocation, error) { 1007 txn := s.db.Txn(false) 1008 1009 existing, err := txn.First("allocs", "id", id) 1010 if err != nil { 1011 return nil, fmt.Errorf("alloc lookup failed: %v", err) 1012 } 1013 1014 if existing != nil { 1015 return existing.(*structs.Allocation), nil 1016 } 1017 return nil, nil 1018 } 1019 1020 // AllocsByIDPrefix is used to lookup allocs by prefix 1021 func (s *StateStore) AllocsByIDPrefix(id string) (memdb.ResultIterator, error) { 1022 txn := s.db.Txn(false) 1023 1024 iter, err := txn.Get("allocs", "id_prefix", id) 1025 if err != nil { 1026 return nil, fmt.Errorf("alloc lookup failed: %v", err) 1027 } 1028 1029 return iter, nil 1030 } 1031 1032 // AllocsByNode returns all the allocations by node 1033 func (s *StateStore) AllocsByNode(node string) ([]*structs.Allocation, error) { 1034 txn := s.db.Txn(false) 1035 1036 // Get an iterator over the node allocations, using only the 1037 // node prefix which ignores the terminal status 1038 iter, err := txn.Get("allocs", "node_prefix", node) 1039 if err != nil { 1040 return nil, err 1041 } 1042 1043 var out []*structs.Allocation 1044 for { 1045 raw := iter.Next() 1046 if raw == nil { 1047 break 1048 } 1049 out = append(out, raw.(*structs.Allocation)) 1050 } 1051 return out, nil 1052 } 1053 1054 // AllocsByNode returns all the allocations by node and terminal status 1055 func (s *StateStore) AllocsByNodeTerminal(node string, terminal bool) ([]*structs.Allocation, error) { 1056 txn := s.db.Txn(false) 1057 1058 // Get an iterator over the node allocations 1059 iter, err := txn.Get("allocs", "node", node, terminal) 1060 if err != nil { 1061 return nil, err 1062 } 1063 1064 var out []*structs.Allocation 1065 for { 1066 raw := iter.Next() 1067 if raw == nil { 1068 break 1069 } 1070 out = append(out, raw.(*structs.Allocation)) 1071 } 1072 return out, nil 1073 } 1074 1075 // AllocsByJob returns all the allocations by job id 1076 func (s *StateStore) AllocsByJob(jobID string) ([]*structs.Allocation, error) { 1077 txn := s.db.Txn(false) 1078 1079 // Get an iterator over the node allocations 1080 iter, err := txn.Get("allocs", "job", jobID) 1081 if err != nil { 1082 return nil, err 1083 } 1084 1085 var out []*structs.Allocation 1086 for { 1087 raw := iter.Next() 1088 if raw == nil { 1089 break 1090 } 1091 out = append(out, raw.(*structs.Allocation)) 1092 } 1093 return out, nil 1094 } 1095 1096 // AllocsByEval returns all the allocations by eval id 1097 func (s *StateStore) AllocsByEval(evalID string) ([]*structs.Allocation, error) { 1098 txn := s.db.Txn(false) 1099 1100 // Get an iterator over the eval allocations 1101 iter, err := txn.Get("allocs", "eval", evalID) 1102 if err != nil { 1103 return nil, err 1104 } 1105 1106 var out []*structs.Allocation 1107 for { 1108 raw := iter.Next() 1109 if raw == nil { 1110 break 1111 } 1112 out = append(out, raw.(*structs.Allocation)) 1113 } 1114 return out, nil 1115 } 1116 1117 // Allocs returns an iterator over all the evaluations 1118 func (s *StateStore) Allocs() (memdb.ResultIterator, error) { 1119 txn := s.db.Txn(false) 1120 1121 // Walk the entire table 1122 iter, err := txn.Get("allocs", "id") 1123 if err != nil { 1124 return nil, err 1125 } 1126 return iter, nil 1127 } 1128 1129 // UpsertVaultAccessors is used to register a set of Vault Accessors 1130 func (s *StateStore) UpsertVaultAccessor(index uint64, accessors []*structs.VaultAccessor) error { 1131 txn := s.db.Txn(true) 1132 defer txn.Abort() 1133 1134 for _, accessor := range accessors { 1135 // Set the create index 1136 accessor.CreateIndex = index 1137 1138 // Insert the accessor 1139 if err := txn.Insert("vault_accessors", accessor); err != nil { 1140 return fmt.Errorf("accessor insert failed: %v", err) 1141 } 1142 } 1143 1144 if err := txn.Insert("index", &IndexEntry{"vault_accessors", index}); err != nil { 1145 return fmt.Errorf("index update failed: %v", err) 1146 } 1147 1148 txn.Commit() 1149 return nil 1150 } 1151 1152 // DeleteVaultAccessors is used to delete a set of Vault Accessors 1153 func (s *StateStore) DeleteVaultAccessors(index uint64, accessors []*structs.VaultAccessor) error { 1154 txn := s.db.Txn(true) 1155 defer txn.Abort() 1156 1157 // Lookup the accessor 1158 for _, accessor := range accessors { 1159 // Delete the accessor 1160 if err := txn.Delete("vault_accessors", accessor); err != nil { 1161 return fmt.Errorf("accessor delete failed: %v", err) 1162 } 1163 } 1164 1165 if err := txn.Insert("index", &IndexEntry{"vault_accessors", index}); err != nil { 1166 return fmt.Errorf("index update failed: %v", err) 1167 } 1168 1169 txn.Commit() 1170 return nil 1171 } 1172 1173 // VaultAccessor returns the given Vault accessor 1174 func (s *StateStore) VaultAccessor(accessor string) (*structs.VaultAccessor, error) { 1175 txn := s.db.Txn(false) 1176 1177 existing, err := txn.First("vault_accessors", "id", accessor) 1178 if err != nil { 1179 return nil, fmt.Errorf("accessor lookup failed: %v", err) 1180 } 1181 1182 if existing != nil { 1183 return existing.(*structs.VaultAccessor), nil 1184 } 1185 1186 return nil, nil 1187 } 1188 1189 // VaultAccessors returns an iterator of Vault accessors. 1190 func (s *StateStore) VaultAccessors() (memdb.ResultIterator, error) { 1191 txn := s.db.Txn(false) 1192 1193 iter, err := txn.Get("vault_accessors", "id") 1194 if err != nil { 1195 return nil, err 1196 } 1197 return iter, nil 1198 } 1199 1200 // VaultAccessorsByAlloc returns all the Vault accessors by alloc id 1201 func (s *StateStore) VaultAccessorsByAlloc(allocID string) ([]*structs.VaultAccessor, error) { 1202 txn := s.db.Txn(false) 1203 1204 // Get an iterator over the accessors 1205 iter, err := txn.Get("vault_accessors", "alloc_id", allocID) 1206 if err != nil { 1207 return nil, err 1208 } 1209 1210 var out []*structs.VaultAccessor 1211 for { 1212 raw := iter.Next() 1213 if raw == nil { 1214 break 1215 } 1216 out = append(out, raw.(*structs.VaultAccessor)) 1217 } 1218 return out, nil 1219 } 1220 1221 // VaultAccessorsByNode returns all the Vault accessors by node id 1222 func (s *StateStore) VaultAccessorsByNode(nodeID string) ([]*structs.VaultAccessor, error) { 1223 txn := s.db.Txn(false) 1224 1225 // Get an iterator over the accessors 1226 iter, err := txn.Get("vault_accessors", "node_id", nodeID) 1227 if err != nil { 1228 return nil, err 1229 } 1230 1231 var out []*structs.VaultAccessor 1232 for { 1233 raw := iter.Next() 1234 if raw == nil { 1235 break 1236 } 1237 out = append(out, raw.(*structs.VaultAccessor)) 1238 } 1239 return out, nil 1240 } 1241 1242 // LastIndex returns the greatest index value for all indexes 1243 func (s *StateStore) LatestIndex() (uint64, error) { 1244 indexes, err := s.Indexes() 1245 if err != nil { 1246 return 0, err 1247 } 1248 1249 var max uint64 = 0 1250 for { 1251 raw := indexes.Next() 1252 if raw == nil { 1253 break 1254 } 1255 1256 // Prepare the request struct 1257 idx := raw.(*IndexEntry) 1258 1259 // Determine the max 1260 if idx.Value > max { 1261 max = idx.Value 1262 } 1263 } 1264 1265 return max, nil 1266 } 1267 1268 // Index finds the matching index value 1269 func (s *StateStore) Index(name string) (uint64, error) { 1270 txn := s.db.Txn(false) 1271 1272 // Lookup the first matching index 1273 out, err := txn.First("index", "id", name) 1274 if err != nil { 1275 return 0, err 1276 } 1277 if out == nil { 1278 return 0, nil 1279 } 1280 return out.(*IndexEntry).Value, nil 1281 } 1282 1283 // RemoveIndex is a helper method to remove an index for testing purposes 1284 func (s *StateStore) RemoveIndex(name string) error { 1285 txn := s.db.Txn(true) 1286 defer txn.Abort() 1287 1288 if _, err := txn.DeleteAll("index", "id", name); err != nil { 1289 return err 1290 } 1291 1292 txn.Commit() 1293 return nil 1294 } 1295 1296 // Indexes returns an iterator over all the indexes 1297 func (s *StateStore) Indexes() (memdb.ResultIterator, error) { 1298 txn := s.db.Txn(false) 1299 1300 // Walk the entire nodes table 1301 iter, err := txn.Get("index", "id") 1302 if err != nil { 1303 return nil, err 1304 } 1305 return iter, nil 1306 } 1307 1308 // ReconcileJobSummaries re-creates summaries for all jobs present in the state 1309 // store 1310 func (s *StateStore) ReconcileJobSummaries(index uint64) error { 1311 txn := s.db.Txn(true) 1312 defer txn.Abort() 1313 1314 // Get all the jobs 1315 iter, err := txn.Get("jobs", "id") 1316 if err != nil { 1317 return err 1318 } 1319 for { 1320 rawJob := iter.Next() 1321 if rawJob == nil { 1322 break 1323 } 1324 job := rawJob.(*structs.Job) 1325 1326 // Create a job summary for the job 1327 summary := structs.JobSummary{ 1328 JobID: job.ID, 1329 Summary: make(map[string]structs.TaskGroupSummary), 1330 } 1331 for _, tg := range job.TaskGroups { 1332 summary.Summary[tg.Name] = structs.TaskGroupSummary{} 1333 } 1334 1335 // Find all the allocations for the jobs 1336 iterAllocs, err := txn.Get("allocs", "job", job.ID) 1337 if err != nil { 1338 return err 1339 } 1340 1341 // Calculate the summary for the job 1342 for { 1343 rawAlloc := iterAllocs.Next() 1344 if rawAlloc == nil { 1345 break 1346 } 1347 alloc := rawAlloc.(*structs.Allocation) 1348 1349 // Ignore the allocation if it doesn't belong to the currently 1350 // registered job 1351 if alloc.Job.CreateIndex != job.CreateIndex { 1352 continue 1353 } 1354 1355 tg := summary.Summary[alloc.TaskGroup] 1356 switch alloc.ClientStatus { 1357 case structs.AllocClientStatusFailed: 1358 tg.Failed += 1 1359 case structs.AllocClientStatusLost: 1360 tg.Lost += 1 1361 case structs.AllocClientStatusComplete: 1362 tg.Complete += 1 1363 case structs.AllocClientStatusRunning: 1364 tg.Running += 1 1365 case structs.AllocClientStatusPending: 1366 tg.Starting += 1 1367 default: 1368 s.logger.Printf("[ERR] state_store: invalid client status: %v in allocation %q", alloc.ClientStatus, alloc.ID) 1369 } 1370 summary.Summary[alloc.TaskGroup] = tg 1371 } 1372 1373 // Set the create index of the summary same as the job's create index 1374 // and the modify index to the current index 1375 summary.CreateIndex = job.CreateIndex 1376 summary.ModifyIndex = index 1377 1378 // Insert the job summary 1379 if err := txn.Insert("job_summary", summary); err != nil { 1380 return fmt.Errorf("error inserting job summary: %v", err) 1381 } 1382 } 1383 1384 // Update the indexes table for job summary 1385 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 1386 return fmt.Errorf("index update failed: %v", err) 1387 } 1388 txn.Commit() 1389 return nil 1390 } 1391 1392 // setJobStatuses is a helper for calling setJobStatus on multiple jobs by ID. 1393 // It takes a map of job IDs to an optional forceStatus string. It returns an 1394 // error if the job doesn't exist or setJobStatus fails. 1395 func (s *StateStore) setJobStatuses(index uint64, watcher watch.Items, txn *memdb.Txn, 1396 jobs map[string]string, evalDelete bool) error { 1397 for job, forceStatus := range jobs { 1398 existing, err := txn.First("jobs", "id", job) 1399 if err != nil { 1400 return fmt.Errorf("job lookup failed: %v", err) 1401 } 1402 1403 if existing == nil { 1404 continue 1405 } 1406 1407 if err := s.setJobStatus(index, watcher, txn, existing.(*structs.Job), evalDelete, forceStatus); err != nil { 1408 return err 1409 } 1410 } 1411 1412 return nil 1413 } 1414 1415 // setJobStatus sets the status of the job by looking up associated evaluations 1416 // and allocations. evalDelete should be set to true if setJobStatus is being 1417 // called because an evaluation is being deleted (potentially because of garbage 1418 // collection). If forceStatus is non-empty, the job's status will be set to the 1419 // passed status. 1420 func (s *StateStore) setJobStatus(index uint64, watcher watch.Items, txn *memdb.Txn, 1421 job *structs.Job, evalDelete bool, forceStatus string) error { 1422 1423 // Capture the current status so we can check if there is a change 1424 oldStatus := job.Status 1425 newStatus := forceStatus 1426 1427 // If forceStatus is not set, compute the jobs status. 1428 if forceStatus == "" { 1429 var err error 1430 newStatus, err = s.getJobStatus(txn, job, evalDelete) 1431 if err != nil { 1432 return err 1433 } 1434 } 1435 1436 // Fast-path if nothing has changed. 1437 if oldStatus == newStatus { 1438 return nil 1439 } 1440 1441 // The job has changed, so add to watcher. 1442 watcher.Add(watch.Item{Table: "jobs"}) 1443 watcher.Add(watch.Item{Job: job.ID}) 1444 1445 // Copy and update the existing job 1446 updated := job.Copy() 1447 updated.Status = newStatus 1448 updated.ModifyIndex = index 1449 1450 // Insert the job 1451 if err := txn.Insert("jobs", updated); err != nil { 1452 return fmt.Errorf("job insert failed: %v", err) 1453 } 1454 if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil { 1455 return fmt.Errorf("index update failed: %v", err) 1456 } 1457 return nil 1458 } 1459 1460 func (s *StateStore) getJobStatus(txn *memdb.Txn, job *structs.Job, evalDelete bool) (string, error) { 1461 allocs, err := txn.Get("allocs", "job", job.ID) 1462 if err != nil { 1463 return "", err 1464 } 1465 1466 // If there is a non-terminal allocation, the job is running. 1467 hasAlloc := false 1468 for alloc := allocs.Next(); alloc != nil; alloc = allocs.Next() { 1469 hasAlloc = true 1470 if !alloc.(*structs.Allocation).TerminalStatus() { 1471 return structs.JobStatusRunning, nil 1472 } 1473 } 1474 1475 evals, err := txn.Get("evals", "job", job.ID) 1476 if err != nil { 1477 return "", err 1478 } 1479 1480 hasEval := false 1481 for eval := evals.Next(); eval != nil; eval = evals.Next() { 1482 hasEval = true 1483 if !eval.(*structs.Evaluation).TerminalStatus() { 1484 return structs.JobStatusPending, nil 1485 } 1486 } 1487 1488 // The job is dead if all the allocations and evals are terminal or if there 1489 // are no evals because of garbage collection. 1490 if evalDelete || hasEval || hasAlloc { 1491 return structs.JobStatusDead, nil 1492 } 1493 1494 // If there are no allocations or evaluations it is a new job. If the job is 1495 // periodic, we mark it as running as it will never have an 1496 // allocation/evaluation against it. 1497 if job.IsPeriodic() { 1498 return structs.JobStatusRunning, nil 1499 } 1500 return structs.JobStatusPending, nil 1501 } 1502 1503 // updateSummaryWithJob creates or updates job summaries when new jobs are 1504 // upserted or existing ones are updated 1505 func (s *StateStore) updateSummaryWithJob(index uint64, job *structs.Job, 1506 watcher watch.Items, txn *memdb.Txn) error { 1507 1508 existing, err := s.JobSummaryByID(job.ID) 1509 if err != nil { 1510 return fmt.Errorf("unable to retrieve summary for job: %v", err) 1511 } 1512 var hasSummaryChanged bool 1513 if existing == nil { 1514 existing = &structs.JobSummary{ 1515 JobID: job.ID, 1516 Summary: make(map[string]structs.TaskGroupSummary), 1517 CreateIndex: index, 1518 } 1519 hasSummaryChanged = true 1520 } 1521 for _, tg := range job.TaskGroups { 1522 if _, ok := existing.Summary[tg.Name]; !ok { 1523 newSummary := structs.TaskGroupSummary{ 1524 Complete: 0, 1525 Failed: 0, 1526 Running: 0, 1527 Starting: 0, 1528 } 1529 existing.Summary[tg.Name] = newSummary 1530 hasSummaryChanged = true 1531 } 1532 } 1533 1534 // The job summary has changed, so add to watcher and update the modify 1535 // index. 1536 if hasSummaryChanged { 1537 existing.ModifyIndex = index 1538 watcher.Add(watch.Item{Table: "job_summary"}) 1539 watcher.Add(watch.Item{JobSummary: job.ID}) 1540 1541 // Update the indexes table for job summary 1542 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 1543 return fmt.Errorf("index update failed: %v", err) 1544 } 1545 if err := txn.Insert("job_summary", *existing); err != nil { 1546 return err 1547 } 1548 } 1549 1550 return nil 1551 } 1552 1553 // updateSummaryWithAlloc updates the job summary when allocations are updated 1554 // or inserted 1555 func (s *StateStore) updateSummaryWithAlloc(index uint64, alloc *structs.Allocation, 1556 existingAlloc *structs.Allocation, watcher watch.Items, txn *memdb.Txn) error { 1557 1558 // We don't have to update the summary if the job is missing 1559 if alloc.Job == nil { 1560 return nil 1561 } 1562 1563 summaryRaw, err := txn.First("job_summary", "id", alloc.JobID) 1564 if err != nil { 1565 return fmt.Errorf("unable to lookup job summary for job id %q: %v", err) 1566 } 1567 if summaryRaw == nil { 1568 // Check if the job is de-registered 1569 rawJob, err := txn.First("jobs", "id", alloc.JobID) 1570 if err != nil { 1571 return fmt.Errorf("unable to query job: %v", err) 1572 } 1573 1574 // If the job is de-registered then we skip updating it's summary 1575 if rawJob == nil { 1576 return nil 1577 } 1578 return fmt.Errorf("job summary for job %q is not present", alloc.JobID) 1579 } 1580 summary := summaryRaw.(structs.JobSummary) 1581 jobSummary := summary.Copy() 1582 1583 // Not updating the job summary because the allocation doesn't belong to the 1584 // currently registered job 1585 if jobSummary.CreateIndex != alloc.Job.CreateIndex { 1586 return nil 1587 } 1588 1589 tgSummary, ok := jobSummary.Summary[alloc.TaskGroup] 1590 if !ok { 1591 return fmt.Errorf("unable to find task group in the job summary: %v", alloc.TaskGroup) 1592 } 1593 var summaryChanged bool 1594 if existingAlloc == nil { 1595 switch alloc.DesiredStatus { 1596 case structs.AllocDesiredStatusStop, structs.AllocDesiredStatusEvict: 1597 s.logger.Printf("[ERR] state_store: new allocation inserted into state store with id: %v and state: %v", 1598 alloc.ID, alloc.DesiredStatus) 1599 } 1600 switch alloc.ClientStatus { 1601 case structs.AllocClientStatusPending: 1602 tgSummary.Starting += 1 1603 if tgSummary.Queued > 0 { 1604 tgSummary.Queued -= 1 1605 } 1606 summaryChanged = true 1607 case structs.AllocClientStatusRunning, structs.AllocClientStatusFailed, 1608 structs.AllocClientStatusComplete: 1609 s.logger.Printf("[ERR] state_store: new allocation inserted into state store with id: %v and state: %v", 1610 alloc.ID, alloc.ClientStatus) 1611 } 1612 } else if existingAlloc.ClientStatus != alloc.ClientStatus { 1613 // Incrementing the client of the bin of the current state 1614 switch alloc.ClientStatus { 1615 case structs.AllocClientStatusRunning: 1616 tgSummary.Running += 1 1617 case structs.AllocClientStatusFailed: 1618 tgSummary.Failed += 1 1619 case structs.AllocClientStatusPending: 1620 tgSummary.Starting += 1 1621 case structs.AllocClientStatusComplete: 1622 tgSummary.Complete += 1 1623 case structs.AllocClientStatusLost: 1624 tgSummary.Lost += 1 1625 } 1626 1627 // Decrementing the count of the bin of the last state 1628 switch existingAlloc.ClientStatus { 1629 case structs.AllocClientStatusRunning: 1630 tgSummary.Running -= 1 1631 case structs.AllocClientStatusPending: 1632 tgSummary.Starting -= 1 1633 case structs.AllocClientStatusLost: 1634 tgSummary.Lost -= 1 1635 case structs.AllocClientStatusFailed, structs.AllocClientStatusComplete: 1636 default: 1637 s.logger.Printf("[ERR] state_store: invalid old state of allocation with id: %v, and state: %v", 1638 existingAlloc.ID, existingAlloc.ClientStatus) 1639 } 1640 summaryChanged = true 1641 } 1642 jobSummary.Summary[alloc.TaskGroup] = tgSummary 1643 1644 if summaryChanged { 1645 jobSummary.ModifyIndex = index 1646 watcher.Add(watch.Item{Table: "job_summary"}) 1647 watcher.Add(watch.Item{JobSummary: alloc.JobID}) 1648 1649 // Update the indexes table for job summary 1650 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 1651 return fmt.Errorf("index update failed: %v", err) 1652 } 1653 1654 if err := txn.Insert("job_summary", *jobSummary); err != nil { 1655 return fmt.Errorf("updating job summary failed: %v", err) 1656 } 1657 } 1658 1659 return nil 1660 } 1661 1662 // addEphemeralDiskToTaskGroups adds missing EphemeralDisk objects to TaskGroups 1663 func (s *StateStore) addEphemeralDiskToTaskGroups(job *structs.Job) { 1664 for _, tg := range job.TaskGroups { 1665 var diskMB int 1666 for _, task := range tg.Tasks { 1667 if task.Resources != nil { 1668 diskMB += task.Resources.DiskMB 1669 task.Resources.DiskMB = 0 1670 } 1671 } 1672 if tg.EphemeralDisk != nil { 1673 continue 1674 } 1675 tg.EphemeralDisk = &structs.EphemeralDisk{ 1676 SizeMB: diskMB, 1677 } 1678 } 1679 } 1680 1681 // StateSnapshot is used to provide a point-in-time snapshot 1682 type StateSnapshot struct { 1683 StateStore 1684 } 1685 1686 // StateRestore is used to optimize the performance when 1687 // restoring state by only using a single large transaction 1688 // instead of thousands of sub transactions 1689 type StateRestore struct { 1690 txn *memdb.Txn 1691 watch *stateWatch 1692 items watch.Items 1693 } 1694 1695 // Abort is used to abort the restore operation 1696 func (s *StateRestore) Abort() { 1697 s.txn.Abort() 1698 } 1699 1700 // Commit is used to commit the restore operation 1701 func (s *StateRestore) Commit() { 1702 s.txn.Defer(func() { s.watch.notify(s.items) }) 1703 s.txn.Commit() 1704 } 1705 1706 // NodeRestore is used to restore a node 1707 func (r *StateRestore) NodeRestore(node *structs.Node) error { 1708 r.items.Add(watch.Item{Table: "nodes"}) 1709 r.items.Add(watch.Item{Node: node.ID}) 1710 if err := r.txn.Insert("nodes", node); err != nil { 1711 return fmt.Errorf("node insert failed: %v", err) 1712 } 1713 return nil 1714 } 1715 1716 // JobRestore is used to restore a job 1717 func (r *StateRestore) JobRestore(job *structs.Job) error { 1718 r.items.Add(watch.Item{Table: "jobs"}) 1719 r.items.Add(watch.Item{Job: job.ID}) 1720 1721 // Create the EphemeralDisk if it's nil by adding up DiskMB from task resources. 1722 // COMPAT 0.4.1 -> 0.5 1723 r.addEphemeralDiskToTaskGroups(job) 1724 1725 if err := r.txn.Insert("jobs", job); err != nil { 1726 return fmt.Errorf("job insert failed: %v", err) 1727 } 1728 return nil 1729 } 1730 1731 // EvalRestore is used to restore an evaluation 1732 func (r *StateRestore) EvalRestore(eval *structs.Evaluation) error { 1733 r.items.Add(watch.Item{Table: "evals"}) 1734 r.items.Add(watch.Item{Eval: eval.ID}) 1735 r.items.Add(watch.Item{EvalJob: eval.JobID}) 1736 if err := r.txn.Insert("evals", eval); err != nil { 1737 return fmt.Errorf("eval insert failed: %v", err) 1738 } 1739 return nil 1740 } 1741 1742 // AllocRestore is used to restore an allocation 1743 func (r *StateRestore) AllocRestore(alloc *structs.Allocation) error { 1744 r.items.Add(watch.Item{Table: "allocs"}) 1745 r.items.Add(watch.Item{Alloc: alloc.ID}) 1746 r.items.Add(watch.Item{AllocEval: alloc.EvalID}) 1747 r.items.Add(watch.Item{AllocJob: alloc.JobID}) 1748 r.items.Add(watch.Item{AllocNode: alloc.NodeID}) 1749 1750 // Set the shared resources if it's not present 1751 // COMPAT 0.4.1 -> 0.5 1752 if alloc.SharedResources == nil { 1753 alloc.SharedResources = &structs.Resources{ 1754 DiskMB: alloc.Resources.DiskMB, 1755 } 1756 } 1757 1758 // Create the EphemeralDisk if it's nil by adding up DiskMB from task resources. 1759 if alloc.Job != nil { 1760 r.addEphemeralDiskToTaskGroups(alloc.Job) 1761 } 1762 1763 if err := r.txn.Insert("allocs", alloc); err != nil { 1764 return fmt.Errorf("alloc insert failed: %v", err) 1765 } 1766 return nil 1767 } 1768 1769 // IndexRestore is used to restore an index 1770 func (r *StateRestore) IndexRestore(idx *IndexEntry) error { 1771 if err := r.txn.Insert("index", idx); err != nil { 1772 return fmt.Errorf("index insert failed: %v", err) 1773 } 1774 return nil 1775 } 1776 1777 // PeriodicLaunchRestore is used to restore a periodic launch. 1778 func (r *StateRestore) PeriodicLaunchRestore(launch *structs.PeriodicLaunch) error { 1779 r.items.Add(watch.Item{Table: "periodic_launch"}) 1780 r.items.Add(watch.Item{Job: launch.ID}) 1781 if err := r.txn.Insert("periodic_launch", launch); err != nil { 1782 return fmt.Errorf("periodic launch insert failed: %v", err) 1783 } 1784 return nil 1785 } 1786 1787 // JobSummaryRestore is used to restore a job summary 1788 func (r *StateRestore) JobSummaryRestore(jobSummary *structs.JobSummary) error { 1789 if err := r.txn.Insert("job_summary", *jobSummary); err != nil { 1790 return fmt.Errorf("job summary insert failed: %v", err) 1791 } 1792 return nil 1793 } 1794 1795 // VaultAccessorRestore is used to restore a vault accessor 1796 func (r *StateRestore) VaultAccessorRestore(accessor *structs.VaultAccessor) error { 1797 if err := r.txn.Insert("vault_accessors", accessor); err != nil { 1798 return fmt.Errorf("vault accessor insert failed: %v", err) 1799 } 1800 return nil 1801 } 1802 1803 // addEphemeralDiskToTaskGroups adds missing EphemeralDisk objects to TaskGroups 1804 func (r *StateRestore) addEphemeralDiskToTaskGroups(job *structs.Job) { 1805 for _, tg := range job.TaskGroups { 1806 if tg.EphemeralDisk != nil { 1807 continue 1808 } 1809 var sizeMB int 1810 for _, task := range tg.Tasks { 1811 if task.Resources != nil { 1812 sizeMB += task.Resources.DiskMB 1813 task.Resources.DiskMB = 0 1814 } 1815 } 1816 tg.EphemeralDisk = &structs.EphemeralDisk{ 1817 SizeMB: sizeMB, 1818 } 1819 } 1820 } 1821 1822 // stateWatch holds shared state for watching updates. This is 1823 // outside of StateStore so it can be shared with snapshots. 1824 type stateWatch struct { 1825 items map[watch.Item]*NotifyGroup 1826 l sync.Mutex 1827 } 1828 1829 // newStateWatch creates a new stateWatch for change notification. 1830 func newStateWatch() *stateWatch { 1831 return &stateWatch{ 1832 items: make(map[watch.Item]*NotifyGroup), 1833 } 1834 } 1835 1836 // watch subscribes a channel to the given watch items. 1837 func (w *stateWatch) watch(items watch.Items, ch chan struct{}) { 1838 w.l.Lock() 1839 defer w.l.Unlock() 1840 1841 for item, _ := range items { 1842 grp, ok := w.items[item] 1843 if !ok { 1844 grp = new(NotifyGroup) 1845 w.items[item] = grp 1846 } 1847 grp.Wait(ch) 1848 } 1849 } 1850 1851 // stopWatch unsubscribes a channel from the given watch items. 1852 func (w *stateWatch) stopWatch(items watch.Items, ch chan struct{}) { 1853 w.l.Lock() 1854 defer w.l.Unlock() 1855 1856 for item, _ := range items { 1857 if grp, ok := w.items[item]; ok { 1858 grp.Clear(ch) 1859 if grp.Empty() { 1860 delete(w.items, item) 1861 } 1862 } 1863 } 1864 } 1865 1866 // notify is used to fire notifications on the given watch items. 1867 func (w *stateWatch) notify(items watch.Items) { 1868 w.l.Lock() 1869 defer w.l.Unlock() 1870 1871 for wi, _ := range items { 1872 if grp, ok := w.items[wi]; ok { 1873 grp.Notify() 1874 } 1875 } 1876 }