github.com/mattyr/nomad@v0.3.3-0.20160919021406-3485a065154a/nomad/state/state_store.go (about) 1 package state 2 3 import ( 4 "fmt" 5 "io" 6 "log" 7 "sync" 8 9 "github.com/hashicorp/go-memdb" 10 "github.com/hashicorp/nomad/nomad/structs" 11 "github.com/hashicorp/nomad/nomad/watch" 12 ) 13 14 // IndexEntry is used with the "index" table 15 // for managing the latest Raft index affecting a table. 16 type IndexEntry struct { 17 Key string 18 Value uint64 19 } 20 21 // The StateStore is responsible for maintaining all the Nomad 22 // state. It is manipulated by the FSM which maintains consistency 23 // through the use of Raft. The goals of the StateStore are to provide 24 // high concurrency for read operations without blocking writes, and 25 // to provide write availability in the face of reads. EVERY object 26 // returned as a result of a read against the state store should be 27 // considered a constant and NEVER modified in place. 28 type StateStore struct { 29 logger *log.Logger 30 db *memdb.MemDB 31 watch *stateWatch 32 } 33 34 // NewStateStore is used to create a new state store 35 func NewStateStore(logOutput io.Writer) (*StateStore, error) { 36 // Create the MemDB 37 db, err := memdb.NewMemDB(stateStoreSchema()) 38 if err != nil { 39 return nil, fmt.Errorf("state store setup failed: %v", err) 40 } 41 42 // Create the state store 43 s := &StateStore{ 44 logger: log.New(logOutput, "", log.LstdFlags), 45 db: db, 46 watch: newStateWatch(), 47 } 48 return s, nil 49 } 50 51 // Snapshot is used to create a point in time snapshot. Because 52 // we use MemDB, we just need to snapshot the state of the underlying 53 // database. 54 func (s *StateStore) Snapshot() (*StateSnapshot, error) { 55 snap := &StateSnapshot{ 56 StateStore: StateStore{ 57 logger: s.logger, 58 db: s.db.Snapshot(), 59 watch: s.watch, 60 }, 61 } 62 return snap, nil 63 } 64 65 // Restore is used to optimize the efficiency of rebuilding 66 // state by minimizing the number of transactions and checking 67 // overhead. 68 func (s *StateStore) Restore() (*StateRestore, error) { 69 txn := s.db.Txn(true) 70 r := &StateRestore{ 71 txn: txn, 72 watch: s.watch, 73 items: watch.NewItems(), 74 } 75 return r, nil 76 } 77 78 // Watch subscribes a channel to a set of watch items. 79 func (s *StateStore) Watch(items watch.Items, notify chan struct{}) { 80 s.watch.watch(items, notify) 81 } 82 83 // StopWatch unsubscribes a channel from a set of watch items. 84 func (s *StateStore) StopWatch(items watch.Items, notify chan struct{}) { 85 s.watch.stopWatch(items, notify) 86 } 87 88 // UpsertJobSummary upserts a job summary into the state store. 89 func (s *StateStore) UpsertJobSummary(index uint64, jobSummary *structs.JobSummary) error { 90 txn := s.db.Txn(true) 91 defer txn.Abort() 92 93 // Update the index 94 if err := txn.Insert("job_summary", *jobSummary); err != nil { 95 return err 96 } 97 98 // Update the indexes table for job summary 99 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 100 return fmt.Errorf("index update failed: %v", err) 101 } 102 103 txn.Commit() 104 return nil 105 } 106 107 // DeleteJobSummary deletes the job summary with the given ID. This is for 108 // testing purposes only. 109 func (s *StateStore) DeleteJobSummary(index uint64, id string) error { 110 txn := s.db.Txn(true) 111 defer txn.Abort() 112 113 // Delete the job summary 114 if _, err := txn.DeleteAll("job_summary", "id", id); err != nil { 115 return fmt.Errorf("deleting job summary failed: %v", err) 116 } 117 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 118 return fmt.Errorf("index update failed: %v", err) 119 } 120 txn.Commit() 121 return nil 122 } 123 124 // UpsertNode is used to register a node or update a node definition 125 // This is assumed to be triggered by the client, so we retain the value 126 // of drain which is set by the scheduler. 127 func (s *StateStore) UpsertNode(index uint64, node *structs.Node) error { 128 txn := s.db.Txn(true) 129 defer txn.Abort() 130 131 watcher := watch.NewItems() 132 watcher.Add(watch.Item{Table: "nodes"}) 133 watcher.Add(watch.Item{Node: node.ID}) 134 135 // Check if the node already exists 136 existing, err := txn.First("nodes", "id", node.ID) 137 if err != nil { 138 return fmt.Errorf("node lookup failed: %v", err) 139 } 140 141 // Setup the indexes correctly 142 if existing != nil { 143 exist := existing.(*structs.Node) 144 node.CreateIndex = exist.CreateIndex 145 node.ModifyIndex = index 146 node.Drain = exist.Drain // Retain the drain mode 147 } else { 148 node.CreateIndex = index 149 node.ModifyIndex = index 150 } 151 152 // Insert the node 153 if err := txn.Insert("nodes", node); err != nil { 154 return fmt.Errorf("node insert failed: %v", err) 155 } 156 if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil { 157 return fmt.Errorf("index update failed: %v", err) 158 } 159 160 txn.Defer(func() { s.watch.notify(watcher) }) 161 txn.Commit() 162 return nil 163 } 164 165 // DeleteNode is used to deregister a node 166 func (s *StateStore) DeleteNode(index uint64, nodeID string) error { 167 txn := s.db.Txn(true) 168 defer txn.Abort() 169 170 // Lookup the node 171 existing, err := txn.First("nodes", "id", nodeID) 172 if err != nil { 173 return fmt.Errorf("node lookup failed: %v", err) 174 } 175 if existing == nil { 176 return fmt.Errorf("node not found") 177 } 178 179 watcher := watch.NewItems() 180 watcher.Add(watch.Item{Table: "nodes"}) 181 watcher.Add(watch.Item{Node: nodeID}) 182 183 // Delete the node 184 if err := txn.Delete("nodes", existing); err != nil { 185 return fmt.Errorf("node delete failed: %v", err) 186 } 187 if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil { 188 return fmt.Errorf("index update failed: %v", err) 189 } 190 191 txn.Defer(func() { s.watch.notify(watcher) }) 192 txn.Commit() 193 return nil 194 } 195 196 // UpdateNodeStatus is used to update the status of a node 197 func (s *StateStore) UpdateNodeStatus(index uint64, nodeID, status string) error { 198 txn := s.db.Txn(true) 199 defer txn.Abort() 200 201 watcher := watch.NewItems() 202 watcher.Add(watch.Item{Table: "nodes"}) 203 watcher.Add(watch.Item{Node: nodeID}) 204 205 // Lookup the node 206 existing, err := txn.First("nodes", "id", nodeID) 207 if err != nil { 208 return fmt.Errorf("node lookup failed: %v", err) 209 } 210 if existing == nil { 211 return fmt.Errorf("node not found") 212 } 213 214 // Copy the existing node 215 existingNode := existing.(*structs.Node) 216 copyNode := new(structs.Node) 217 *copyNode = *existingNode 218 219 // Update the status in the copy 220 copyNode.Status = status 221 copyNode.ModifyIndex = index 222 223 // Insert the node 224 if err := txn.Insert("nodes", copyNode); err != nil { 225 return fmt.Errorf("node update failed: %v", err) 226 } 227 if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil { 228 return fmt.Errorf("index update failed: %v", err) 229 } 230 231 txn.Defer(func() { s.watch.notify(watcher) }) 232 txn.Commit() 233 return nil 234 } 235 236 // UpdateNodeDrain is used to update the drain of a node 237 func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, drain bool) error { 238 txn := s.db.Txn(true) 239 defer txn.Abort() 240 241 watcher := watch.NewItems() 242 watcher.Add(watch.Item{Table: "nodes"}) 243 watcher.Add(watch.Item{Node: nodeID}) 244 245 // Lookup the node 246 existing, err := txn.First("nodes", "id", nodeID) 247 if err != nil { 248 return fmt.Errorf("node lookup failed: %v", err) 249 } 250 if existing == nil { 251 return fmt.Errorf("node not found") 252 } 253 254 // Copy the existing node 255 existingNode := existing.(*structs.Node) 256 copyNode := new(structs.Node) 257 *copyNode = *existingNode 258 259 // Update the drain in the copy 260 copyNode.Drain = drain 261 copyNode.ModifyIndex = index 262 263 // Insert the node 264 if err := txn.Insert("nodes", copyNode); err != nil { 265 return fmt.Errorf("node update failed: %v", err) 266 } 267 if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil { 268 return fmt.Errorf("index update failed: %v", err) 269 } 270 271 txn.Defer(func() { s.watch.notify(watcher) }) 272 txn.Commit() 273 return nil 274 } 275 276 // NodeByID is used to lookup a node by ID 277 func (s *StateStore) NodeByID(nodeID string) (*structs.Node, error) { 278 txn := s.db.Txn(false) 279 280 existing, err := txn.First("nodes", "id", nodeID) 281 if err != nil { 282 return nil, fmt.Errorf("node lookup failed: %v", err) 283 } 284 285 if existing != nil { 286 return existing.(*structs.Node), nil 287 } 288 return nil, nil 289 } 290 291 // NodesByIDPrefix is used to lookup nodes by prefix 292 func (s *StateStore) NodesByIDPrefix(nodeID string) (memdb.ResultIterator, error) { 293 txn := s.db.Txn(false) 294 295 iter, err := txn.Get("nodes", "id_prefix", nodeID) 296 if err != nil { 297 return nil, fmt.Errorf("node lookup failed: %v", err) 298 } 299 300 return iter, nil 301 } 302 303 // Nodes returns an iterator over all the nodes 304 func (s *StateStore) Nodes() (memdb.ResultIterator, error) { 305 txn := s.db.Txn(false) 306 307 // Walk the entire nodes table 308 iter, err := txn.Get("nodes", "id") 309 if err != nil { 310 return nil, err 311 } 312 return iter, nil 313 } 314 315 // UpsertJob is used to register a job or update a job definition 316 func (s *StateStore) UpsertJob(index uint64, job *structs.Job) error { 317 txn := s.db.Txn(true) 318 defer txn.Abort() 319 320 watcher := watch.NewItems() 321 watcher.Add(watch.Item{Table: "jobs"}) 322 watcher.Add(watch.Item{Job: job.ID}) 323 324 // Check if the job already exists 325 existing, err := txn.First("jobs", "id", job.ID) 326 if err != nil { 327 return fmt.Errorf("job lookup failed: %v", err) 328 } 329 330 // Setup the indexes correctly 331 if existing != nil { 332 job.CreateIndex = existing.(*structs.Job).CreateIndex 333 job.ModifyIndex = index 334 job.JobModifyIndex = index 335 336 // Compute the job status 337 var err error 338 job.Status, err = s.getJobStatus(txn, job, false) 339 if err != nil { 340 return fmt.Errorf("setting job status for %q failed: %v", job.ID, err) 341 } 342 } else { 343 job.CreateIndex = index 344 job.ModifyIndex = index 345 job.JobModifyIndex = index 346 347 // If we are inserting the job for the first time, we don't need to 348 // calculate the jobs status as it is known. 349 if job.IsPeriodic() { 350 job.Status = structs.JobStatusRunning 351 } else { 352 job.Status = structs.JobStatusPending 353 } 354 } 355 356 if err := s.updateSummaryWithJob(index, job, watcher, txn); err != nil { 357 return fmt.Errorf("unable to create job summary: %v", err) 358 } 359 360 // Create the EphemeralDisk if it's nil by adding up DiskMB from task resources. 361 // COMPAT 0.4.1 -> 0.5 362 s.addEphemeralDiskToTaskGroups(job) 363 364 // Insert the job 365 if err := txn.Insert("jobs", job); err != nil { 366 return fmt.Errorf("job insert failed: %v", err) 367 } 368 if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil { 369 return fmt.Errorf("index update failed: %v", err) 370 } 371 372 txn.Defer(func() { s.watch.notify(watcher) }) 373 txn.Commit() 374 return nil 375 } 376 377 // DeleteJob is used to deregister a job 378 func (s *StateStore) DeleteJob(index uint64, jobID string) error { 379 txn := s.db.Txn(true) 380 defer txn.Abort() 381 382 // Lookup the node 383 existing, err := txn.First("jobs", "id", jobID) 384 if err != nil { 385 return fmt.Errorf("job lookup failed: %v", err) 386 } 387 if existing == nil { 388 return fmt.Errorf("job not found") 389 } 390 391 watcher := watch.NewItems() 392 watcher.Add(watch.Item{Table: "jobs"}) 393 watcher.Add(watch.Item{Job: jobID}) 394 watcher.Add(watch.Item{Table: "job_summary"}) 395 watcher.Add(watch.Item{JobSummary: jobID}) 396 397 // Delete the node 398 if err := txn.Delete("jobs", existing); err != nil { 399 return fmt.Errorf("job delete failed: %v", err) 400 } 401 if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil { 402 return fmt.Errorf("index update failed: %v", err) 403 } 404 405 // Delete the job summary 406 if _, err = txn.DeleteAll("job_summary", "id", jobID); err != nil { 407 return fmt.Errorf("deleing job summary failed: %v", err) 408 } 409 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 410 return fmt.Errorf("index update failed: %v", err) 411 } 412 413 txn.Defer(func() { s.watch.notify(watcher) }) 414 txn.Commit() 415 return nil 416 } 417 418 // JobByID is used to lookup a job by its ID 419 func (s *StateStore) JobByID(id string) (*structs.Job, error) { 420 txn := s.db.Txn(false) 421 422 existing, err := txn.First("jobs", "id", id) 423 if err != nil { 424 return nil, fmt.Errorf("job lookup failed: %v", err) 425 } 426 427 if existing != nil { 428 return existing.(*structs.Job), nil 429 } 430 return nil, nil 431 } 432 433 // JobsByIDPrefix is used to lookup a job by prefix 434 func (s *StateStore) JobsByIDPrefix(id string) (memdb.ResultIterator, error) { 435 txn := s.db.Txn(false) 436 437 iter, err := txn.Get("jobs", "id_prefix", id) 438 if err != nil { 439 return nil, fmt.Errorf("job lookup failed: %v", err) 440 } 441 442 return iter, nil 443 } 444 445 // Jobs returns an iterator over all the jobs 446 func (s *StateStore) Jobs() (memdb.ResultIterator, error) { 447 txn := s.db.Txn(false) 448 449 // Walk the entire jobs table 450 iter, err := txn.Get("jobs", "id") 451 if err != nil { 452 return nil, err 453 } 454 return iter, nil 455 } 456 457 // JobsByPeriodic returns an iterator over all the periodic or non-periodic jobs. 458 func (s *StateStore) JobsByPeriodic(periodic bool) (memdb.ResultIterator, error) { 459 txn := s.db.Txn(false) 460 461 iter, err := txn.Get("jobs", "periodic", periodic) 462 if err != nil { 463 return nil, err 464 } 465 return iter, nil 466 } 467 468 // JobsByScheduler returns an iterator over all the jobs with the specific 469 // scheduler type. 470 func (s *StateStore) JobsByScheduler(schedulerType string) (memdb.ResultIterator, error) { 471 txn := s.db.Txn(false) 472 473 // Return an iterator for jobs with the specific type. 474 iter, err := txn.Get("jobs", "type", schedulerType) 475 if err != nil { 476 return nil, err 477 } 478 return iter, nil 479 } 480 481 // JobsByGC returns an iterator over all jobs eligible or uneligible for garbage 482 // collection. 483 func (s *StateStore) JobsByGC(gc bool) (memdb.ResultIterator, error) { 484 txn := s.db.Txn(false) 485 486 iter, err := txn.Get("jobs", "gc", gc) 487 if err != nil { 488 return nil, err 489 } 490 return iter, nil 491 } 492 493 // JobSummary returns a job summary object which matches a specific id. 494 func (s *StateStore) JobSummaryByID(jobID string) (*structs.JobSummary, error) { 495 txn := s.db.Txn(false) 496 497 existing, err := txn.First("job_summary", "id", jobID) 498 if err != nil { 499 return nil, err 500 } 501 if existing != nil { 502 summary := existing.(structs.JobSummary) 503 return summary.Copy(), nil 504 } 505 506 return nil, nil 507 } 508 509 // JobSummaries walks the entire job summary table and returns all the job 510 // summary objects 511 func (s *StateStore) JobSummaries() (memdb.ResultIterator, error) { 512 txn := s.db.Txn(false) 513 514 iter, err := txn.Get("job_summary", "id") 515 if err != nil { 516 return nil, err 517 } 518 return iter, nil 519 } 520 521 // JobSummaryByPrefix is used to look up Job Summary by id prefix 522 func (s *StateStore) JobSummaryByPrefix(id string) (memdb.ResultIterator, error) { 523 txn := s.db.Txn(false) 524 525 iter, err := txn.Get("job_summary", "id_prefix", id) 526 if err != nil { 527 return nil, fmt.Errorf("eval lookup failed: %v", err) 528 } 529 530 return iter, nil 531 } 532 533 // UpsertPeriodicLaunch is used to register a launch or update it. 534 func (s *StateStore) UpsertPeriodicLaunch(index uint64, launch *structs.PeriodicLaunch) error { 535 txn := s.db.Txn(true) 536 defer txn.Abort() 537 538 watcher := watch.NewItems() 539 watcher.Add(watch.Item{Table: "periodic_launch"}) 540 watcher.Add(watch.Item{Job: launch.ID}) 541 542 // Check if the job already exists 543 existing, err := txn.First("periodic_launch", "id", launch.ID) 544 if err != nil { 545 return fmt.Errorf("periodic launch lookup failed: %v", err) 546 } 547 548 // Setup the indexes correctly 549 if existing != nil { 550 launch.CreateIndex = existing.(*structs.PeriodicLaunch).CreateIndex 551 launch.ModifyIndex = index 552 } else { 553 launch.CreateIndex = index 554 launch.ModifyIndex = index 555 } 556 557 // Insert the job 558 if err := txn.Insert("periodic_launch", launch); err != nil { 559 return fmt.Errorf("launch insert failed: %v", err) 560 } 561 if err := txn.Insert("index", &IndexEntry{"periodic_launch", index}); err != nil { 562 return fmt.Errorf("index update failed: %v", err) 563 } 564 565 txn.Defer(func() { s.watch.notify(watcher) }) 566 txn.Commit() 567 return nil 568 } 569 570 // DeletePeriodicLaunch is used to delete the periodic launch 571 func (s *StateStore) DeletePeriodicLaunch(index uint64, jobID string) error { 572 txn := s.db.Txn(true) 573 defer txn.Abort() 574 575 // Lookup the launch 576 existing, err := txn.First("periodic_launch", "id", jobID) 577 if err != nil { 578 return fmt.Errorf("launch lookup failed: %v", err) 579 } 580 if existing == nil { 581 return fmt.Errorf("launch not found") 582 } 583 584 watcher := watch.NewItems() 585 watcher.Add(watch.Item{Table: "periodic_launch"}) 586 watcher.Add(watch.Item{Job: jobID}) 587 588 // Delete the launch 589 if err := txn.Delete("periodic_launch", existing); err != nil { 590 return fmt.Errorf("launch delete failed: %v", err) 591 } 592 if err := txn.Insert("index", &IndexEntry{"periodic_launch", index}); err != nil { 593 return fmt.Errorf("index update failed: %v", err) 594 } 595 596 txn.Defer(func() { s.watch.notify(watcher) }) 597 txn.Commit() 598 return nil 599 } 600 601 // PeriodicLaunchByID is used to lookup a periodic launch by the periodic job 602 // ID. 603 func (s *StateStore) PeriodicLaunchByID(id string) (*structs.PeriodicLaunch, error) { 604 txn := s.db.Txn(false) 605 606 existing, err := txn.First("periodic_launch", "id", id) 607 if err != nil { 608 return nil, fmt.Errorf("periodic launch lookup failed: %v", err) 609 } 610 611 if existing != nil { 612 return existing.(*structs.PeriodicLaunch), nil 613 } 614 return nil, nil 615 } 616 617 // PeriodicLaunches returns an iterator over all the periodic launches 618 func (s *StateStore) PeriodicLaunches() (memdb.ResultIterator, error) { 619 txn := s.db.Txn(false) 620 621 // Walk the entire table 622 iter, err := txn.Get("periodic_launch", "id") 623 if err != nil { 624 return nil, err 625 } 626 return iter, nil 627 } 628 629 // UpsertEvaluation is used to upsert an evaluation 630 func (s *StateStore) UpsertEvals(index uint64, evals []*structs.Evaluation) error { 631 txn := s.db.Txn(true) 632 defer txn.Abort() 633 634 watcher := watch.NewItems() 635 watcher.Add(watch.Item{Table: "evals"}) 636 637 // Do a nested upsert 638 jobs := make(map[string]string, len(evals)) 639 for _, eval := range evals { 640 watcher.Add(watch.Item{Eval: eval.ID}) 641 if err := s.nestedUpsertEval(txn, index, eval); err != nil { 642 return err 643 } 644 645 jobs[eval.JobID] = "" 646 } 647 648 // Set the job's status 649 if err := s.setJobStatuses(index, watcher, txn, jobs, false); err != nil { 650 return fmt.Errorf("setting job status failed: %v", err) 651 } 652 653 txn.Defer(func() { s.watch.notify(watcher) }) 654 txn.Commit() 655 return nil 656 } 657 658 // nestedUpsertEvaluation is used to nest an evaluation upsert within a transaction 659 func (s *StateStore) nestedUpsertEval(txn *memdb.Txn, index uint64, eval *structs.Evaluation) error { 660 // Lookup the evaluation 661 existing, err := txn.First("evals", "id", eval.ID) 662 if err != nil { 663 return fmt.Errorf("eval lookup failed: %v", err) 664 } 665 666 // Update the indexes 667 if existing != nil { 668 eval.CreateIndex = existing.(*structs.Evaluation).CreateIndex 669 eval.ModifyIndex = index 670 } else { 671 eval.CreateIndex = index 672 eval.ModifyIndex = index 673 } 674 675 // Update the job summary 676 summaryRaw, err := txn.First("job_summary", "id", eval.JobID) 677 if err != nil { 678 return fmt.Errorf("job summary lookup failed: %v", err) 679 } 680 if summaryRaw != nil { 681 js := summaryRaw.(structs.JobSummary) 682 var hasSummaryChanged bool 683 for tg, num := range eval.QueuedAllocations { 684 if summary, ok := js.Summary[tg]; ok { 685 if summary.Queued != num { 686 summary.Queued = num 687 js.Summary[tg] = summary 688 hasSummaryChanged = true 689 } 690 } else { 691 s.logger.Printf("[ERR] state_store: unable to update queued for job %q and task group %q", eval.JobID, tg) 692 } 693 } 694 695 // Insert the job summary 696 if hasSummaryChanged { 697 js.ModifyIndex = index 698 if err := txn.Insert("job_summary", js); err != nil { 699 return fmt.Errorf("job summary insert failed: %v", err) 700 } 701 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 702 return fmt.Errorf("index update failed: %v", err) 703 } 704 } 705 } 706 707 // Insert the eval 708 if err := txn.Insert("evals", eval); err != nil { 709 return fmt.Errorf("eval insert failed: %v", err) 710 } 711 if err := txn.Insert("index", &IndexEntry{"evals", index}); err != nil { 712 return fmt.Errorf("index update failed: %v", err) 713 } 714 return nil 715 } 716 717 // DeleteEval is used to delete an evaluation 718 func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) error { 719 txn := s.db.Txn(true) 720 defer txn.Abort() 721 watcher := watch.NewItems() 722 watcher.Add(watch.Item{Table: "evals"}) 723 watcher.Add(watch.Item{Table: "allocs"}) 724 725 jobs := make(map[string]string, len(evals)) 726 for _, eval := range evals { 727 existing, err := txn.First("evals", "id", eval) 728 if err != nil { 729 return fmt.Errorf("eval lookup failed: %v", err) 730 } 731 if existing == nil { 732 continue 733 } 734 if err := txn.Delete("evals", existing); err != nil { 735 return fmt.Errorf("eval delete failed: %v", err) 736 } 737 watcher.Add(watch.Item{Eval: eval}) 738 jobs[existing.(*structs.Evaluation).JobID] = "" 739 } 740 741 for _, alloc := range allocs { 742 existing, err := txn.First("allocs", "id", alloc) 743 if err != nil { 744 return fmt.Errorf("alloc lookup failed: %v", err) 745 } 746 if existing == nil { 747 continue 748 } 749 if err := txn.Delete("allocs", existing); err != nil { 750 return fmt.Errorf("alloc delete failed: %v", err) 751 } 752 realAlloc := existing.(*structs.Allocation) 753 watcher.Add(watch.Item{Alloc: realAlloc.ID}) 754 watcher.Add(watch.Item{AllocEval: realAlloc.EvalID}) 755 watcher.Add(watch.Item{AllocJob: realAlloc.JobID}) 756 watcher.Add(watch.Item{AllocNode: realAlloc.NodeID}) 757 } 758 759 // Update the indexes 760 if err := txn.Insert("index", &IndexEntry{"evals", index}); err != nil { 761 return fmt.Errorf("index update failed: %v", err) 762 } 763 if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { 764 return fmt.Errorf("index update failed: %v", err) 765 } 766 767 // Set the job's status 768 if err := s.setJobStatuses(index, watcher, txn, jobs, true); err != nil { 769 return fmt.Errorf("setting job status failed: %v", err) 770 } 771 772 txn.Defer(func() { s.watch.notify(watcher) }) 773 txn.Commit() 774 return nil 775 } 776 777 // EvalByID is used to lookup an eval by its ID 778 func (s *StateStore) EvalByID(id string) (*structs.Evaluation, error) { 779 txn := s.db.Txn(false) 780 781 existing, err := txn.First("evals", "id", id) 782 if err != nil { 783 return nil, fmt.Errorf("eval lookup failed: %v", err) 784 } 785 786 if existing != nil { 787 return existing.(*structs.Evaluation), nil 788 } 789 return nil, nil 790 } 791 792 // EvalsByIDPrefix is used to lookup evaluations by prefix 793 func (s *StateStore) EvalsByIDPrefix(id string) (memdb.ResultIterator, error) { 794 txn := s.db.Txn(false) 795 796 iter, err := txn.Get("evals", "id_prefix", id) 797 if err != nil { 798 return nil, fmt.Errorf("eval lookup failed: %v", err) 799 } 800 801 return iter, nil 802 } 803 804 // EvalsByJob returns all the evaluations by job id 805 func (s *StateStore) EvalsByJob(jobID string) ([]*structs.Evaluation, error) { 806 txn := s.db.Txn(false) 807 808 // Get an iterator over the node allocations 809 iter, err := txn.Get("evals", "job", jobID) 810 if err != nil { 811 return nil, err 812 } 813 814 var out []*structs.Evaluation 815 for { 816 raw := iter.Next() 817 if raw == nil { 818 break 819 } 820 out = append(out, raw.(*structs.Evaluation)) 821 } 822 return out, nil 823 } 824 825 // Evals returns an iterator over all the evaluations 826 func (s *StateStore) Evals() (memdb.ResultIterator, error) { 827 txn := s.db.Txn(false) 828 829 // Walk the entire table 830 iter, err := txn.Get("evals", "id") 831 if err != nil { 832 return nil, err 833 } 834 return iter, nil 835 } 836 837 // UpdateAllocsFromClient is used to update an allocation based on input 838 839 // from a client. While the schedulers are the authority on the allocation for 840 // most things, some updates are authoritative from the client. Specifically, 841 // the desired state comes from the schedulers, while the actual state comes 842 // from clients. 843 func (s *StateStore) UpdateAllocsFromClient(index uint64, allocs []*structs.Allocation) error { 844 txn := s.db.Txn(true) 845 defer txn.Abort() 846 847 // Setup the watcher 848 watcher := watch.NewItems() 849 watcher.Add(watch.Item{Table: "allocs"}) 850 851 // Handle each of the updated allocations 852 for _, alloc := range allocs { 853 if err := s.nestedUpdateAllocFromClient(txn, watcher, index, alloc); err != nil { 854 return err 855 } 856 } 857 858 // Update the indexes 859 if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { 860 return fmt.Errorf("index update failed: %v", err) 861 } 862 863 txn.Defer(func() { s.watch.notify(watcher) }) 864 txn.Commit() 865 return nil 866 } 867 868 // nestedUpdateAllocFromClient is used to nest an update of an allocation with client status 869 func (s *StateStore) nestedUpdateAllocFromClient(txn *memdb.Txn, watcher watch.Items, index uint64, alloc *structs.Allocation) error { 870 // Look for existing alloc 871 existing, err := txn.First("allocs", "id", alloc.ID) 872 if err != nil { 873 return fmt.Errorf("alloc lookup failed: %v", err) 874 } 875 876 // Nothing to do if this does not exist 877 if existing == nil { 878 return nil 879 } 880 exist := existing.(*structs.Allocation) 881 // Trigger the watcher 882 watcher.Add(watch.Item{Alloc: alloc.ID}) 883 watcher.Add(watch.Item{AllocEval: exist.EvalID}) 884 watcher.Add(watch.Item{AllocJob: exist.JobID}) 885 watcher.Add(watch.Item{AllocNode: exist.NodeID}) 886 887 // Copy everything from the existing allocation 888 copyAlloc := new(structs.Allocation) 889 *copyAlloc = *exist 890 891 // Pull in anything the client is the authority on 892 copyAlloc.ClientStatus = alloc.ClientStatus 893 copyAlloc.ClientDescription = alloc.ClientDescription 894 copyAlloc.TaskStates = alloc.TaskStates 895 896 // Update the modify index 897 copyAlloc.ModifyIndex = index 898 899 if err := s.updateSummaryWithAlloc(index, copyAlloc, exist, watcher, txn); err != nil { 900 return fmt.Errorf("error updating job summary: %v", err) 901 } 902 903 // Update the allocation 904 if err := txn.Insert("allocs", copyAlloc); err != nil { 905 return fmt.Errorf("alloc insert failed: %v", err) 906 } 907 908 // Set the job's status 909 forceStatus := "" 910 if !copyAlloc.TerminalStatus() { 911 forceStatus = structs.JobStatusRunning 912 } 913 jobs := map[string]string{exist.JobID: forceStatus} 914 if err := s.setJobStatuses(index, watcher, txn, jobs, false); err != nil { 915 return fmt.Errorf("setting job status failed: %v", err) 916 } 917 return nil 918 } 919 920 // UpsertAllocs is used to evict a set of allocations 921 // and allocate new ones at the same time. 922 func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) error { 923 txn := s.db.Txn(true) 924 defer txn.Abort() 925 926 watcher := watch.NewItems() 927 watcher.Add(watch.Item{Table: "allocs"}) 928 929 // Handle the allocations 930 jobs := make(map[string]string, 1) 931 for _, alloc := range allocs { 932 existing, err := txn.First("allocs", "id", alloc.ID) 933 if err != nil { 934 return fmt.Errorf("alloc lookup failed: %v", err) 935 } 936 exist, _ := existing.(*structs.Allocation) 937 938 if exist == nil { 939 alloc.CreateIndex = index 940 alloc.ModifyIndex = index 941 alloc.AllocModifyIndex = index 942 } else { 943 alloc.CreateIndex = exist.CreateIndex 944 alloc.ModifyIndex = index 945 alloc.AllocModifyIndex = index 946 947 // If the scheduler is marking this allocation as lost we do not 948 // want to reuse the status of the existing allocation. 949 if alloc.ClientStatus != structs.AllocClientStatusLost { 950 alloc.ClientStatus = exist.ClientStatus 951 alloc.ClientDescription = exist.ClientDescription 952 } 953 954 // The job has been denormalized so re-attach the original job 955 if alloc.Job == nil { 956 alloc.Job = exist.Job 957 } 958 } 959 960 if err := s.updateSummaryWithAlloc(index, alloc, exist, watcher, txn); err != nil { 961 return fmt.Errorf("error updating job summary: %v", err) 962 } 963 964 // Create the EphemeralDisk if it's nil by adding up DiskMB from task resources. 965 // COMPAT 0.4.1 -> 0.5 966 if alloc.Job != nil { 967 s.addEphemeralDiskToTaskGroups(alloc.Job) 968 } 969 970 if err := txn.Insert("allocs", alloc); err != nil { 971 return fmt.Errorf("alloc insert failed: %v", err) 972 } 973 974 // If the allocation is running, force the job to running status. 975 forceStatus := "" 976 if !alloc.TerminalStatus() { 977 forceStatus = structs.JobStatusRunning 978 } 979 jobs[alloc.JobID] = forceStatus 980 981 watcher.Add(watch.Item{Alloc: alloc.ID}) 982 watcher.Add(watch.Item{AllocEval: alloc.EvalID}) 983 watcher.Add(watch.Item{AllocJob: alloc.JobID}) 984 watcher.Add(watch.Item{AllocNode: alloc.NodeID}) 985 } 986 987 // Update the indexes 988 if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { 989 return fmt.Errorf("index update failed: %v", err) 990 } 991 992 // Set the job's status 993 if err := s.setJobStatuses(index, watcher, txn, jobs, false); err != nil { 994 return fmt.Errorf("setting job status failed: %v", err) 995 } 996 997 txn.Defer(func() { s.watch.notify(watcher) }) 998 txn.Commit() 999 return nil 1000 } 1001 1002 // AllocByID is used to lookup an allocation by its ID 1003 func (s *StateStore) AllocByID(id string) (*structs.Allocation, error) { 1004 txn := s.db.Txn(false) 1005 1006 existing, err := txn.First("allocs", "id", id) 1007 if err != nil { 1008 return nil, fmt.Errorf("alloc lookup failed: %v", err) 1009 } 1010 1011 if existing != nil { 1012 return existing.(*structs.Allocation), nil 1013 } 1014 return nil, nil 1015 } 1016 1017 // AllocsByIDPrefix is used to lookup allocs by prefix 1018 func (s *StateStore) AllocsByIDPrefix(id string) (memdb.ResultIterator, error) { 1019 txn := s.db.Txn(false) 1020 1021 iter, err := txn.Get("allocs", "id_prefix", id) 1022 if err != nil { 1023 return nil, fmt.Errorf("alloc lookup failed: %v", err) 1024 } 1025 1026 return iter, nil 1027 } 1028 1029 // AllocsByNode returns all the allocations by node 1030 func (s *StateStore) AllocsByNode(node string) ([]*structs.Allocation, error) { 1031 txn := s.db.Txn(false) 1032 1033 // Get an iterator over the node allocations, using only the 1034 // node prefix which ignores the terminal status 1035 iter, err := txn.Get("allocs", "node_prefix", node) 1036 if err != nil { 1037 return nil, err 1038 } 1039 1040 var out []*structs.Allocation 1041 for { 1042 raw := iter.Next() 1043 if raw == nil { 1044 break 1045 } 1046 out = append(out, raw.(*structs.Allocation)) 1047 } 1048 return out, nil 1049 } 1050 1051 // AllocsByNode returns all the allocations by node and terminal status 1052 func (s *StateStore) AllocsByNodeTerminal(node string, terminal bool) ([]*structs.Allocation, error) { 1053 txn := s.db.Txn(false) 1054 1055 // Get an iterator over the node allocations 1056 iter, err := txn.Get("allocs", "node", node, terminal) 1057 if err != nil { 1058 return nil, err 1059 } 1060 1061 var out []*structs.Allocation 1062 for { 1063 raw := iter.Next() 1064 if raw == nil { 1065 break 1066 } 1067 out = append(out, raw.(*structs.Allocation)) 1068 } 1069 return out, nil 1070 } 1071 1072 // AllocsByJob returns all the allocations by job id 1073 func (s *StateStore) AllocsByJob(jobID string) ([]*structs.Allocation, error) { 1074 txn := s.db.Txn(false) 1075 1076 // Get an iterator over the node allocations 1077 iter, err := txn.Get("allocs", "job", jobID) 1078 if err != nil { 1079 return nil, err 1080 } 1081 1082 var out []*structs.Allocation 1083 for { 1084 raw := iter.Next() 1085 if raw == nil { 1086 break 1087 } 1088 out = append(out, raw.(*structs.Allocation)) 1089 } 1090 return out, nil 1091 } 1092 1093 // AllocsByEval returns all the allocations by eval id 1094 func (s *StateStore) AllocsByEval(evalID string) ([]*structs.Allocation, error) { 1095 txn := s.db.Txn(false) 1096 1097 // Get an iterator over the eval allocations 1098 iter, err := txn.Get("allocs", "eval", evalID) 1099 if err != nil { 1100 return nil, err 1101 } 1102 1103 var out []*structs.Allocation 1104 for { 1105 raw := iter.Next() 1106 if raw == nil { 1107 break 1108 } 1109 out = append(out, raw.(*structs.Allocation)) 1110 } 1111 return out, nil 1112 } 1113 1114 // Allocs returns an iterator over all the evaluations 1115 func (s *StateStore) Allocs() (memdb.ResultIterator, error) { 1116 txn := s.db.Txn(false) 1117 1118 // Walk the entire table 1119 iter, err := txn.Get("allocs", "id") 1120 if err != nil { 1121 return nil, err 1122 } 1123 return iter, nil 1124 } 1125 1126 // UpsertVaultAccessors is used to register a set of Vault Accessors 1127 func (s *StateStore) UpsertVaultAccessor(index uint64, accessors []*structs.VaultAccessor) error { 1128 txn := s.db.Txn(true) 1129 defer txn.Abort() 1130 1131 for _, accessor := range accessors { 1132 // Set the create index 1133 accessor.CreateIndex = index 1134 1135 // Insert the accessor 1136 if err := txn.Insert("vault_accessors", accessor); err != nil { 1137 return fmt.Errorf("accessor insert failed: %v", err) 1138 } 1139 } 1140 1141 if err := txn.Insert("index", &IndexEntry{"vault_accessors", index}); err != nil { 1142 return fmt.Errorf("index update failed: %v", err) 1143 } 1144 1145 txn.Commit() 1146 return nil 1147 } 1148 1149 // DeleteVaultAccessors is used to delete a set of Vault Accessors 1150 func (s *StateStore) DeleteVaultAccessors(index uint64, accessors []*structs.VaultAccessor) error { 1151 txn := s.db.Txn(true) 1152 defer txn.Abort() 1153 1154 // Lookup the accessor 1155 for _, accessor := range accessors { 1156 // Delete the accessor 1157 if err := txn.Delete("vault_accessors", accessor); err != nil { 1158 return fmt.Errorf("accessor delete failed: %v", err) 1159 } 1160 } 1161 1162 if err := txn.Insert("index", &IndexEntry{"vault_accessors", index}); err != nil { 1163 return fmt.Errorf("index update failed: %v", err) 1164 } 1165 1166 txn.Commit() 1167 return nil 1168 } 1169 1170 // VaultAccessor returns the given Vault accessor 1171 func (s *StateStore) VaultAccessor(accessor string) (*structs.VaultAccessor, error) { 1172 txn := s.db.Txn(false) 1173 1174 existing, err := txn.First("vault_accessors", "id", accessor) 1175 if err != nil { 1176 return nil, fmt.Errorf("accessor lookup failed: %v", err) 1177 } 1178 1179 if existing != nil { 1180 return existing.(*structs.VaultAccessor), nil 1181 } 1182 1183 return nil, nil 1184 } 1185 1186 // VaultAccessors returns an iterator of Vault accessors. 1187 func (s *StateStore) VaultAccessors() (memdb.ResultIterator, error) { 1188 txn := s.db.Txn(false) 1189 1190 iter, err := txn.Get("vault_accessors", "id") 1191 if err != nil { 1192 return nil, err 1193 } 1194 return iter, nil 1195 } 1196 1197 // VaultAccessorsByAlloc returns all the Vault accessors by alloc id 1198 func (s *StateStore) VaultAccessorsByAlloc(allocID string) ([]*structs.VaultAccessor, error) { 1199 txn := s.db.Txn(false) 1200 1201 // Get an iterator over the accessors 1202 iter, err := txn.Get("vault_accessors", "alloc_id", allocID) 1203 if err != nil { 1204 return nil, err 1205 } 1206 1207 var out []*structs.VaultAccessor 1208 for { 1209 raw := iter.Next() 1210 if raw == nil { 1211 break 1212 } 1213 out = append(out, raw.(*structs.VaultAccessor)) 1214 } 1215 return out, nil 1216 } 1217 1218 // VaultAccessorsByNode returns all the Vault accessors by node id 1219 func (s *StateStore) VaultAccessorsByNode(nodeID string) ([]*structs.VaultAccessor, error) { 1220 txn := s.db.Txn(false) 1221 1222 // Get an iterator over the accessors 1223 iter, err := txn.Get("vault_accessors", "node_id", nodeID) 1224 if err != nil { 1225 return nil, err 1226 } 1227 1228 var out []*structs.VaultAccessor 1229 for { 1230 raw := iter.Next() 1231 if raw == nil { 1232 break 1233 } 1234 out = append(out, raw.(*structs.VaultAccessor)) 1235 } 1236 return out, nil 1237 } 1238 1239 // LastIndex returns the greatest index value for all indexes 1240 func (s *StateStore) LatestIndex() (uint64, error) { 1241 indexes, err := s.Indexes() 1242 if err != nil { 1243 return 0, err 1244 } 1245 1246 var max uint64 = 0 1247 for { 1248 raw := indexes.Next() 1249 if raw == nil { 1250 break 1251 } 1252 1253 // Prepare the request struct 1254 idx := raw.(*IndexEntry) 1255 1256 // Determine the max 1257 if idx.Value > max { 1258 max = idx.Value 1259 } 1260 } 1261 1262 return max, nil 1263 } 1264 1265 // Index finds the matching index value 1266 func (s *StateStore) Index(name string) (uint64, error) { 1267 txn := s.db.Txn(false) 1268 1269 // Lookup the first matching index 1270 out, err := txn.First("index", "id", name) 1271 if err != nil { 1272 return 0, err 1273 } 1274 if out == nil { 1275 return 0, nil 1276 } 1277 return out.(*IndexEntry).Value, nil 1278 } 1279 1280 // RemoveIndex is a helper method to remove an index for testing purposes 1281 func (s *StateStore) RemoveIndex(name string) error { 1282 txn := s.db.Txn(true) 1283 defer txn.Abort() 1284 1285 if _, err := txn.DeleteAll("index", "id", name); err != nil { 1286 return err 1287 } 1288 1289 txn.Commit() 1290 return nil 1291 } 1292 1293 // Indexes returns an iterator over all the indexes 1294 func (s *StateStore) Indexes() (memdb.ResultIterator, error) { 1295 txn := s.db.Txn(false) 1296 1297 // Walk the entire nodes table 1298 iter, err := txn.Get("index", "id") 1299 if err != nil { 1300 return nil, err 1301 } 1302 return iter, nil 1303 } 1304 1305 // ReconcileJobSummaries re-creates summaries for all jobs present in the state 1306 // store 1307 func (s *StateStore) ReconcileJobSummaries(index uint64) error { 1308 txn := s.db.Txn(true) 1309 defer txn.Abort() 1310 1311 // Get all the jobs 1312 iter, err := txn.Get("jobs", "id") 1313 if err != nil { 1314 return err 1315 } 1316 for { 1317 rawJob := iter.Next() 1318 if rawJob == nil { 1319 break 1320 } 1321 job := rawJob.(*structs.Job) 1322 1323 // Create a job summary for the job 1324 summary := structs.JobSummary{ 1325 JobID: job.ID, 1326 Summary: make(map[string]structs.TaskGroupSummary), 1327 } 1328 for _, tg := range job.TaskGroups { 1329 summary.Summary[tg.Name] = structs.TaskGroupSummary{} 1330 } 1331 1332 // Find all the allocations for the jobs 1333 iterAllocs, err := txn.Get("allocs", "job", job.ID) 1334 if err != nil { 1335 return err 1336 } 1337 1338 // Calculate the summary for the job 1339 for { 1340 rawAlloc := iterAllocs.Next() 1341 if rawAlloc == nil { 1342 break 1343 } 1344 alloc := rawAlloc.(*structs.Allocation) 1345 1346 // Ignore the allocation if it doesn't belong to the currently 1347 // registered job 1348 if alloc.Job.CreateIndex != job.CreateIndex { 1349 continue 1350 } 1351 1352 tg := summary.Summary[alloc.TaskGroup] 1353 switch alloc.ClientStatus { 1354 case structs.AllocClientStatusFailed: 1355 tg.Failed += 1 1356 case structs.AllocClientStatusLost: 1357 tg.Lost += 1 1358 case structs.AllocClientStatusComplete: 1359 tg.Complete += 1 1360 case structs.AllocClientStatusRunning: 1361 tg.Running += 1 1362 case structs.AllocClientStatusPending: 1363 tg.Starting += 1 1364 default: 1365 s.logger.Printf("[ERR] state_store: invalid client status: %v in allocation %q", alloc.ClientStatus, alloc.ID) 1366 } 1367 summary.Summary[alloc.TaskGroup] = tg 1368 } 1369 1370 // Set the create index of the summary same as the job's create index 1371 // and the modify index to the current index 1372 summary.CreateIndex = job.CreateIndex 1373 summary.ModifyIndex = index 1374 1375 // Insert the job summary 1376 if err := txn.Insert("job_summary", summary); err != nil { 1377 return fmt.Errorf("error inserting job summary: %v", err) 1378 } 1379 } 1380 1381 // Update the indexes table for job summary 1382 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 1383 return fmt.Errorf("index update failed: %v", err) 1384 } 1385 txn.Commit() 1386 return nil 1387 } 1388 1389 // setJobStatuses is a helper for calling setJobStatus on multiple jobs by ID. 1390 // It takes a map of job IDs to an optional forceStatus string. It returns an 1391 // error if the job doesn't exist or setJobStatus fails. 1392 func (s *StateStore) setJobStatuses(index uint64, watcher watch.Items, txn *memdb.Txn, 1393 jobs map[string]string, evalDelete bool) error { 1394 for job, forceStatus := range jobs { 1395 existing, err := txn.First("jobs", "id", job) 1396 if err != nil { 1397 return fmt.Errorf("job lookup failed: %v", err) 1398 } 1399 1400 if existing == nil { 1401 continue 1402 } 1403 1404 if err := s.setJobStatus(index, watcher, txn, existing.(*structs.Job), evalDelete, forceStatus); err != nil { 1405 return err 1406 } 1407 } 1408 1409 return nil 1410 } 1411 1412 // setJobStatus sets the status of the job by looking up associated evaluations 1413 // and allocations. evalDelete should be set to true if setJobStatus is being 1414 // called because an evaluation is being deleted (potentially because of garbage 1415 // collection). If forceStatus is non-empty, the job's status will be set to the 1416 // passed status. 1417 func (s *StateStore) setJobStatus(index uint64, watcher watch.Items, txn *memdb.Txn, 1418 job *structs.Job, evalDelete bool, forceStatus string) error { 1419 1420 // Capture the current status so we can check if there is a change 1421 oldStatus := job.Status 1422 newStatus := forceStatus 1423 1424 // If forceStatus is not set, compute the jobs status. 1425 if forceStatus == "" { 1426 var err error 1427 newStatus, err = s.getJobStatus(txn, job, evalDelete) 1428 if err != nil { 1429 return err 1430 } 1431 } 1432 1433 // Fast-path if nothing has changed. 1434 if oldStatus == newStatus { 1435 return nil 1436 } 1437 1438 // The job has changed, so add to watcher. 1439 watcher.Add(watch.Item{Table: "jobs"}) 1440 watcher.Add(watch.Item{Job: job.ID}) 1441 1442 // Copy and update the existing job 1443 updated := job.Copy() 1444 updated.Status = newStatus 1445 updated.ModifyIndex = index 1446 1447 // Insert the job 1448 if err := txn.Insert("jobs", updated); err != nil { 1449 return fmt.Errorf("job insert failed: %v", err) 1450 } 1451 if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil { 1452 return fmt.Errorf("index update failed: %v", err) 1453 } 1454 return nil 1455 } 1456 1457 func (s *StateStore) getJobStatus(txn *memdb.Txn, job *structs.Job, evalDelete bool) (string, error) { 1458 allocs, err := txn.Get("allocs", "job", job.ID) 1459 if err != nil { 1460 return "", err 1461 } 1462 1463 // If there is a non-terminal allocation, the job is running. 1464 hasAlloc := false 1465 for alloc := allocs.Next(); alloc != nil; alloc = allocs.Next() { 1466 hasAlloc = true 1467 if !alloc.(*structs.Allocation).TerminalStatus() { 1468 return structs.JobStatusRunning, nil 1469 } 1470 } 1471 1472 evals, err := txn.Get("evals", "job", job.ID) 1473 if err != nil { 1474 return "", err 1475 } 1476 1477 hasEval := false 1478 for eval := evals.Next(); eval != nil; eval = evals.Next() { 1479 hasEval = true 1480 if !eval.(*structs.Evaluation).TerminalStatus() { 1481 return structs.JobStatusPending, nil 1482 } 1483 } 1484 1485 // The job is dead if all the allocations and evals are terminal or if there 1486 // are no evals because of garbage collection. 1487 if evalDelete || hasEval || hasAlloc { 1488 return structs.JobStatusDead, nil 1489 } 1490 1491 // If there are no allocations or evaluations it is a new job. If the job is 1492 // periodic, we mark it as running as it will never have an 1493 // allocation/evaluation against it. 1494 if job.IsPeriodic() { 1495 return structs.JobStatusRunning, nil 1496 } 1497 return structs.JobStatusPending, nil 1498 } 1499 1500 // updateSummaryWithJob creates or updates job summaries when new jobs are 1501 // upserted or existing ones are updated 1502 func (s *StateStore) updateSummaryWithJob(index uint64, job *structs.Job, 1503 watcher watch.Items, txn *memdb.Txn) error { 1504 1505 existing, err := s.JobSummaryByID(job.ID) 1506 if err != nil { 1507 return fmt.Errorf("unable to retrieve summary for job: %v", err) 1508 } 1509 var hasSummaryChanged bool 1510 if existing == nil { 1511 existing = &structs.JobSummary{ 1512 JobID: job.ID, 1513 Summary: make(map[string]structs.TaskGroupSummary), 1514 CreateIndex: index, 1515 } 1516 hasSummaryChanged = true 1517 } 1518 for _, tg := range job.TaskGroups { 1519 if _, ok := existing.Summary[tg.Name]; !ok { 1520 newSummary := structs.TaskGroupSummary{ 1521 Complete: 0, 1522 Failed: 0, 1523 Running: 0, 1524 Starting: 0, 1525 } 1526 existing.Summary[tg.Name] = newSummary 1527 hasSummaryChanged = true 1528 } 1529 } 1530 1531 // The job summary has changed, so add to watcher and update the modify 1532 // index. 1533 if hasSummaryChanged { 1534 existing.ModifyIndex = index 1535 watcher.Add(watch.Item{Table: "job_summary"}) 1536 watcher.Add(watch.Item{JobSummary: job.ID}) 1537 1538 // Update the indexes table for job summary 1539 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 1540 return fmt.Errorf("index update failed: %v", err) 1541 } 1542 if err := txn.Insert("job_summary", *existing); err != nil { 1543 return err 1544 } 1545 } 1546 1547 return nil 1548 } 1549 1550 // updateSummaryWithAlloc updates the job summary when allocations are updated 1551 // or inserted 1552 func (s *StateStore) updateSummaryWithAlloc(index uint64, alloc *structs.Allocation, 1553 existingAlloc *structs.Allocation, watcher watch.Items, txn *memdb.Txn) error { 1554 1555 // We don't have to update the summary if the job is missing 1556 if alloc.Job == nil { 1557 return nil 1558 } 1559 1560 summaryRaw, err := txn.First("job_summary", "id", alloc.JobID) 1561 if err != nil { 1562 return fmt.Errorf("unable to lookup job summary for job id %q: %v", err) 1563 } 1564 if summaryRaw == nil { 1565 // Check if the job is de-registered 1566 rawJob, err := txn.First("jobs", "id", alloc.JobID) 1567 if err != nil { 1568 return fmt.Errorf("unable to query job: %v", err) 1569 } 1570 1571 // If the job is de-registered then we skip updating it's summary 1572 if rawJob == nil { 1573 return nil 1574 } 1575 return fmt.Errorf("job summary for job %q is not present", alloc.JobID) 1576 } 1577 summary := summaryRaw.(structs.JobSummary) 1578 jobSummary := summary.Copy() 1579 1580 // Not updating the job summary because the allocation doesn't belong to the 1581 // currently registered job 1582 if jobSummary.CreateIndex != alloc.Job.CreateIndex { 1583 return nil 1584 } 1585 1586 tgSummary, ok := jobSummary.Summary[alloc.TaskGroup] 1587 if !ok { 1588 return fmt.Errorf("unable to find task group in the job summary: %v", alloc.TaskGroup) 1589 } 1590 var summaryChanged bool 1591 if existingAlloc == nil { 1592 switch alloc.DesiredStatus { 1593 case structs.AllocDesiredStatusStop, structs.AllocDesiredStatusEvict: 1594 s.logger.Printf("[ERR] state_store: new allocation inserted into state store with id: %v and state: %v", 1595 alloc.ID, alloc.DesiredStatus) 1596 } 1597 switch alloc.ClientStatus { 1598 case structs.AllocClientStatusPending: 1599 tgSummary.Starting += 1 1600 if tgSummary.Queued > 0 { 1601 tgSummary.Queued -= 1 1602 } 1603 summaryChanged = true 1604 case structs.AllocClientStatusRunning, structs.AllocClientStatusFailed, 1605 structs.AllocClientStatusComplete: 1606 s.logger.Printf("[ERR] state_store: new allocation inserted into state store with id: %v and state: %v", 1607 alloc.ID, alloc.ClientStatus) 1608 } 1609 } else if existingAlloc.ClientStatus != alloc.ClientStatus { 1610 // Incrementing the client of the bin of the current state 1611 switch alloc.ClientStatus { 1612 case structs.AllocClientStatusRunning: 1613 tgSummary.Running += 1 1614 case structs.AllocClientStatusFailed: 1615 tgSummary.Failed += 1 1616 case structs.AllocClientStatusPending: 1617 tgSummary.Starting += 1 1618 case structs.AllocClientStatusComplete: 1619 tgSummary.Complete += 1 1620 case structs.AllocClientStatusLost: 1621 tgSummary.Lost += 1 1622 } 1623 1624 // Decrementing the count of the bin of the last state 1625 switch existingAlloc.ClientStatus { 1626 case structs.AllocClientStatusRunning: 1627 tgSummary.Running -= 1 1628 case structs.AllocClientStatusPending: 1629 tgSummary.Starting -= 1 1630 case structs.AllocClientStatusLost: 1631 tgSummary.Lost -= 1 1632 case structs.AllocClientStatusFailed, structs.AllocClientStatusComplete: 1633 default: 1634 s.logger.Printf("[ERR] state_store: invalid old state of allocation with id: %v, and state: %v", 1635 existingAlloc.ID, existingAlloc.ClientStatus) 1636 } 1637 summaryChanged = true 1638 } 1639 jobSummary.Summary[alloc.TaskGroup] = tgSummary 1640 1641 if summaryChanged { 1642 jobSummary.ModifyIndex = index 1643 watcher.Add(watch.Item{Table: "job_summary"}) 1644 watcher.Add(watch.Item{JobSummary: alloc.JobID}) 1645 1646 // Update the indexes table for job summary 1647 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 1648 return fmt.Errorf("index update failed: %v", err) 1649 } 1650 1651 if err := txn.Insert("job_summary", *jobSummary); err != nil { 1652 return fmt.Errorf("updating job summary failed: %v", err) 1653 } 1654 } 1655 1656 return nil 1657 } 1658 1659 // addEphemeralDiskToTaskGroups adds missing EphemeralDisk objects to TaskGroups 1660 func (s *StateStore) addEphemeralDiskToTaskGroups(job *structs.Job) { 1661 for _, tg := range job.TaskGroups { 1662 if tg.EphemeralDisk != nil { 1663 continue 1664 } 1665 var diskMB int 1666 for _, task := range tg.Tasks { 1667 if task.Resources != nil { 1668 diskMB += task.Resources.DiskMB 1669 task.Resources.DiskMB = 0 1670 } 1671 } 1672 tg.EphemeralDisk = &structs.EphemeralDisk{ 1673 SizeMB: diskMB, 1674 } 1675 } 1676 } 1677 1678 // StateSnapshot is used to provide a point-in-time snapshot 1679 type StateSnapshot struct { 1680 StateStore 1681 } 1682 1683 // StateRestore is used to optimize the performance when 1684 // restoring state by only using a single large transaction 1685 // instead of thousands of sub transactions 1686 type StateRestore struct { 1687 txn *memdb.Txn 1688 watch *stateWatch 1689 items watch.Items 1690 } 1691 1692 // Abort is used to abort the restore operation 1693 func (s *StateRestore) Abort() { 1694 s.txn.Abort() 1695 } 1696 1697 // Commit is used to commit the restore operation 1698 func (s *StateRestore) Commit() { 1699 s.txn.Defer(func() { s.watch.notify(s.items) }) 1700 s.txn.Commit() 1701 } 1702 1703 // NodeRestore is used to restore a node 1704 func (r *StateRestore) NodeRestore(node *structs.Node) error { 1705 r.items.Add(watch.Item{Table: "nodes"}) 1706 r.items.Add(watch.Item{Node: node.ID}) 1707 if err := r.txn.Insert("nodes", node); err != nil { 1708 return fmt.Errorf("node insert failed: %v", err) 1709 } 1710 return nil 1711 } 1712 1713 // JobRestore is used to restore a job 1714 func (r *StateRestore) JobRestore(job *structs.Job) error { 1715 r.items.Add(watch.Item{Table: "jobs"}) 1716 r.items.Add(watch.Item{Job: job.ID}) 1717 1718 // Create the EphemeralDisk if it's nil by adding up DiskMB from task resources. 1719 // COMPAT 0.4.1 -> 0.5 1720 r.addEphemeralDiskToTaskGroups(job) 1721 1722 if err := r.txn.Insert("jobs", job); err != nil { 1723 return fmt.Errorf("job insert failed: %v", err) 1724 } 1725 return nil 1726 } 1727 1728 // EvalRestore is used to restore an evaluation 1729 func (r *StateRestore) EvalRestore(eval *structs.Evaluation) error { 1730 r.items.Add(watch.Item{Table: "evals"}) 1731 r.items.Add(watch.Item{Eval: eval.ID}) 1732 if err := r.txn.Insert("evals", eval); err != nil { 1733 return fmt.Errorf("eval insert failed: %v", err) 1734 } 1735 return nil 1736 } 1737 1738 // AllocRestore is used to restore an allocation 1739 func (r *StateRestore) AllocRestore(alloc *structs.Allocation) error { 1740 r.items.Add(watch.Item{Table: "allocs"}) 1741 r.items.Add(watch.Item{Alloc: alloc.ID}) 1742 r.items.Add(watch.Item{AllocEval: alloc.EvalID}) 1743 r.items.Add(watch.Item{AllocJob: alloc.JobID}) 1744 r.items.Add(watch.Item{AllocNode: alloc.NodeID}) 1745 1746 // Set the shared resources if it's not present 1747 // COMPAT 0.4.1 -> 0.5 1748 if alloc.SharedResources == nil { 1749 alloc.SharedResources = &structs.Resources{ 1750 DiskMB: alloc.Resources.DiskMB, 1751 } 1752 } 1753 1754 // Create the EphemeralDisk if it's nil by adding up DiskMB from task resources. 1755 if alloc.Job != nil { 1756 r.addEphemeralDiskToTaskGroups(alloc.Job) 1757 } 1758 1759 if err := r.txn.Insert("allocs", alloc); err != nil { 1760 return fmt.Errorf("alloc insert failed: %v", err) 1761 } 1762 return nil 1763 } 1764 1765 // IndexRestore is used to restore an index 1766 func (r *StateRestore) IndexRestore(idx *IndexEntry) error { 1767 if err := r.txn.Insert("index", idx); err != nil { 1768 return fmt.Errorf("index insert failed: %v", err) 1769 } 1770 return nil 1771 } 1772 1773 // PeriodicLaunchRestore is used to restore a periodic launch. 1774 func (r *StateRestore) PeriodicLaunchRestore(launch *structs.PeriodicLaunch) error { 1775 r.items.Add(watch.Item{Table: "periodic_launch"}) 1776 r.items.Add(watch.Item{Job: launch.ID}) 1777 if err := r.txn.Insert("periodic_launch", launch); err != nil { 1778 return fmt.Errorf("periodic launch insert failed: %v", err) 1779 } 1780 return nil 1781 } 1782 1783 // JobSummaryRestore is used to restore a job summary 1784 func (r *StateRestore) JobSummaryRestore(jobSummary *structs.JobSummary) error { 1785 if err := r.txn.Insert("job_summary", *jobSummary); err != nil { 1786 return fmt.Errorf("job summary insert failed: %v", err) 1787 } 1788 return nil 1789 } 1790 1791 // VaultAccessorRestore is used to restore a vault accessor 1792 func (r *StateRestore) VaultAccessorRestore(accessor *structs.VaultAccessor) error { 1793 if err := r.txn.Insert("vault_accessors", accessor); err != nil { 1794 return fmt.Errorf("vault accessor insert failed: %v", err) 1795 } 1796 return nil 1797 } 1798 1799 // addEphemeralDiskToTaskGroups adds missing EphemeralDisk objects to TaskGroups 1800 func (r *StateRestore) addEphemeralDiskToTaskGroups(job *structs.Job) { 1801 for _, tg := range job.TaskGroups { 1802 if tg.EphemeralDisk != nil { 1803 continue 1804 } 1805 var sizeMB int 1806 for _, task := range tg.Tasks { 1807 if task.Resources != nil { 1808 sizeMB += task.Resources.DiskMB 1809 task.Resources.DiskMB = 0 1810 } 1811 } 1812 tg.EphemeralDisk = &structs.EphemeralDisk{ 1813 SizeMB: sizeMB, 1814 } 1815 } 1816 } 1817 1818 // stateWatch holds shared state for watching updates. This is 1819 // outside of StateStore so it can be shared with snapshots. 1820 type stateWatch struct { 1821 items map[watch.Item]*NotifyGroup 1822 l sync.Mutex 1823 } 1824 1825 // newStateWatch creates a new stateWatch for change notification. 1826 func newStateWatch() *stateWatch { 1827 return &stateWatch{ 1828 items: make(map[watch.Item]*NotifyGroup), 1829 } 1830 } 1831 1832 // watch subscribes a channel to the given watch items. 1833 func (w *stateWatch) watch(items watch.Items, ch chan struct{}) { 1834 w.l.Lock() 1835 defer w.l.Unlock() 1836 1837 for item, _ := range items { 1838 grp, ok := w.items[item] 1839 if !ok { 1840 grp = new(NotifyGroup) 1841 w.items[item] = grp 1842 } 1843 grp.Wait(ch) 1844 } 1845 } 1846 1847 // stopWatch unsubscribes a channel from the given watch items. 1848 func (w *stateWatch) stopWatch(items watch.Items, ch chan struct{}) { 1849 w.l.Lock() 1850 defer w.l.Unlock() 1851 1852 for item, _ := range items { 1853 if grp, ok := w.items[item]; ok { 1854 grp.Clear(ch) 1855 if grp.Empty() { 1856 delete(w.items, item) 1857 } 1858 } 1859 } 1860 } 1861 1862 // notify is used to fire notifications on the given watch items. 1863 func (w *stateWatch) notify(items watch.Items) { 1864 w.l.Lock() 1865 defer w.l.Unlock() 1866 1867 for wi, _ := range items { 1868 if grp, ok := w.items[wi]; ok { 1869 grp.Notify() 1870 } 1871 } 1872 }