github.com/jrxfive/nomad@v0.6.1-0.20170802162750-1fef470e89bf/nomad/state/state_store.go (about) 1 package state 2 3 import ( 4 "fmt" 5 "io" 6 "log" 7 8 "github.com/hashicorp/go-memdb" 9 multierror "github.com/hashicorp/go-multierror" 10 "github.com/hashicorp/nomad/helper" 11 "github.com/hashicorp/nomad/nomad/structs" 12 ) 13 14 // IndexEntry is used with the "index" table 15 // for managing the latest Raft index affecting a table. 16 type IndexEntry struct { 17 Key string 18 Value uint64 19 } 20 21 // The StateStore is responsible for maintaining all the Nomad 22 // state. It is manipulated by the FSM which maintains consistency 23 // through the use of Raft. The goals of the StateStore are to provide 24 // high concurrency for read operations without blocking writes, and 25 // to provide write availability in the face of reads. EVERY object 26 // returned as a result of a read against the state store should be 27 // considered a constant and NEVER modified in place. 28 type StateStore struct { 29 logger *log.Logger 30 db *memdb.MemDB 31 32 // abandonCh is used to signal watchers that this state store has been 33 // abandoned (usually during a restore). This is only ever closed. 34 abandonCh chan struct{} 35 } 36 37 // NewStateStore is used to create a new state store 38 func NewStateStore(logOutput io.Writer) (*StateStore, error) { 39 // Create the MemDB 40 db, err := memdb.NewMemDB(stateStoreSchema()) 41 if err != nil { 42 return nil, fmt.Errorf("state store setup failed: %v", err) 43 } 44 45 // Create the state store 46 s := &StateStore{ 47 logger: log.New(logOutput, "", log.LstdFlags), 48 db: db, 49 abandonCh: make(chan struct{}), 50 } 51 return s, nil 52 } 53 54 // Snapshot is used to create a point in time snapshot. Because 55 // we use MemDB, we just need to snapshot the state of the underlying 56 // database. 
57 func (s *StateStore) Snapshot() (*StateSnapshot, error) { 58 snap := &StateSnapshot{ 59 StateStore: StateStore{ 60 logger: s.logger, 61 db: s.db.Snapshot(), 62 }, 63 } 64 return snap, nil 65 } 66 67 // Restore is used to optimize the efficiency of rebuilding 68 // state by minimizing the number of transactions and checking 69 // overhead. 70 func (s *StateStore) Restore() (*StateRestore, error) { 71 txn := s.db.Txn(true) 72 r := &StateRestore{ 73 txn: txn, 74 } 75 return r, nil 76 } 77 78 // AbandonCh returns a channel you can wait on to know if the state store was 79 // abandoned. 80 func (s *StateStore) AbandonCh() <-chan struct{} { 81 return s.abandonCh 82 } 83 84 // Abandon is used to signal that the given state store has been abandoned. 85 // Calling this more than one time will panic. 86 func (s *StateStore) Abandon() { 87 close(s.abandonCh) 88 } 89 90 // UpsertPlanResults is used to upsert the results of a plan. 91 func (s *StateStore) UpsertPlanResults(index uint64, results *structs.ApplyPlanResultsRequest) error { 92 txn := s.db.Txn(true) 93 defer txn.Abort() 94 95 // Upsert the newly created or updated deployment 96 if results.Deployment != nil { 97 if err := s.upsertDeploymentImpl(index, results.Deployment, txn); err != nil { 98 return err 99 } 100 } 101 102 // Update the status of deployments effected by the plan. 103 if len(results.DeploymentUpdates) != 0 { 104 s.upsertDeploymentUpdates(index, results.DeploymentUpdates, txn) 105 } 106 107 // Attach the job to all the allocations. It is pulled out in the payload to 108 // avoid the redundancy of encoding, but should be denormalized prior to 109 // being inserted into MemDB. 110 structs.DenormalizeAllocationJobs(results.Job, results.Alloc) 111 112 // Calculate the total resources of allocations. It is pulled out in the 113 // payload to avoid encoding something that can be computed, but should be 114 // denormalized prior to being inserted into MemDB. 
115 for _, alloc := range results.Alloc { 116 if alloc.Resources != nil { 117 continue 118 } 119 120 alloc.Resources = new(structs.Resources) 121 for _, task := range alloc.TaskResources { 122 alloc.Resources.Add(task) 123 } 124 125 // Add the shared resources 126 alloc.Resources.Add(alloc.SharedResources) 127 } 128 129 // Upsert the allocations 130 if err := s.upsertAllocsImpl(index, results.Alloc, txn); err != nil { 131 return err 132 } 133 134 txn.Commit() 135 return nil 136 } 137 138 // upsertDeploymentUpdates updates the deployments given the passed status 139 // updates. 140 func (s *StateStore) upsertDeploymentUpdates(index uint64, updates []*structs.DeploymentStatusUpdate, txn *memdb.Txn) error { 141 for _, u := range updates { 142 if err := s.updateDeploymentStatusImpl(index, u, txn); err != nil { 143 return err 144 } 145 } 146 147 return nil 148 } 149 150 // UpsertJobSummary upserts a job summary into the state store. 151 func (s *StateStore) UpsertJobSummary(index uint64, jobSummary *structs.JobSummary) error { 152 txn := s.db.Txn(true) 153 defer txn.Abort() 154 155 // Check if the job summary already exists 156 existing, err := txn.First("job_summary", "id", jobSummary.JobID) 157 if err != nil { 158 return fmt.Errorf("job summary lookup failed: %v", err) 159 } 160 161 // Setup the indexes correctly 162 if existing != nil { 163 jobSummary.CreateIndex = existing.(*structs.JobSummary).CreateIndex 164 jobSummary.ModifyIndex = index 165 } else { 166 jobSummary.CreateIndex = index 167 jobSummary.ModifyIndex = index 168 } 169 170 // Update the index 171 if err := txn.Insert("job_summary", jobSummary); err != nil { 172 return err 173 } 174 175 // Update the indexes table for job summary 176 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 177 return fmt.Errorf("index update failed: %v", err) 178 } 179 180 txn.Commit() 181 return nil 182 } 183 184 // DeleteJobSummary deletes the job summary with the given ID. 
This is for 185 // testing purposes only. 186 func (s *StateStore) DeleteJobSummary(index uint64, id string) error { 187 txn := s.db.Txn(true) 188 defer txn.Abort() 189 190 // Delete the job summary 191 if _, err := txn.DeleteAll("job_summary", "id", id); err != nil { 192 return fmt.Errorf("deleting job summary failed: %v", err) 193 } 194 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 195 return fmt.Errorf("index update failed: %v", err) 196 } 197 txn.Commit() 198 return nil 199 } 200 201 // UpsertDeployment is used to insert a new deployment. If cancelPrior is set to 202 // true, all prior deployments for the same job will be cancelled. 203 func (s *StateStore) UpsertDeployment(index uint64, deployment *structs.Deployment) error { 204 txn := s.db.Txn(true) 205 defer txn.Abort() 206 if err := s.upsertDeploymentImpl(index, deployment, txn); err != nil { 207 return err 208 } 209 txn.Commit() 210 return nil 211 } 212 213 func (s *StateStore) upsertDeploymentImpl(index uint64, deployment *structs.Deployment, txn *memdb.Txn) error { 214 // Check if the deployment already exists 215 existing, err := txn.First("deployment", "id", deployment.ID) 216 if err != nil { 217 return fmt.Errorf("deployment lookup failed: %v", err) 218 } 219 220 // Setup the indexes correctly 221 if existing != nil { 222 deployment.CreateIndex = existing.(*structs.Deployment).CreateIndex 223 deployment.ModifyIndex = index 224 } else { 225 deployment.CreateIndex = index 226 deployment.ModifyIndex = index 227 } 228 229 // Insert the deployment 230 if err := txn.Insert("deployment", deployment); err != nil { 231 return err 232 } 233 234 // Update the indexes table for deployment 235 if err := txn.Insert("index", &IndexEntry{"deployment", index}); err != nil { 236 return fmt.Errorf("index update failed: %v", err) 237 } 238 239 // If the deployment is being marked as complete, set the job to stable. 
240 if deployment.Status == structs.DeploymentStatusSuccessful { 241 if err := s.updateJobStabilityImpl(index, deployment.JobID, deployment.JobVersion, true, txn); err != nil { 242 return fmt.Errorf("failed to update job stability: %v", err) 243 } 244 } 245 246 return nil 247 } 248 249 func (s *StateStore) Deployments(ws memdb.WatchSet) (memdb.ResultIterator, error) { 250 txn := s.db.Txn(false) 251 252 // Walk the entire deployments table 253 iter, err := txn.Get("deployment", "id") 254 if err != nil { 255 return nil, err 256 } 257 258 ws.Add(iter.WatchCh()) 259 return iter, nil 260 } 261 262 func (s *StateStore) DeploymentsByIDPrefix(ws memdb.WatchSet, deploymentID string) (memdb.ResultIterator, error) { 263 txn := s.db.Txn(false) 264 265 // Walk the entire deployments table 266 iter, err := txn.Get("deployment", "id_prefix", deploymentID) 267 if err != nil { 268 return nil, err 269 } 270 271 ws.Add(iter.WatchCh()) 272 return iter, nil 273 } 274 275 func (s *StateStore) DeploymentByID(ws memdb.WatchSet, deploymentID string) (*structs.Deployment, error) { 276 txn := s.db.Txn(false) 277 return s.deploymentByIDImpl(ws, deploymentID, txn) 278 } 279 280 func (s *StateStore) deploymentByIDImpl(ws memdb.WatchSet, deploymentID string, txn *memdb.Txn) (*structs.Deployment, error) { 281 watchCh, existing, err := txn.FirstWatch("deployment", "id", deploymentID) 282 if err != nil { 283 return nil, fmt.Errorf("deployment lookup failed: %v", err) 284 } 285 ws.Add(watchCh) 286 287 if existing != nil { 288 return existing.(*structs.Deployment), nil 289 } 290 291 return nil, nil 292 } 293 294 func (s *StateStore) DeploymentsByJobID(ws memdb.WatchSet, jobID string) ([]*structs.Deployment, error) { 295 txn := s.db.Txn(false) 296 297 // Get an iterator over the deployments 298 iter, err := txn.Get("deployment", "job", jobID) 299 if err != nil { 300 return nil, err 301 } 302 303 ws.Add(iter.WatchCh()) 304 305 var out []*structs.Deployment 306 for { 307 raw := iter.Next() 308 if raw == 
nil { 309 break 310 } 311 312 d := raw.(*structs.Deployment) 313 out = append(out, d) 314 } 315 316 return out, nil 317 } 318 319 // LatestDeploymentByJobID returns the latest deployment for the given job. The 320 // latest is determined strictly by CreateIndex. 321 func (s *StateStore) LatestDeploymentByJobID(ws memdb.WatchSet, jobID string) (*structs.Deployment, error) { 322 txn := s.db.Txn(false) 323 324 // Get an iterator over the deployments 325 iter, err := txn.Get("deployment", "job", jobID) 326 if err != nil { 327 return nil, err 328 } 329 330 ws.Add(iter.WatchCh()) 331 332 var out *structs.Deployment 333 for { 334 raw := iter.Next() 335 if raw == nil { 336 break 337 } 338 339 d := raw.(*structs.Deployment) 340 if out == nil || out.CreateIndex < d.CreateIndex { 341 out = d 342 } 343 } 344 345 return out, nil 346 } 347 348 // DeleteDeployment is used to delete a set of deployments by ID 349 func (s *StateStore) DeleteDeployment(index uint64, deploymentIDs []string) error { 350 txn := s.db.Txn(true) 351 defer txn.Abort() 352 353 if len(deploymentIDs) == 0 { 354 return nil 355 } 356 357 for _, deploymentID := range deploymentIDs { 358 // Lookup the deployment 359 existing, err := txn.First("deployment", "id", deploymentID) 360 if err != nil { 361 return fmt.Errorf("deployment lookup failed: %v", err) 362 } 363 if existing == nil { 364 return fmt.Errorf("deployment not found") 365 } 366 367 // Delete the deployment 368 if err := txn.Delete("deployment", existing); err != nil { 369 return fmt.Errorf("deployment delete failed: %v", err) 370 } 371 } 372 373 if err := txn.Insert("index", &IndexEntry{"deployment", index}); err != nil { 374 return fmt.Errorf("index update failed: %v", err) 375 } 376 377 txn.Commit() 378 return nil 379 } 380 381 // UpsertNode is used to register a node or update a node definition 382 // This is assumed to be triggered by the client, so we retain the value 383 // of drain which is set by the scheduler. 
384 func (s *StateStore) UpsertNode(index uint64, node *structs.Node) error { 385 txn := s.db.Txn(true) 386 defer txn.Abort() 387 388 // Check if the node already exists 389 existing, err := txn.First("nodes", "id", node.ID) 390 if err != nil { 391 return fmt.Errorf("node lookup failed: %v", err) 392 } 393 394 // Setup the indexes correctly 395 if existing != nil { 396 exist := existing.(*structs.Node) 397 node.CreateIndex = exist.CreateIndex 398 node.ModifyIndex = index 399 node.Drain = exist.Drain // Retain the drain mode 400 } else { 401 node.CreateIndex = index 402 node.ModifyIndex = index 403 } 404 405 // Insert the node 406 if err := txn.Insert("nodes", node); err != nil { 407 return fmt.Errorf("node insert failed: %v", err) 408 } 409 if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil { 410 return fmt.Errorf("index update failed: %v", err) 411 } 412 413 txn.Commit() 414 return nil 415 } 416 417 // DeleteNode is used to deregister a node 418 func (s *StateStore) DeleteNode(index uint64, nodeID string) error { 419 txn := s.db.Txn(true) 420 defer txn.Abort() 421 422 // Lookup the node 423 existing, err := txn.First("nodes", "id", nodeID) 424 if err != nil { 425 return fmt.Errorf("node lookup failed: %v", err) 426 } 427 if existing == nil { 428 return fmt.Errorf("node not found") 429 } 430 431 // Delete the node 432 if err := txn.Delete("nodes", existing); err != nil { 433 return fmt.Errorf("node delete failed: %v", err) 434 } 435 if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil { 436 return fmt.Errorf("index update failed: %v", err) 437 } 438 439 txn.Commit() 440 return nil 441 } 442 443 // UpdateNodeStatus is used to update the status of a node 444 func (s *StateStore) UpdateNodeStatus(index uint64, nodeID, status string) error { 445 txn := s.db.Txn(true) 446 defer txn.Abort() 447 448 // Lookup the node 449 existing, err := txn.First("nodes", "id", nodeID) 450 if err != nil { 451 return fmt.Errorf("node lookup failed: 
%v", err) 452 } 453 if existing == nil { 454 return fmt.Errorf("node not found") 455 } 456 457 // Copy the existing node 458 existingNode := existing.(*structs.Node) 459 copyNode := new(structs.Node) 460 *copyNode = *existingNode 461 462 // Update the status in the copy 463 copyNode.Status = status 464 copyNode.ModifyIndex = index 465 466 // Insert the node 467 if err := txn.Insert("nodes", copyNode); err != nil { 468 return fmt.Errorf("node update failed: %v", err) 469 } 470 if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil { 471 return fmt.Errorf("index update failed: %v", err) 472 } 473 474 txn.Commit() 475 return nil 476 } 477 478 // UpdateNodeDrain is used to update the drain of a node 479 func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, drain bool) error { 480 txn := s.db.Txn(true) 481 defer txn.Abort() 482 483 // Lookup the node 484 existing, err := txn.First("nodes", "id", nodeID) 485 if err != nil { 486 return fmt.Errorf("node lookup failed: %v", err) 487 } 488 if existing == nil { 489 return fmt.Errorf("node not found") 490 } 491 492 // Copy the existing node 493 existingNode := existing.(*structs.Node) 494 copyNode := new(structs.Node) 495 *copyNode = *existingNode 496 497 // Update the drain in the copy 498 copyNode.Drain = drain 499 copyNode.ModifyIndex = index 500 501 // Insert the node 502 if err := txn.Insert("nodes", copyNode); err != nil { 503 return fmt.Errorf("node update failed: %v", err) 504 } 505 if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil { 506 return fmt.Errorf("index update failed: %v", err) 507 } 508 509 txn.Commit() 510 return nil 511 } 512 513 // NodeByID is used to lookup a node by ID 514 func (s *StateStore) NodeByID(ws memdb.WatchSet, nodeID string) (*structs.Node, error) { 515 txn := s.db.Txn(false) 516 517 watchCh, existing, err := txn.FirstWatch("nodes", "id", nodeID) 518 if err != nil { 519 return nil, fmt.Errorf("node lookup failed: %v", err) 520 } 521 
ws.Add(watchCh) 522 523 if existing != nil { 524 return existing.(*structs.Node), nil 525 } 526 return nil, nil 527 } 528 529 // NodesByIDPrefix is used to lookup nodes by prefix 530 func (s *StateStore) NodesByIDPrefix(ws memdb.WatchSet, nodeID string) (memdb.ResultIterator, error) { 531 txn := s.db.Txn(false) 532 533 iter, err := txn.Get("nodes", "id_prefix", nodeID) 534 if err != nil { 535 return nil, fmt.Errorf("node lookup failed: %v", err) 536 } 537 ws.Add(iter.WatchCh()) 538 539 return iter, nil 540 } 541 542 // Nodes returns an iterator over all the nodes 543 func (s *StateStore) Nodes(ws memdb.WatchSet) (memdb.ResultIterator, error) { 544 txn := s.db.Txn(false) 545 546 // Walk the entire nodes table 547 iter, err := txn.Get("nodes", "id") 548 if err != nil { 549 return nil, err 550 } 551 ws.Add(iter.WatchCh()) 552 return iter, nil 553 } 554 555 // UpsertJob is used to register a job or update a job definition 556 func (s *StateStore) UpsertJob(index uint64, job *structs.Job) error { 557 txn := s.db.Txn(true) 558 defer txn.Abort() 559 if err := s.upsertJobImpl(index, job, false, txn); err != nil { 560 return err 561 } 562 txn.Commit() 563 return nil 564 } 565 566 // upsertJobImpl is the inplementation for registering a job or updating a job definition 567 func (s *StateStore) upsertJobImpl(index uint64, job *structs.Job, keepVersion bool, txn *memdb.Txn) error { 568 // Check if the job already exists 569 existing, err := txn.First("jobs", "id", job.ID) 570 if err != nil { 571 return fmt.Errorf("job lookup failed: %v", err) 572 } 573 574 // Setup the indexes correctly 575 if existing != nil { 576 job.CreateIndex = existing.(*structs.Job).CreateIndex 577 job.ModifyIndex = index 578 579 // Bump the version unless asked to keep it. This should only be done 580 // when changing an internal field such as Stable. 
A spec change should 581 // always come with a version bump 582 if !keepVersion { 583 job.JobModifyIndex = index 584 job.Version = existing.(*structs.Job).Version + 1 585 } 586 587 // Compute the job status 588 var err error 589 job.Status, err = s.getJobStatus(txn, job, false) 590 if err != nil { 591 return fmt.Errorf("setting job status for %q failed: %v", job.ID, err) 592 } 593 } else { 594 job.CreateIndex = index 595 job.ModifyIndex = index 596 job.JobModifyIndex = index 597 job.Version = 0 598 599 if err := s.setJobStatus(index, txn, job, false, ""); err != nil { 600 return fmt.Errorf("setting job status for %q failed: %v", job.ID, err) 601 } 602 603 // Have to get the job again since it could have been updated 604 updated, err := txn.First("jobs", "id", job.ID) 605 if err != nil { 606 return fmt.Errorf("job lookup failed: %v", err) 607 } 608 if updated != nil { 609 job = updated.(*structs.Job) 610 } 611 } 612 613 if err := s.updateSummaryWithJob(index, job, txn); err != nil { 614 return fmt.Errorf("unable to create job summary: %v", err) 615 } 616 617 if err := s.upsertJobVersion(index, job, txn); err != nil { 618 return fmt.Errorf("unable to upsert job into job_version table: %v", err) 619 } 620 621 // Create the EphemeralDisk if it's nil by adding up DiskMB from task resources. 
622 // COMPAT 0.4.1 -> 0.5 623 s.addEphemeralDiskToTaskGroups(job) 624 625 // Insert the job 626 if err := txn.Insert("jobs", job); err != nil { 627 return fmt.Errorf("job insert failed: %v", err) 628 } 629 if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil { 630 return fmt.Errorf("index update failed: %v", err) 631 } 632 633 return nil 634 } 635 636 // DeleteJob is used to deregister a job 637 func (s *StateStore) DeleteJob(index uint64, jobID string) error { 638 txn := s.db.Txn(true) 639 defer txn.Abort() 640 641 // Lookup the node 642 existing, err := txn.First("jobs", "id", jobID) 643 if err != nil { 644 return fmt.Errorf("job lookup failed: %v", err) 645 } 646 if existing == nil { 647 return fmt.Errorf("job not found") 648 } 649 650 // Check if we should update a parent job summary 651 job := existing.(*structs.Job) 652 if job.ParentID != "" { 653 summaryRaw, err := txn.First("job_summary", "id", job.ParentID) 654 if err != nil { 655 return fmt.Errorf("unable to retrieve summary for parent job: %v", err) 656 } 657 658 // Only continue if the summary exists. 
It could not exist if the parent 659 // job was removed 660 if summaryRaw != nil { 661 existing := summaryRaw.(*structs.JobSummary) 662 pSummary := existing.Copy() 663 if pSummary.Children != nil { 664 665 modified := false 666 switch job.Status { 667 case structs.JobStatusPending: 668 pSummary.Children.Pending-- 669 pSummary.Children.Dead++ 670 modified = true 671 case structs.JobStatusRunning: 672 pSummary.Children.Running-- 673 pSummary.Children.Dead++ 674 modified = true 675 case structs.JobStatusDead: 676 default: 677 return fmt.Errorf("unknown old job status %q", job.Status) 678 } 679 680 if modified { 681 // Update the modify index 682 pSummary.ModifyIndex = index 683 684 // Insert the summary 685 if err := txn.Insert("job_summary", pSummary); err != nil { 686 return fmt.Errorf("job summary insert failed: %v", err) 687 } 688 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 689 return fmt.Errorf("index update failed: %v", err) 690 } 691 } 692 } 693 } 694 } 695 696 // Delete the job 697 if err := txn.Delete("jobs", existing); err != nil { 698 return fmt.Errorf("job delete failed: %v", err) 699 } 700 if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil { 701 return fmt.Errorf("index update failed: %v", err) 702 } 703 704 // Delete the job versions 705 if err := s.deleteJobVersions(index, job, txn); err != nil { 706 return err 707 } 708 709 // Delete the job summary 710 if _, err = txn.DeleteAll("job_summary", "id", jobID); err != nil { 711 return fmt.Errorf("deleing job summary failed: %v", err) 712 } 713 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 714 return fmt.Errorf("index update failed: %v", err) 715 } 716 717 txn.Commit() 718 return nil 719 } 720 721 // deleteJobVersions deletes all versions of the given job. 
722 func (s *StateStore) deleteJobVersions(index uint64, job *structs.Job, txn *memdb.Txn) error { 723 iter, err := txn.Get("job_version", "id_prefix", job.ID) 724 if err != nil { 725 return err 726 } 727 728 for { 729 raw := iter.Next() 730 if raw == nil { 731 break 732 } 733 734 // Ensure the ID is an exact match 735 j := raw.(*structs.Job) 736 if j.ID != job.ID { 737 continue 738 } 739 740 if _, err = txn.DeleteAll("job_version", "id", job.ID, job.Version); err != nil { 741 return fmt.Errorf("deleting job versions failed: %v", err) 742 } 743 } 744 745 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 746 return fmt.Errorf("index update failed: %v", err) 747 } 748 749 return nil 750 } 751 752 // upsertJobVersion inserts a job into its historic version table and limits the 753 // number of job versions that are tracked. 754 func (s *StateStore) upsertJobVersion(index uint64, job *structs.Job, txn *memdb.Txn) error { 755 // Insert the job 756 if err := txn.Insert("job_version", job); err != nil { 757 return fmt.Errorf("failed to insert job into job_version table: %v", err) 758 } 759 760 if err := txn.Insert("index", &IndexEntry{"job_version", index}); err != nil { 761 return fmt.Errorf("index update failed: %v", err) 762 } 763 764 // Get all the historic jobs for this ID 765 all, err := s.jobVersionByID(txn, nil, job.ID) 766 if err != nil { 767 return fmt.Errorf("failed to look up job versions for %q: %v", job.ID, err) 768 } 769 770 // If we are below the limit there is no GCing to be done 771 if len(all) <= structs.JobTrackedVersions { 772 return nil 773 } 774 775 // We have to delete a historic job to make room. 776 // Find index of the highest versioned stable job 777 stableIdx := -1 778 for i, j := range all { 779 if j.Stable { 780 stableIdx = i 781 break 782 } 783 } 784 785 // If the stable job is the oldest version, do a swap to bring it into the 786 // keep set. 
787 max := structs.JobTrackedVersions 788 if stableIdx == max { 789 all[max-1], all[max] = all[max], all[max-1] 790 } 791 792 // Delete the job outside of the set that are being kept. 793 d := all[max] 794 if err := txn.Delete("job_version", d); err != nil { 795 return fmt.Errorf("failed to delete job %v (%d) from job_version", d.ID, d.Version) 796 } 797 798 return nil 799 } 800 801 // JobByID is used to lookup a job by its ID. JobByID returns the current/latest job 802 // version. 803 func (s *StateStore) JobByID(ws memdb.WatchSet, id string) (*structs.Job, error) { 804 txn := s.db.Txn(false) 805 806 watchCh, existing, err := txn.FirstWatch("jobs", "id", id) 807 if err != nil { 808 return nil, fmt.Errorf("job lookup failed: %v", err) 809 } 810 ws.Add(watchCh) 811 812 if existing != nil { 813 return existing.(*structs.Job), nil 814 } 815 return nil, nil 816 } 817 818 // JobsByIDPrefix is used to lookup a job by prefix 819 func (s *StateStore) JobsByIDPrefix(ws memdb.WatchSet, id string) (memdb.ResultIterator, error) { 820 txn := s.db.Txn(false) 821 822 iter, err := txn.Get("jobs", "id_prefix", id) 823 if err != nil { 824 return nil, fmt.Errorf("job lookup failed: %v", err) 825 } 826 827 ws.Add(iter.WatchCh()) 828 829 return iter, nil 830 } 831 832 // JobVersionsByID returns all the tracked versions of a job. 833 func (s *StateStore) JobVersionsByID(ws memdb.WatchSet, id string) ([]*structs.Job, error) { 834 txn := s.db.Txn(false) 835 return s.jobVersionByID(txn, &ws, id) 836 } 837 838 // jobVersionByID is the underlying implementation for retrieving all tracked 839 // versions of a job and is called under an existing transaction. A watch set 840 // can optionally be passed in to add the job histories to the watch set. 
841 func (s *StateStore) jobVersionByID(txn *memdb.Txn, ws *memdb.WatchSet, id string) ([]*structs.Job, error) { 842 // Get all the historic jobs for this ID 843 iter, err := txn.Get("job_version", "id_prefix", id) 844 if err != nil { 845 return nil, err 846 } 847 848 if ws != nil { 849 ws.Add(iter.WatchCh()) 850 } 851 852 var all []*structs.Job 853 for { 854 raw := iter.Next() 855 if raw == nil { 856 break 857 } 858 859 // Ensure the ID is an exact match 860 j := raw.(*structs.Job) 861 if j.ID != id { 862 continue 863 } 864 865 all = append(all, j) 866 } 867 868 // Reverse so that highest versions first 869 for i, j := 0, len(all)-1; i < j; i, j = i+1, j-1 { 870 all[i], all[j] = all[j], all[i] 871 } 872 873 return all, nil 874 } 875 876 // JobByIDAndVersion returns the job identified by its ID and Version. The 877 // passed watchset may be nil. 878 func (s *StateStore) JobByIDAndVersion(ws memdb.WatchSet, id string, version uint64) (*structs.Job, error) { 879 txn := s.db.Txn(false) 880 return s.jobByIDAndVersionImpl(ws, id, version, txn) 881 } 882 883 // jobByIDAndVersionImpl returns the job identified by its ID and Version. The 884 // passed watchset may be nil. 
885 func (s *StateStore) jobByIDAndVersionImpl(ws memdb.WatchSet, id string, version uint64, txn *memdb.Txn) (*structs.Job, error) { 886 watchCh, existing, err := txn.FirstWatch("job_version", "id", id, version) 887 if err != nil { 888 return nil, err 889 } 890 891 if ws != nil { 892 ws.Add(watchCh) 893 } 894 895 if existing != nil { 896 job := existing.(*structs.Job) 897 return job, nil 898 } 899 900 return nil, nil 901 } 902 903 func (s *StateStore) JobVersions(ws memdb.WatchSet) (memdb.ResultIterator, error) { 904 txn := s.db.Txn(false) 905 906 // Walk the entire deployments table 907 iter, err := txn.Get("job_version", "id") 908 if err != nil { 909 return nil, err 910 } 911 912 ws.Add(iter.WatchCh()) 913 return iter, nil 914 } 915 916 // Jobs returns an iterator over all the jobs 917 func (s *StateStore) Jobs(ws memdb.WatchSet) (memdb.ResultIterator, error) { 918 txn := s.db.Txn(false) 919 920 // Walk the entire jobs table 921 iter, err := txn.Get("jobs", "id") 922 if err != nil { 923 return nil, err 924 } 925 926 ws.Add(iter.WatchCh()) 927 928 return iter, nil 929 } 930 931 // JobsByPeriodic returns an iterator over all the periodic or non-periodic jobs. 932 func (s *StateStore) JobsByPeriodic(ws memdb.WatchSet, periodic bool) (memdb.ResultIterator, error) { 933 txn := s.db.Txn(false) 934 935 iter, err := txn.Get("jobs", "periodic", periodic) 936 if err != nil { 937 return nil, err 938 } 939 940 ws.Add(iter.WatchCh()) 941 942 return iter, nil 943 } 944 945 // JobsByScheduler returns an iterator over all the jobs with the specific 946 // scheduler type. 947 func (s *StateStore) JobsByScheduler(ws memdb.WatchSet, schedulerType string) (memdb.ResultIterator, error) { 948 txn := s.db.Txn(false) 949 950 // Return an iterator for jobs with the specific type. 
951 iter, err := txn.Get("jobs", "type", schedulerType) 952 if err != nil { 953 return nil, err 954 } 955 956 ws.Add(iter.WatchCh()) 957 958 return iter, nil 959 } 960 961 // JobsByGC returns an iterator over all jobs eligible or uneligible for garbage 962 // collection. 963 func (s *StateStore) JobsByGC(ws memdb.WatchSet, gc bool) (memdb.ResultIterator, error) { 964 txn := s.db.Txn(false) 965 966 iter, err := txn.Get("jobs", "gc", gc) 967 if err != nil { 968 return nil, err 969 } 970 971 ws.Add(iter.WatchCh()) 972 973 return iter, nil 974 } 975 976 // JobSummary returns a job summary object which matches a specific id. 977 func (s *StateStore) JobSummaryByID(ws memdb.WatchSet, jobID string) (*structs.JobSummary, error) { 978 txn := s.db.Txn(false) 979 980 watchCh, existing, err := txn.FirstWatch("job_summary", "id", jobID) 981 if err != nil { 982 return nil, err 983 } 984 985 ws.Add(watchCh) 986 987 if existing != nil { 988 summary := existing.(*structs.JobSummary) 989 return summary, nil 990 } 991 992 return nil, nil 993 } 994 995 // JobSummaries walks the entire job summary table and returns all the job 996 // summary objects 997 func (s *StateStore) JobSummaries(ws memdb.WatchSet) (memdb.ResultIterator, error) { 998 txn := s.db.Txn(false) 999 1000 iter, err := txn.Get("job_summary", "id") 1001 if err != nil { 1002 return nil, err 1003 } 1004 1005 ws.Add(iter.WatchCh()) 1006 1007 return iter, nil 1008 } 1009 1010 // JobSummaryByPrefix is used to look up Job Summary by id prefix 1011 func (s *StateStore) JobSummaryByPrefix(ws memdb.WatchSet, id string) (memdb.ResultIterator, error) { 1012 txn := s.db.Txn(false) 1013 1014 iter, err := txn.Get("job_summary", "id_prefix", id) 1015 if err != nil { 1016 return nil, fmt.Errorf("eval lookup failed: %v", err) 1017 } 1018 1019 ws.Add(iter.WatchCh()) 1020 1021 return iter, nil 1022 } 1023 1024 // UpsertPeriodicLaunch is used to register a launch or update it. 
1025 func (s *StateStore) UpsertPeriodicLaunch(index uint64, launch *structs.PeriodicLaunch) error { 1026 txn := s.db.Txn(true) 1027 defer txn.Abort() 1028 1029 // Check if the job already exists 1030 existing, err := txn.First("periodic_launch", "id", launch.ID) 1031 if err != nil { 1032 return fmt.Errorf("periodic launch lookup failed: %v", err) 1033 } 1034 1035 // Setup the indexes correctly 1036 if existing != nil { 1037 launch.CreateIndex = existing.(*structs.PeriodicLaunch).CreateIndex 1038 launch.ModifyIndex = index 1039 } else { 1040 launch.CreateIndex = index 1041 launch.ModifyIndex = index 1042 } 1043 1044 // Insert the job 1045 if err := txn.Insert("periodic_launch", launch); err != nil { 1046 return fmt.Errorf("launch insert failed: %v", err) 1047 } 1048 if err := txn.Insert("index", &IndexEntry{"periodic_launch", index}); err != nil { 1049 return fmt.Errorf("index update failed: %v", err) 1050 } 1051 1052 txn.Commit() 1053 return nil 1054 } 1055 1056 // DeletePeriodicLaunch is used to delete the periodic launch 1057 func (s *StateStore) DeletePeriodicLaunch(index uint64, jobID string) error { 1058 txn := s.db.Txn(true) 1059 defer txn.Abort() 1060 1061 // Lookup the launch 1062 existing, err := txn.First("periodic_launch", "id", jobID) 1063 if err != nil { 1064 return fmt.Errorf("launch lookup failed: %v", err) 1065 } 1066 if existing == nil { 1067 return fmt.Errorf("launch not found") 1068 } 1069 1070 // Delete the launch 1071 if err := txn.Delete("periodic_launch", existing); err != nil { 1072 return fmt.Errorf("launch delete failed: %v", err) 1073 } 1074 if err := txn.Insert("index", &IndexEntry{"periodic_launch", index}); err != nil { 1075 return fmt.Errorf("index update failed: %v", err) 1076 } 1077 1078 txn.Commit() 1079 return nil 1080 } 1081 1082 // PeriodicLaunchByID is used to lookup a periodic launch by the periodic job 1083 // ID. 
1084 func (s *StateStore) PeriodicLaunchByID(ws memdb.WatchSet, id string) (*structs.PeriodicLaunch, error) { 1085 txn := s.db.Txn(false) 1086 1087 watchCh, existing, err := txn.FirstWatch("periodic_launch", "id", id) 1088 if err != nil { 1089 return nil, fmt.Errorf("periodic launch lookup failed: %v", err) 1090 } 1091 1092 ws.Add(watchCh) 1093 1094 if existing != nil { 1095 return existing.(*structs.PeriodicLaunch), nil 1096 } 1097 return nil, nil 1098 } 1099 1100 // PeriodicLaunches returns an iterator over all the periodic launches 1101 func (s *StateStore) PeriodicLaunches(ws memdb.WatchSet) (memdb.ResultIterator, error) { 1102 txn := s.db.Txn(false) 1103 1104 // Walk the entire table 1105 iter, err := txn.Get("periodic_launch", "id") 1106 if err != nil { 1107 return nil, err 1108 } 1109 1110 ws.Add(iter.WatchCh()) 1111 1112 return iter, nil 1113 } 1114 1115 // UpsertEvals is used to upsert a set of evaluations 1116 func (s *StateStore) UpsertEvals(index uint64, evals []*structs.Evaluation) error { 1117 txn := s.db.Txn(true) 1118 defer txn.Abort() 1119 1120 // Do a nested upsert 1121 jobs := make(map[string]string, len(evals)) 1122 for _, eval := range evals { 1123 if err := s.nestedUpsertEval(txn, index, eval); err != nil { 1124 return err 1125 } 1126 1127 jobs[eval.JobID] = "" 1128 } 1129 1130 // Set the job's status 1131 if err := s.setJobStatuses(index, txn, jobs, false); err != nil { 1132 return fmt.Errorf("setting job status failed: %v", err) 1133 } 1134 1135 txn.Commit() 1136 return nil 1137 } 1138 1139 // nestedUpsertEvaluation is used to nest an evaluation upsert within a transaction 1140 func (s *StateStore) nestedUpsertEval(txn *memdb.Txn, index uint64, eval *structs.Evaluation) error { 1141 // Lookup the evaluation 1142 existing, err := txn.First("evals", "id", eval.ID) 1143 if err != nil { 1144 return fmt.Errorf("eval lookup failed: %v", err) 1145 } 1146 1147 // Update the indexes 1148 if existing != nil { 1149 eval.CreateIndex = 
existing.(*structs.Evaluation).CreateIndex 1150 eval.ModifyIndex = index 1151 } else { 1152 eval.CreateIndex = index 1153 eval.ModifyIndex = index 1154 } 1155 1156 // Update the job summary 1157 summaryRaw, err := txn.First("job_summary", "id", eval.JobID) 1158 if err != nil { 1159 return fmt.Errorf("job summary lookup failed: %v", err) 1160 } 1161 if summaryRaw != nil { 1162 js := summaryRaw.(*structs.JobSummary).Copy() 1163 hasSummaryChanged := false 1164 for tg, num := range eval.QueuedAllocations { 1165 if summary, ok := js.Summary[tg]; ok { 1166 if summary.Queued != num { 1167 summary.Queued = num 1168 js.Summary[tg] = summary 1169 hasSummaryChanged = true 1170 } 1171 } else { 1172 s.logger.Printf("[ERR] state_store: unable to update queued for job %q and task group %q", eval.JobID, tg) 1173 } 1174 } 1175 1176 // Insert the job summary 1177 if hasSummaryChanged { 1178 js.ModifyIndex = index 1179 if err := txn.Insert("job_summary", js); err != nil { 1180 return fmt.Errorf("job summary insert failed: %v", err) 1181 } 1182 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 1183 return fmt.Errorf("index update failed: %v", err) 1184 } 1185 } 1186 } 1187 1188 // Check if the job has any blocked evaluations and cancel them 1189 if eval.Status == structs.EvalStatusComplete && len(eval.FailedTGAllocs) == 0 { 1190 // Get the blocked evaluation for a job if it exists 1191 iter, err := txn.Get("evals", "job", eval.JobID, structs.EvalStatusBlocked) 1192 if err != nil { 1193 return fmt.Errorf("failed to get blocked evals for job %q: %v", eval.JobID, err) 1194 } 1195 1196 var blocked []*structs.Evaluation 1197 for { 1198 raw := iter.Next() 1199 if raw == nil { 1200 break 1201 } 1202 blocked = append(blocked, raw.(*structs.Evaluation)) 1203 } 1204 1205 // Go through and update the evals 1206 for _, eval := range blocked { 1207 newEval := eval.Copy() 1208 newEval.Status = structs.EvalStatusCancelled 1209 newEval.StatusDescription = 
fmt.Sprintf("evaluation %q successful", newEval.ID) 1210 newEval.ModifyIndex = index 1211 if err := txn.Insert("evals", newEval); err != nil { 1212 return fmt.Errorf("eval insert failed: %v", err) 1213 } 1214 } 1215 } 1216 1217 // Insert the eval 1218 if err := txn.Insert("evals", eval); err != nil { 1219 return fmt.Errorf("eval insert failed: %v", err) 1220 } 1221 if err := txn.Insert("index", &IndexEntry{"evals", index}); err != nil { 1222 return fmt.Errorf("index update failed: %v", err) 1223 } 1224 return nil 1225 } 1226 1227 // DeleteEval is used to delete an evaluation 1228 func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) error { 1229 txn := s.db.Txn(true) 1230 defer txn.Abort() 1231 1232 jobs := make(map[string]string, len(evals)) 1233 for _, eval := range evals { 1234 existing, err := txn.First("evals", "id", eval) 1235 if err != nil { 1236 return fmt.Errorf("eval lookup failed: %v", err) 1237 } 1238 if existing == nil { 1239 continue 1240 } 1241 if err := txn.Delete("evals", existing); err != nil { 1242 return fmt.Errorf("eval delete failed: %v", err) 1243 } 1244 jobID := existing.(*structs.Evaluation).JobID 1245 jobs[jobID] = "" 1246 } 1247 1248 for _, alloc := range allocs { 1249 existing, err := txn.First("allocs", "id", alloc) 1250 if err != nil { 1251 return fmt.Errorf("alloc lookup failed: %v", err) 1252 } 1253 if existing == nil { 1254 continue 1255 } 1256 if err := txn.Delete("allocs", existing); err != nil { 1257 return fmt.Errorf("alloc delete failed: %v", err) 1258 } 1259 } 1260 1261 // Update the indexes 1262 if err := txn.Insert("index", &IndexEntry{"evals", index}); err != nil { 1263 return fmt.Errorf("index update failed: %v", err) 1264 } 1265 if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { 1266 return fmt.Errorf("index update failed: %v", err) 1267 } 1268 1269 // Set the job's status 1270 if err := s.setJobStatuses(index, txn, jobs, true); err != nil { 1271 return fmt.Errorf("setting 
job status failed: %v", err) 1272 } 1273 1274 txn.Commit() 1275 return nil 1276 } 1277 1278 // EvalByID is used to lookup an eval by its ID 1279 func (s *StateStore) EvalByID(ws memdb.WatchSet, id string) (*structs.Evaluation, error) { 1280 txn := s.db.Txn(false) 1281 1282 watchCh, existing, err := txn.FirstWatch("evals", "id", id) 1283 if err != nil { 1284 return nil, fmt.Errorf("eval lookup failed: %v", err) 1285 } 1286 1287 ws.Add(watchCh) 1288 1289 if existing != nil { 1290 return existing.(*structs.Evaluation), nil 1291 } 1292 return nil, nil 1293 } 1294 1295 // EvalsByIDPrefix is used to lookup evaluations by prefix 1296 func (s *StateStore) EvalsByIDPrefix(ws memdb.WatchSet, id string) (memdb.ResultIterator, error) { 1297 txn := s.db.Txn(false) 1298 1299 iter, err := txn.Get("evals", "id_prefix", id) 1300 if err != nil { 1301 return nil, fmt.Errorf("eval lookup failed: %v", err) 1302 } 1303 1304 ws.Add(iter.WatchCh()) 1305 1306 return iter, nil 1307 } 1308 1309 // EvalsByJob returns all the evaluations by job id 1310 func (s *StateStore) EvalsByJob(ws memdb.WatchSet, jobID string) ([]*structs.Evaluation, error) { 1311 txn := s.db.Txn(false) 1312 1313 // Get an iterator over the node allocations 1314 iter, err := txn.Get("evals", "job_prefix", jobID) 1315 if err != nil { 1316 return nil, err 1317 } 1318 1319 ws.Add(iter.WatchCh()) 1320 1321 var out []*structs.Evaluation 1322 for { 1323 raw := iter.Next() 1324 if raw == nil { 1325 break 1326 } 1327 1328 e := raw.(*structs.Evaluation) 1329 1330 // Filter non-exact matches 1331 if e.JobID != jobID { 1332 continue 1333 } 1334 1335 out = append(out, e) 1336 } 1337 return out, nil 1338 } 1339 1340 // Evals returns an iterator over all the evaluations 1341 func (s *StateStore) Evals(ws memdb.WatchSet) (memdb.ResultIterator, error) { 1342 txn := s.db.Txn(false) 1343 1344 // Walk the entire table 1345 iter, err := txn.Get("evals", "id") 1346 if err != nil { 1347 return nil, err 1348 } 1349 1350 ws.Add(iter.WatchCh()) 
1351 1352 return iter, nil 1353 } 1354 1355 // UpdateAllocsFromClient is used to update an allocation based on input 1356 // from a client. While the schedulers are the authority on the allocation for 1357 // most things, some updates are authoritative from the client. Specifically, 1358 // the desired state comes from the schedulers, while the actual state comes 1359 // from clients. 1360 func (s *StateStore) UpdateAllocsFromClient(index uint64, allocs []*structs.Allocation) error { 1361 txn := s.db.Txn(true) 1362 defer txn.Abort() 1363 1364 // Handle each of the updated allocations 1365 for _, alloc := range allocs { 1366 if err := s.nestedUpdateAllocFromClient(txn, index, alloc); err != nil { 1367 return err 1368 } 1369 } 1370 1371 // Update the indexes 1372 if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { 1373 return fmt.Errorf("index update failed: %v", err) 1374 } 1375 1376 txn.Commit() 1377 return nil 1378 } 1379 1380 // nestedUpdateAllocFromClient is used to nest an update of an allocation with client status 1381 func (s *StateStore) nestedUpdateAllocFromClient(txn *memdb.Txn, index uint64, alloc *structs.Allocation) error { 1382 // Look for existing alloc 1383 existing, err := txn.First("allocs", "id", alloc.ID) 1384 if err != nil { 1385 return fmt.Errorf("alloc lookup failed: %v", err) 1386 } 1387 1388 // Nothing to do if this does not exist 1389 if existing == nil { 1390 return nil 1391 } 1392 exist := existing.(*structs.Allocation) 1393 1394 // Copy everything from the existing allocation 1395 copyAlloc := exist.Copy() 1396 1397 // Pull in anything the client is the authority on 1398 copyAlloc.ClientStatus = alloc.ClientStatus 1399 copyAlloc.ClientDescription = alloc.ClientDescription 1400 copyAlloc.TaskStates = alloc.TaskStates 1401 copyAlloc.DeploymentStatus = alloc.DeploymentStatus 1402 1403 // Update the modify index 1404 copyAlloc.ModifyIndex = index 1405 1406 // TODO TEST 1407 if err := s.updateDeploymentWithAlloc(index, 
copyAlloc, exist, txn); err != nil { 1408 return fmt.Errorf("error updating deployment: %v", err) 1409 } 1410 1411 if err := s.updateSummaryWithAlloc(index, copyAlloc, exist, txn); err != nil { 1412 return fmt.Errorf("error updating job summary: %v", err) 1413 } 1414 1415 // Update the allocation 1416 if err := txn.Insert("allocs", copyAlloc); err != nil { 1417 return fmt.Errorf("alloc insert failed: %v", err) 1418 } 1419 1420 // Set the job's status 1421 forceStatus := "" 1422 if !copyAlloc.TerminalStatus() { 1423 forceStatus = structs.JobStatusRunning 1424 } 1425 jobs := map[string]string{exist.JobID: forceStatus} 1426 if err := s.setJobStatuses(index, txn, jobs, false); err != nil { 1427 return fmt.Errorf("setting job status failed: %v", err) 1428 } 1429 return nil 1430 } 1431 1432 // UpsertAllocs is used to evict a set of allocations and allocate new ones at 1433 // the same time. 1434 func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) error { 1435 txn := s.db.Txn(true) 1436 defer txn.Abort() 1437 if err := s.upsertAllocsImpl(index, allocs, txn); err != nil { 1438 return err 1439 } 1440 txn.Commit() 1441 return nil 1442 } 1443 1444 // upsertAllocs is the actual implementation of UpsertAllocs so that it may be 1445 // used with an existing transaction. 
1446 func (s *StateStore) upsertAllocsImpl(index uint64, allocs []*structs.Allocation, txn *memdb.Txn) error { 1447 // Handle the allocations 1448 jobs := make(map[string]string, 1) 1449 for _, alloc := range allocs { 1450 existing, err := txn.First("allocs", "id", alloc.ID) 1451 if err != nil { 1452 return fmt.Errorf("alloc lookup failed: %v", err) 1453 } 1454 exist, _ := existing.(*structs.Allocation) 1455 1456 if exist == nil { 1457 alloc.CreateIndex = index 1458 alloc.ModifyIndex = index 1459 alloc.AllocModifyIndex = index 1460 1461 // Issue https://github.com/hashicorp/nomad/issues/2583 uncovered 1462 // the a race between a forced garbage collection and the scheduler 1463 // marking an allocation as terminal. The issue is that the 1464 // allocation from the scheduler has its job normalized and the FSM 1465 // will only denormalize if the allocation is not terminal. However 1466 // if the allocation is garbage collected, that will result in a 1467 // allocation being upserted for the first time without a job 1468 // attached. By returning an error here, it will cause the FSM to 1469 // error, causing the plan_apply to error and thus causing the 1470 // evaluation to be failed. This will force an index refresh that 1471 // should solve this issue. 1472 if alloc.Job == nil { 1473 return fmt.Errorf("attempting to upsert allocation %q without a job", alloc.ID) 1474 } 1475 } else { 1476 alloc.CreateIndex = exist.CreateIndex 1477 alloc.ModifyIndex = index 1478 alloc.AllocModifyIndex = index 1479 1480 // Keep the clients task states 1481 alloc.TaskStates = exist.TaskStates 1482 1483 // If the scheduler is marking this allocation as lost we do not 1484 // want to reuse the status of the existing allocation. 
1485 if alloc.ClientStatus != structs.AllocClientStatusLost { 1486 alloc.ClientStatus = exist.ClientStatus 1487 alloc.ClientDescription = exist.ClientDescription 1488 } 1489 1490 // The job has been denormalized so re-attach the original job 1491 if alloc.Job == nil { 1492 alloc.Job = exist.Job 1493 } 1494 } 1495 1496 if err := s.updateDeploymentWithAlloc(index, alloc, exist, txn); err != nil { 1497 return fmt.Errorf("error updating deployment: %v", err) 1498 } 1499 1500 if err := s.updateSummaryWithAlloc(index, alloc, exist, txn); err != nil { 1501 return fmt.Errorf("error updating job summary: %v", err) 1502 } 1503 1504 // Create the EphemeralDisk if it's nil by adding up DiskMB from task resources. 1505 // COMPAT 0.4.1 -> 0.5 1506 if alloc.Job != nil { 1507 s.addEphemeralDiskToTaskGroups(alloc.Job) 1508 } 1509 1510 if err := txn.Insert("allocs", alloc); err != nil { 1511 return fmt.Errorf("alloc insert failed: %v", err) 1512 } 1513 1514 // If the allocation is running, force the job to running status. 
1515 forceStatus := "" 1516 if !alloc.TerminalStatus() { 1517 forceStatus = structs.JobStatusRunning 1518 } 1519 jobs[alloc.JobID] = forceStatus 1520 } 1521 1522 // Update the indexes 1523 if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { 1524 return fmt.Errorf("index update failed: %v", err) 1525 } 1526 1527 // Set the job's status 1528 if err := s.setJobStatuses(index, txn, jobs, false); err != nil { 1529 return fmt.Errorf("setting job status failed: %v", err) 1530 } 1531 1532 return nil 1533 } 1534 1535 // AllocByID is used to lookup an allocation by its ID 1536 func (s *StateStore) AllocByID(ws memdb.WatchSet, id string) (*structs.Allocation, error) { 1537 txn := s.db.Txn(false) 1538 1539 watchCh, existing, err := txn.FirstWatch("allocs", "id", id) 1540 if err != nil { 1541 return nil, fmt.Errorf("alloc lookup failed: %v", err) 1542 } 1543 1544 ws.Add(watchCh) 1545 1546 if existing != nil { 1547 return existing.(*structs.Allocation), nil 1548 } 1549 return nil, nil 1550 } 1551 1552 // AllocsByIDPrefix is used to lookup allocs by prefix 1553 func (s *StateStore) AllocsByIDPrefix(ws memdb.WatchSet, id string) (memdb.ResultIterator, error) { 1554 txn := s.db.Txn(false) 1555 1556 iter, err := txn.Get("allocs", "id_prefix", id) 1557 if err != nil { 1558 return nil, fmt.Errorf("alloc lookup failed: %v", err) 1559 } 1560 1561 ws.Add(iter.WatchCh()) 1562 1563 return iter, nil 1564 } 1565 1566 // AllocsByNode returns all the allocations by node 1567 func (s *StateStore) AllocsByNode(ws memdb.WatchSet, node string) ([]*structs.Allocation, error) { 1568 txn := s.db.Txn(false) 1569 1570 // Get an iterator over the node allocations, using only the 1571 // node prefix which ignores the terminal status 1572 iter, err := txn.Get("allocs", "node_prefix", node) 1573 if err != nil { 1574 return nil, err 1575 } 1576 1577 ws.Add(iter.WatchCh()) 1578 1579 var out []*structs.Allocation 1580 for { 1581 raw := iter.Next() 1582 if raw == nil { 1583 break 1584 } 
1585 out = append(out, raw.(*structs.Allocation)) 1586 } 1587 return out, nil 1588 } 1589 1590 // AllocsByNode returns all the allocations by node and terminal status 1591 func (s *StateStore) AllocsByNodeTerminal(ws memdb.WatchSet, node string, terminal bool) ([]*structs.Allocation, error) { 1592 txn := s.db.Txn(false) 1593 1594 // Get an iterator over the node allocations 1595 iter, err := txn.Get("allocs", "node", node, terminal) 1596 if err != nil { 1597 return nil, err 1598 } 1599 1600 ws.Add(iter.WatchCh()) 1601 1602 var out []*structs.Allocation 1603 for { 1604 raw := iter.Next() 1605 if raw == nil { 1606 break 1607 } 1608 out = append(out, raw.(*structs.Allocation)) 1609 } 1610 return out, nil 1611 } 1612 1613 // AllocsByJob returns all the allocations by job id 1614 func (s *StateStore) AllocsByJob(ws memdb.WatchSet, jobID string, all bool) ([]*structs.Allocation, error) { 1615 txn := s.db.Txn(false) 1616 1617 // Get the job 1618 var job *structs.Job 1619 rawJob, err := txn.First("jobs", "id", jobID) 1620 if err != nil { 1621 return nil, err 1622 } 1623 if rawJob != nil { 1624 job = rawJob.(*structs.Job) 1625 } 1626 1627 // Get an iterator over the node allocations 1628 iter, err := txn.Get("allocs", "job", jobID) 1629 if err != nil { 1630 return nil, err 1631 } 1632 1633 ws.Add(iter.WatchCh()) 1634 1635 var out []*structs.Allocation 1636 for { 1637 raw := iter.Next() 1638 if raw == nil { 1639 break 1640 } 1641 1642 alloc := raw.(*structs.Allocation) 1643 // If the allocation belongs to a job with the same ID but a different 1644 // create index and we are not getting all the allocations whose Jobs 1645 // matches the same Job ID then we skip it 1646 if !all && job != nil && alloc.Job.CreateIndex != job.CreateIndex { 1647 continue 1648 } 1649 out = append(out, raw.(*structs.Allocation)) 1650 } 1651 return out, nil 1652 } 1653 1654 // AllocsByEval returns all the allocations by eval id 1655 func (s *StateStore) AllocsByEval(ws memdb.WatchSet, evalID string) 
([]*structs.Allocation, error) { 1656 txn := s.db.Txn(false) 1657 1658 // Get an iterator over the eval allocations 1659 iter, err := txn.Get("allocs", "eval", evalID) 1660 if err != nil { 1661 return nil, err 1662 } 1663 1664 ws.Add(iter.WatchCh()) 1665 1666 var out []*structs.Allocation 1667 for { 1668 raw := iter.Next() 1669 if raw == nil { 1670 break 1671 } 1672 out = append(out, raw.(*structs.Allocation)) 1673 } 1674 return out, nil 1675 } 1676 1677 // AllocsByDeployment returns all the allocations by deployment id 1678 func (s *StateStore) AllocsByDeployment(ws memdb.WatchSet, deploymentID string) ([]*structs.Allocation, error) { 1679 txn := s.db.Txn(false) 1680 1681 // Get an iterator over the deployments allocations 1682 iter, err := txn.Get("allocs", "deployment", deploymentID) 1683 if err != nil { 1684 return nil, err 1685 } 1686 1687 ws.Add(iter.WatchCh()) 1688 1689 var out []*structs.Allocation 1690 for { 1691 raw := iter.Next() 1692 if raw == nil { 1693 break 1694 } 1695 out = append(out, raw.(*structs.Allocation)) 1696 } 1697 return out, nil 1698 } 1699 1700 // Allocs returns an iterator over all the evaluations 1701 func (s *StateStore) Allocs(ws memdb.WatchSet) (memdb.ResultIterator, error) { 1702 txn := s.db.Txn(false) 1703 1704 // Walk the entire table 1705 iter, err := txn.Get("allocs", "id") 1706 if err != nil { 1707 return nil, err 1708 } 1709 1710 ws.Add(iter.WatchCh()) 1711 1712 return iter, nil 1713 } 1714 1715 // UpsertVaultAccessors is used to register a set of Vault Accessors 1716 func (s *StateStore) UpsertVaultAccessor(index uint64, accessors []*structs.VaultAccessor) error { 1717 txn := s.db.Txn(true) 1718 defer txn.Abort() 1719 1720 for _, accessor := range accessors { 1721 // Set the create index 1722 accessor.CreateIndex = index 1723 1724 // Insert the accessor 1725 if err := txn.Insert("vault_accessors", accessor); err != nil { 1726 return fmt.Errorf("accessor insert failed: %v", err) 1727 } 1728 } 1729 1730 if err := 
txn.Insert("index", &IndexEntry{"vault_accessors", index}); err != nil { 1731 return fmt.Errorf("index update failed: %v", err) 1732 } 1733 1734 txn.Commit() 1735 return nil 1736 } 1737 1738 // DeleteVaultAccessors is used to delete a set of Vault Accessors 1739 func (s *StateStore) DeleteVaultAccessors(index uint64, accessors []*structs.VaultAccessor) error { 1740 txn := s.db.Txn(true) 1741 defer txn.Abort() 1742 1743 // Lookup the accessor 1744 for _, accessor := range accessors { 1745 // Delete the accessor 1746 if err := txn.Delete("vault_accessors", accessor); err != nil { 1747 return fmt.Errorf("accessor delete failed: %v", err) 1748 } 1749 } 1750 1751 if err := txn.Insert("index", &IndexEntry{"vault_accessors", index}); err != nil { 1752 return fmt.Errorf("index update failed: %v", err) 1753 } 1754 1755 txn.Commit() 1756 return nil 1757 } 1758 1759 // VaultAccessor returns the given Vault accessor 1760 func (s *StateStore) VaultAccessor(ws memdb.WatchSet, accessor string) (*structs.VaultAccessor, error) { 1761 txn := s.db.Txn(false) 1762 1763 watchCh, existing, err := txn.FirstWatch("vault_accessors", "id", accessor) 1764 if err != nil { 1765 return nil, fmt.Errorf("accessor lookup failed: %v", err) 1766 } 1767 1768 ws.Add(watchCh) 1769 1770 if existing != nil { 1771 return existing.(*structs.VaultAccessor), nil 1772 } 1773 1774 return nil, nil 1775 } 1776 1777 // VaultAccessors returns an iterator of Vault accessors. 
1778 func (s *StateStore) VaultAccessors(ws memdb.WatchSet) (memdb.ResultIterator, error) { 1779 txn := s.db.Txn(false) 1780 1781 iter, err := txn.Get("vault_accessors", "id") 1782 if err != nil { 1783 return nil, err 1784 } 1785 1786 ws.Add(iter.WatchCh()) 1787 1788 return iter, nil 1789 } 1790 1791 // VaultAccessorsByAlloc returns all the Vault accessors by alloc id 1792 func (s *StateStore) VaultAccessorsByAlloc(ws memdb.WatchSet, allocID string) ([]*structs.VaultAccessor, error) { 1793 txn := s.db.Txn(false) 1794 1795 // Get an iterator over the accessors 1796 iter, err := txn.Get("vault_accessors", "alloc_id", allocID) 1797 if err != nil { 1798 return nil, err 1799 } 1800 1801 ws.Add(iter.WatchCh()) 1802 1803 var out []*structs.VaultAccessor 1804 for { 1805 raw := iter.Next() 1806 if raw == nil { 1807 break 1808 } 1809 out = append(out, raw.(*structs.VaultAccessor)) 1810 } 1811 return out, nil 1812 } 1813 1814 // VaultAccessorsByNode returns all the Vault accessors by node id 1815 func (s *StateStore) VaultAccessorsByNode(ws memdb.WatchSet, nodeID string) ([]*structs.VaultAccessor, error) { 1816 txn := s.db.Txn(false) 1817 1818 // Get an iterator over the accessors 1819 iter, err := txn.Get("vault_accessors", "node_id", nodeID) 1820 if err != nil { 1821 return nil, err 1822 } 1823 1824 ws.Add(iter.WatchCh()) 1825 1826 var out []*structs.VaultAccessor 1827 for { 1828 raw := iter.Next() 1829 if raw == nil { 1830 break 1831 } 1832 out = append(out, raw.(*structs.VaultAccessor)) 1833 } 1834 return out, nil 1835 } 1836 1837 // UpdateDeploymentStatus is used to make deployment status updates and 1838 // potentially make a evaluation 1839 func (s *StateStore) UpdateDeploymentStatus(index uint64, req *structs.DeploymentStatusUpdateRequest) error { 1840 txn := s.db.Txn(true) 1841 defer txn.Abort() 1842 1843 if err := s.updateDeploymentStatusImpl(index, req.DeploymentUpdate, txn); err != nil { 1844 return err 1845 } 1846 1847 // Upsert the job if necessary 1848 if 
req.Job != nil { 1849 if err := s.upsertJobImpl(index, req.Job, false, txn); err != nil { 1850 return err 1851 } 1852 } 1853 1854 // Upsert the optional eval 1855 if req.Eval != nil { 1856 if err := s.nestedUpsertEval(txn, index, req.Eval); err != nil { 1857 return err 1858 } 1859 } 1860 1861 txn.Commit() 1862 return nil 1863 } 1864 1865 // updateDeploymentStatusImpl is used to make deployment status updates 1866 func (s *StateStore) updateDeploymentStatusImpl(index uint64, u *structs.DeploymentStatusUpdate, txn *memdb.Txn) error { 1867 // Retrieve deployment 1868 ws := memdb.NewWatchSet() 1869 deployment, err := s.deploymentByIDImpl(ws, u.DeploymentID, txn) 1870 if err != nil { 1871 return err 1872 } else if deployment == nil { 1873 return fmt.Errorf("Deployment ID %q couldn't be updated as it does not exist", u.DeploymentID) 1874 } else if !deployment.Active() { 1875 return fmt.Errorf("Deployment %q has terminal status %q:", deployment.ID, deployment.Status) 1876 } 1877 1878 // Apply the new status 1879 copy := deployment.Copy() 1880 copy.Status = u.Status 1881 copy.StatusDescription = u.StatusDescription 1882 copy.ModifyIndex = index 1883 1884 // Insert the deployment 1885 if err := txn.Insert("deployment", copy); err != nil { 1886 return err 1887 } 1888 1889 // Update the index 1890 if err := txn.Insert("index", &IndexEntry{"deployment", index}); err != nil { 1891 return fmt.Errorf("index update failed: %v", err) 1892 } 1893 1894 // If the deployment is being marked as complete, set the job to stable. 1895 if copy.Status == structs.DeploymentStatusSuccessful { 1896 if err := s.updateJobStabilityImpl(index, copy.JobID, copy.JobVersion, true, txn); err != nil { 1897 return fmt.Errorf("failed to update job stability: %v", err) 1898 } 1899 } 1900 1901 return nil 1902 } 1903 1904 // UpdateJobStability updates the stability of the given job and version to the 1905 // desired status. 
1906 func (s *StateStore) UpdateJobStability(index uint64, jobID string, jobVersion uint64, stable bool) error { 1907 txn := s.db.Txn(true) 1908 defer txn.Abort() 1909 1910 if err := s.updateJobStabilityImpl(index, jobID, jobVersion, stable, txn); err != nil { 1911 return err 1912 } 1913 1914 txn.Commit() 1915 return nil 1916 } 1917 1918 // updateJobStabilityImpl updates the stability of the given job and version 1919 func (s *StateStore) updateJobStabilityImpl(index uint64, jobID string, jobVersion uint64, stable bool, txn *memdb.Txn) error { 1920 // Get the job that is referenced 1921 job, err := s.jobByIDAndVersionImpl(nil, jobID, jobVersion, txn) 1922 if err != nil { 1923 return err 1924 } 1925 1926 // Has already been cleared, nothing to do 1927 if job == nil { 1928 return nil 1929 } 1930 1931 // If the job already has the desired stability, nothing to do 1932 if job.Stable == stable { 1933 return nil 1934 } 1935 1936 copy := job.Copy() 1937 copy.Stable = stable 1938 return s.upsertJobImpl(index, copy, true, txn) 1939 } 1940 1941 // UpdateDeploymentPromotion is used to promote canaries in a deployment and 1942 // potentially make a evaluation 1943 func (s *StateStore) UpdateDeploymentPromotion(index uint64, req *structs.ApplyDeploymentPromoteRequest) error { 1944 txn := s.db.Txn(true) 1945 defer txn.Abort() 1946 1947 // Retrieve deployment and ensure it is not terminal and is active 1948 ws := memdb.NewWatchSet() 1949 deployment, err := s.deploymentByIDImpl(ws, req.DeploymentID, txn) 1950 if err != nil { 1951 return err 1952 } else if deployment == nil { 1953 return fmt.Errorf("Deployment ID %q couldn't be updated as it does not exist", req.DeploymentID) 1954 } else if !deployment.Active() { 1955 return fmt.Errorf("Deployment %q has terminal status %q:", deployment.ID, deployment.Status) 1956 } 1957 1958 // Retrieve effected allocations 1959 iter, err := txn.Get("allocs", "deployment", req.DeploymentID) 1960 if err != nil { 1961 return err 1962 } 1963 1964 
groupIndex := make(map[string]struct{}, len(req.Groups)) 1965 for _, g := range req.Groups { 1966 groupIndex[g] = struct{}{} 1967 } 1968 1969 canaryIndex := make(map[string]struct{}, len(deployment.TaskGroups)) 1970 for _, state := range deployment.TaskGroups { 1971 for _, c := range state.PlacedCanaries { 1972 canaryIndex[c] = struct{}{} 1973 } 1974 } 1975 1976 haveCanaries := false 1977 var unhealthyErr multierror.Error 1978 for { 1979 raw := iter.Next() 1980 if raw == nil { 1981 break 1982 } 1983 1984 alloc := raw.(*structs.Allocation) 1985 1986 // Check that the alloc is a canary 1987 if _, ok := canaryIndex[alloc.ID]; !ok { 1988 continue 1989 } 1990 1991 // Check that the canary is part of a group being promoted 1992 if _, ok := groupIndex[alloc.TaskGroup]; !req.All && !ok { 1993 continue 1994 } 1995 1996 // Ensure the canaries are healthy 1997 if !alloc.DeploymentStatus.IsHealthy() { 1998 multierror.Append(&unhealthyErr, fmt.Errorf("Canary allocation %q for group %q is not healthy", alloc.ID, alloc.TaskGroup)) 1999 continue 2000 } 2001 2002 haveCanaries = true 2003 } 2004 2005 if err := unhealthyErr.ErrorOrNil(); err != nil { 2006 return err 2007 } 2008 2009 if !haveCanaries { 2010 return fmt.Errorf("no canaries to promote") 2011 } 2012 2013 // Update deployment 2014 copy := deployment.Copy() 2015 copy.ModifyIndex = index 2016 for tg, status := range copy.TaskGroups { 2017 _, ok := groupIndex[tg] 2018 if !req.All && !ok { 2019 continue 2020 } 2021 2022 status.Promoted = true 2023 } 2024 2025 // If the deployment no longer needs promotion, update its status 2026 if !copy.RequiresPromotion() && copy.Status == structs.DeploymentStatusRunning { 2027 copy.StatusDescription = structs.DeploymentStatusDescriptionRunning 2028 } 2029 2030 // Insert the deployment 2031 if err := s.upsertDeploymentImpl(index, copy, txn); err != nil { 2032 return err 2033 } 2034 2035 // Upsert the optional eval 2036 if req.Eval != nil { 2037 if err := s.nestedUpsertEval(txn, index, 
req.Eval); err != nil { 2038 return err 2039 } 2040 } 2041 2042 txn.Commit() 2043 return nil 2044 } 2045 2046 // UpdateDeploymentAllocHealth is used to update the health of allocations as 2047 // part of the deployment and potentially make a evaluation 2048 func (s *StateStore) UpdateDeploymentAllocHealth(index uint64, req *structs.ApplyDeploymentAllocHealthRequest) error { 2049 txn := s.db.Txn(true) 2050 defer txn.Abort() 2051 2052 // Retrieve deployment and ensure it is not terminal and is active 2053 ws := memdb.NewWatchSet() 2054 deployment, err := s.deploymentByIDImpl(ws, req.DeploymentID, txn) 2055 if err != nil { 2056 return err 2057 } else if deployment == nil { 2058 return fmt.Errorf("Deployment ID %q couldn't be updated as it does not exist", req.DeploymentID) 2059 } else if !deployment.Active() { 2060 return fmt.Errorf("Deployment %q has terminal status %q:", deployment.ID, deployment.Status) 2061 } 2062 2063 // Update the health status of each allocation 2064 if total := len(req.HealthyAllocationIDs) + len(req.UnhealthyAllocationIDs); total != 0 { 2065 setAllocHealth := func(id string, healthy bool) error { 2066 existing, err := txn.First("allocs", "id", id) 2067 if err != nil { 2068 return fmt.Errorf("alloc %q lookup failed: %v", id, err) 2069 } 2070 if existing == nil { 2071 return fmt.Errorf("unknown alloc %q", id) 2072 } 2073 2074 old := existing.(*structs.Allocation) 2075 if old.DeploymentID != req.DeploymentID { 2076 return fmt.Errorf("alloc %q is not part of deployment %q", id, req.DeploymentID) 2077 } 2078 2079 // Set the health 2080 copy := old.Copy() 2081 if copy.DeploymentStatus == nil { 2082 copy.DeploymentStatus = &structs.AllocDeploymentStatus{} 2083 } 2084 copy.DeploymentStatus.Healthy = helper.BoolToPtr(healthy) 2085 copy.DeploymentStatus.ModifyIndex = index 2086 2087 if err := s.updateDeploymentWithAlloc(index, copy, old, txn); err != nil { 2088 return fmt.Errorf("error updating deployment: %v", err) 2089 } 2090 2091 if err := 
txn.Insert("allocs", copy); err != nil {
				return fmt.Errorf("alloc insert failed: %v", err)
			}

			return nil
		}

		for _, id := range req.HealthyAllocationIDs {
			if err := setAllocHealth(id, true); err != nil {
				return err
			}
		}
		for _, id := range req.UnhealthyAllocationIDs {
			if err := setAllocHealth(id, false); err != nil {
				return err
			}
		}

		// Update the indexes
		if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil {
			return fmt.Errorf("index update failed: %v", err)
		}
	}

	// Update the deployment status as needed.
	if req.DeploymentUpdate != nil {
		if err := s.updateDeploymentStatusImpl(index, req.DeploymentUpdate, txn); err != nil {
			return err
		}
	}

	// Upsert the job if necessary
	if req.Job != nil {
		if err := s.upsertJobImpl(index, req.Job, false, txn); err != nil {
			return err
		}
	}

	// Upsert the optional eval
	if req.Eval != nil {
		if err := s.nestedUpsertEval(txn, index, req.Eval); err != nil {
			return err
		}
	}

	txn.Commit()
	return nil
}

// LatestIndex returns the greatest value stored across all entries of the
// "index" table, or zero when the table is empty.
func (s *StateStore) LatestIndex() (uint64, error) {
	entries, err := s.Indexes()
	if err != nil {
		return 0, err
	}

	// Scan every entry and remember the largest value seen so far.
	var latest uint64
	for raw := entries.Next(); raw != nil; raw = entries.Next() {
		if entry := raw.(*IndexEntry); entry.Value > latest {
			latest = entry.Value
		}
	}

	return latest, nil
}

// Index returns the value recorded in the "index" table under the given name.
// A name without an entry yields zero and no error.
func (s *StateStore) Index(name string) (uint64, error) {
	txn := s.db.Txn(false)

	// Lookup the first matching index
	raw, err := txn.First("index", "id", name)
	switch {
	case err != nil:
		return 0, err
	case raw == nil:
		return 0, nil
	}
	return raw.(*IndexEntry).Value, nil
}

// RemoveIndex is a helper method for testing purposes that deletes the entry
// stored under the given name from the "index" table.
func (s *StateStore) RemoveIndex(name string) error {
	txn := s.db.Txn(true)
	defer txn.Abort()

	_, err := txn.DeleteAll("index", "id", name)
	if err != nil {
		return err
	}

	txn.Commit()
	return nil
}

// Indexes returns an iterator over all the entries of the "index" table.
func (s *StateStore) Indexes() (memdb.ResultIterator, error) {
	txn := s.db.Txn(false)

	// Walk the entire index table
	entries, err := txn.Get("index", "id")
	if err != nil {
		return nil, err
	}
	return entries, nil
}

// ReconcileJobSummaries re-creates the summaries of all jobs present in the
// state store by recounting their allocations. Every rebuilt summary, as well
// as the "job_summary" index entry, is stamped with the passed index.
func (s *StateStore) ReconcileJobSummaries(index uint64) error {
	txn := s.db.Txn(true)
	defer txn.Abort()

	// Get all the jobs
	jobs, err := txn.Get("jobs", "id")
	if err != nil {
		return err
	}
	for rawJob := jobs.Next(); rawJob != nil; rawJob = jobs.Next() {
		job := rawJob.(*structs.Job)

		// Start from a fresh summary holding a zeroed entry per task group
		summary := &structs.JobSummary{
			JobID:   job.ID,
			Summary: make(map[string]structs.TaskGroupSummary),
		}
		for _, tg := range job.TaskGroups {
			summary.Summary[tg.Name] = structs.TaskGroupSummary{}
		}

		// Find all the allocations for the job
		allocs, err := txn.Get("allocs", "job", job.ID)
		if err != nil {
			return err
		}

		// Tally the allocations by client status
		for rawAlloc := allocs.Next(); rawAlloc != nil; rawAlloc = allocs.Next() {
			alloc := rawAlloc.(*structs.Allocation)

			// Ignore the allocation if it doesn't belong to the currently
			// registered job. The allocation is checked because of issue #2304
			if alloc.Job == nil || alloc.Job.CreateIndex != job.CreateIndex {
				continue
			}

			counts := summary.Summary[alloc.TaskGroup]
			switch alloc.ClientStatus {
			case structs.AllocClientStatusPending:
				counts.Starting++
			case structs.AllocClientStatusRunning:
				counts.Running++
			case structs.AllocClientStatusComplete:
				counts.Complete++
			case structs.AllocClientStatusFailed:
				counts.Failed++
			case structs.AllocClientStatusLost:
				counts.Lost++
			default:
				s.logger.Printf("[ERR] state_store: invalid client status: %v in allocation %q", alloc.ClientStatus, alloc.ID)
			}
			summary.Summary[alloc.TaskGroup] = counts
		}

		// The summary shares the job's create index and is modified at the
		// current index
		summary.CreateIndex = job.CreateIndex
		summary.ModifyIndex = index

		// Insert the job summary
		if err := txn.Insert("job_summary", summary); err != nil {
			return fmt.Errorf("error inserting job summary: %v", err)
		}
	}

	// Update the indexes table for job summary
	if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}
	txn.Commit()
	return nil
}

// setJobStatuses is a helper for calling setJobStatus on multiple jobs by ID.
// It takes a map of job IDs to an optional forceStatus string. Jobs that are
// not present in the state store are skipped. It returns an error if a job
// lookup or setJobStatus fails.
2293 func (s *StateStore) setJobStatuses(index uint64, txn *memdb.Txn, 2294 jobs map[string]string, evalDelete bool) error { 2295 for job, forceStatus := range jobs { 2296 existing, err := txn.First("jobs", "id", job) 2297 if err != nil { 2298 return fmt.Errorf("job lookup failed: %v", err) 2299 } 2300 2301 if existing == nil { 2302 continue 2303 } 2304 2305 if err := s.setJobStatus(index, txn, existing.(*structs.Job), evalDelete, forceStatus); err != nil { 2306 return err 2307 } 2308 } 2309 2310 return nil 2311 } 2312 2313 // setJobStatus sets the status of the job by looking up associated evaluations 2314 // and allocations. evalDelete should be set to true if setJobStatus is being 2315 // called because an evaluation is being deleted (potentially because of garbage 2316 // collection). If forceStatus is non-empty, the job's status will be set to the 2317 // passed status. 2318 func (s *StateStore) setJobStatus(index uint64, txn *memdb.Txn, 2319 job *structs.Job, evalDelete bool, forceStatus string) error { 2320 2321 // Capture the current status so we can check if there is a change 2322 oldStatus := job.Status 2323 if index == job.CreateIndex { 2324 oldStatus = "" 2325 } 2326 newStatus := forceStatus 2327 2328 // If forceStatus is not set, compute the jobs status. 2329 if forceStatus == "" { 2330 var err error 2331 newStatus, err = s.getJobStatus(txn, job, evalDelete) 2332 if err != nil { 2333 return err 2334 } 2335 } 2336 2337 // Fast-path if nothing has changed. 
2338 if oldStatus == newStatus { 2339 return nil 2340 } 2341 2342 // Copy and update the existing job 2343 updated := job.Copy() 2344 updated.Status = newStatus 2345 updated.ModifyIndex = index 2346 2347 // Insert the job 2348 if err := txn.Insert("jobs", updated); err != nil { 2349 return fmt.Errorf("job insert failed: %v", err) 2350 } 2351 if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil { 2352 return fmt.Errorf("index update failed: %v", err) 2353 } 2354 2355 // Update the children summary 2356 if updated.ParentID != "" { 2357 // Try to update the summary of the parent job summary 2358 summaryRaw, err := txn.First("job_summary", "id", updated.ParentID) 2359 if err != nil { 2360 return fmt.Errorf("unable to retrieve summary for parent job: %v", err) 2361 } 2362 2363 // Only continue if the summary exists. It could not exist if the parent 2364 // job was removed 2365 if summaryRaw != nil { 2366 existing := summaryRaw.(*structs.JobSummary) 2367 pSummary := existing.Copy() 2368 if pSummary.Children == nil { 2369 pSummary.Children = new(structs.JobChildrenSummary) 2370 } 2371 2372 // Determine the transistion and update the correct fields 2373 children := pSummary.Children 2374 2375 // Decrement old status 2376 if oldStatus != "" { 2377 switch oldStatus { 2378 case structs.JobStatusPending: 2379 children.Pending-- 2380 case structs.JobStatusRunning: 2381 children.Running-- 2382 case structs.JobStatusDead: 2383 children.Dead-- 2384 default: 2385 return fmt.Errorf("unknown old job status %q", oldStatus) 2386 } 2387 } 2388 2389 // Increment new status 2390 switch newStatus { 2391 case structs.JobStatusPending: 2392 children.Pending++ 2393 case structs.JobStatusRunning: 2394 children.Running++ 2395 case structs.JobStatusDead: 2396 children.Dead++ 2397 default: 2398 return fmt.Errorf("unknown new job status %q", newStatus) 2399 } 2400 2401 // Update the index 2402 pSummary.ModifyIndex = index 2403 2404 // Insert the summary 2405 if err := 
txn.Insert("job_summary", pSummary); err != nil { 2406 return fmt.Errorf("job summary insert failed: %v", err) 2407 } 2408 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 2409 return fmt.Errorf("index update failed: %v", err) 2410 } 2411 } 2412 } 2413 2414 return nil 2415 } 2416 2417 func (s *StateStore) getJobStatus(txn *memdb.Txn, job *structs.Job, evalDelete bool) (string, error) { 2418 allocs, err := txn.Get("allocs", "job", job.ID) 2419 if err != nil { 2420 return "", err 2421 } 2422 2423 // If there is a non-terminal allocation, the job is running. 2424 hasAlloc := false 2425 for alloc := allocs.Next(); alloc != nil; alloc = allocs.Next() { 2426 hasAlloc = true 2427 if !alloc.(*structs.Allocation).TerminalStatus() { 2428 return structs.JobStatusRunning, nil 2429 } 2430 } 2431 2432 evals, err := txn.Get("evals", "job_prefix", job.ID) 2433 if err != nil { 2434 return "", err 2435 } 2436 2437 hasEval := false 2438 for raw := evals.Next(); raw != nil; raw = evals.Next() { 2439 e := raw.(*structs.Evaluation) 2440 2441 // Filter non-exact matches 2442 if e.JobID != job.ID { 2443 continue 2444 } 2445 2446 hasEval = true 2447 if !e.TerminalStatus() { 2448 return structs.JobStatusPending, nil 2449 } 2450 } 2451 2452 // system jobs are running until explicitly stopped (which is handled elsewhere) 2453 if job.Type == structs.JobTypeSystem { 2454 if job.Stop { 2455 return structs.JobStatusDead, nil 2456 } 2457 2458 // Pending until at least one eval has completed 2459 return structs.JobStatusRunning, nil 2460 } 2461 2462 // The job is dead if all the allocations and evals are terminal or if there 2463 // are no evals because of garbage collection. 2464 if evalDelete || hasEval || hasAlloc { 2465 return structs.JobStatusDead, nil 2466 } 2467 2468 // If there are no allocations or evaluations it is a new job. 
If the 2469 // job is periodic or is a parameterized job, we mark it as running as 2470 // it will never have an allocation/evaluation against it. 2471 if job.IsPeriodic() || job.IsParameterized() { 2472 // If the job is stopped mark it as dead 2473 if job.Stop { 2474 return structs.JobStatusDead, nil 2475 } 2476 2477 return structs.JobStatusRunning, nil 2478 } 2479 return structs.JobStatusPending, nil 2480 } 2481 2482 // updateSummaryWithJob creates or updates job summaries when new jobs are 2483 // upserted or existing ones are updated 2484 func (s *StateStore) updateSummaryWithJob(index uint64, job *structs.Job, 2485 txn *memdb.Txn) error { 2486 2487 // Update the job summary 2488 summaryRaw, err := txn.First("job_summary", "id", job.ID) 2489 if err != nil { 2490 return fmt.Errorf("job summary lookup failed: %v", err) 2491 } 2492 2493 // Get the summary or create if necessary 2494 var summary *structs.JobSummary 2495 hasSummaryChanged := false 2496 if summaryRaw != nil { 2497 summary = summaryRaw.(*structs.JobSummary).Copy() 2498 } else { 2499 summary = &structs.JobSummary{ 2500 JobID: job.ID, 2501 Summary: make(map[string]structs.TaskGroupSummary), 2502 Children: new(structs.JobChildrenSummary), 2503 CreateIndex: index, 2504 } 2505 hasSummaryChanged = true 2506 } 2507 2508 for _, tg := range job.TaskGroups { 2509 if _, ok := summary.Summary[tg.Name]; !ok { 2510 newSummary := structs.TaskGroupSummary{ 2511 Complete: 0, 2512 Failed: 0, 2513 Running: 0, 2514 Starting: 0, 2515 } 2516 summary.Summary[tg.Name] = newSummary 2517 hasSummaryChanged = true 2518 } 2519 } 2520 2521 // The job summary has changed, so update the modify index. 
2522 if hasSummaryChanged { 2523 summary.ModifyIndex = index 2524 2525 // Update the indexes table for job summary 2526 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 2527 return fmt.Errorf("index update failed: %v", err) 2528 } 2529 if err := txn.Insert("job_summary", summary); err != nil { 2530 return err 2531 } 2532 } 2533 2534 return nil 2535 } 2536 2537 // updateDeploymentWithAlloc is used to update the deployment state associated 2538 // with the given allocation. The passed alloc may be updated if the deployment 2539 // status has changed to capture the modify index at which it has changed. 2540 func (s *StateStore) updateDeploymentWithAlloc(index uint64, alloc, existing *structs.Allocation, txn *memdb.Txn) error { 2541 // Nothing to do if the allocation is not associated with a deployment 2542 if alloc.DeploymentID == "" { 2543 return nil 2544 } 2545 2546 // Get the deployment 2547 ws := memdb.NewWatchSet() 2548 deployment, err := s.deploymentByIDImpl(ws, alloc.DeploymentID, txn) 2549 if err != nil { 2550 return err 2551 } 2552 if deployment == nil { 2553 return nil 2554 } 2555 2556 // Retrieve the deployment state object 2557 _, ok := deployment.TaskGroups[alloc.TaskGroup] 2558 if !ok { 2559 // If the task group isn't part of the deployment, the task group wasn't 2560 // part of a rolling update so nothing to do 2561 return nil 2562 } 2563 2564 // Do not modify in-place. 
Instead keep track of what must be done 2565 placed := 0 2566 healthy := 0 2567 unhealthy := 0 2568 2569 // If there was no existing allocation, this is a placement and we increment 2570 // the placement 2571 existingHealthSet := existing != nil && existing.DeploymentStatus != nil && existing.DeploymentStatus.Healthy != nil 2572 allocHealthSet := alloc.DeploymentStatus != nil && alloc.DeploymentStatus.Healthy != nil 2573 if existing == nil || existing.DeploymentID != alloc.DeploymentID { 2574 placed++ 2575 } else if !existingHealthSet && allocHealthSet { 2576 if *alloc.DeploymentStatus.Healthy { 2577 healthy++ 2578 } else { 2579 unhealthy++ 2580 } 2581 } else if existingHealthSet && allocHealthSet { 2582 // See if it has gone from healthy to unhealthy 2583 if *existing.DeploymentStatus.Healthy && !*alloc.DeploymentStatus.Healthy { 2584 healthy-- 2585 unhealthy++ 2586 } 2587 } 2588 2589 // Nothing to do 2590 if placed == 0 && healthy == 0 && unhealthy == 0 { 2591 return nil 2592 } 2593 2594 // Update the allocation's deployment status modify index 2595 if alloc.DeploymentStatus != nil && healthy+unhealthy != 0 { 2596 alloc.DeploymentStatus.ModifyIndex = index 2597 } 2598 2599 // Create a copy of the deployment object 2600 deploymentCopy := deployment.Copy() 2601 deploymentCopy.ModifyIndex = index 2602 2603 state := deploymentCopy.TaskGroups[alloc.TaskGroup] 2604 state.PlacedAllocs += placed 2605 state.HealthyAllocs += healthy 2606 state.UnhealthyAllocs += unhealthy 2607 2608 // Upsert the deployment 2609 if err := s.upsertDeploymentImpl(index, deploymentCopy, txn); err != nil { 2610 return err 2611 } 2612 2613 return nil 2614 } 2615 2616 // updateSummaryWithAlloc updates the job summary when allocations are updated 2617 // or inserted 2618 func (s *StateStore) updateSummaryWithAlloc(index uint64, alloc *structs.Allocation, 2619 existingAlloc *structs.Allocation, txn *memdb.Txn) error { 2620 2621 // We don't have to update the summary if the job is missing 2622 if 
alloc.Job == nil { 2623 return nil 2624 } 2625 2626 summaryRaw, err := txn.First("job_summary", "id", alloc.JobID) 2627 if err != nil { 2628 return fmt.Errorf("unable to lookup job summary for job id %q: %v", alloc.JobID, err) 2629 } 2630 2631 if summaryRaw == nil { 2632 // Check if the job is de-registered 2633 rawJob, err := txn.First("jobs", "id", alloc.JobID) 2634 if err != nil { 2635 return fmt.Errorf("unable to query job: %v", err) 2636 } 2637 2638 // If the job is de-registered then we skip updating it's summary 2639 if rawJob == nil { 2640 return nil 2641 } 2642 2643 return fmt.Errorf("job summary for job %q is not present", alloc.JobID) 2644 } 2645 2646 // Get a copy of the existing summary 2647 jobSummary := summaryRaw.(*structs.JobSummary).Copy() 2648 2649 // Not updating the job summary because the allocation doesn't belong to the 2650 // currently registered job 2651 if jobSummary.CreateIndex != alloc.Job.CreateIndex { 2652 return nil 2653 } 2654 2655 tgSummary, ok := jobSummary.Summary[alloc.TaskGroup] 2656 if !ok { 2657 return fmt.Errorf("unable to find task group in the job summary: %v", alloc.TaskGroup) 2658 } 2659 2660 summaryChanged := false 2661 if existingAlloc == nil { 2662 switch alloc.DesiredStatus { 2663 case structs.AllocDesiredStatusStop, structs.AllocDesiredStatusEvict: 2664 s.logger.Printf("[ERR] state_store: new allocation inserted into state store with id: %v and state: %v", 2665 alloc.ID, alloc.DesiredStatus) 2666 } 2667 switch alloc.ClientStatus { 2668 case structs.AllocClientStatusPending: 2669 tgSummary.Starting += 1 2670 if tgSummary.Queued > 0 { 2671 tgSummary.Queued -= 1 2672 } 2673 summaryChanged = true 2674 case structs.AllocClientStatusRunning, structs.AllocClientStatusFailed, 2675 structs.AllocClientStatusComplete: 2676 s.logger.Printf("[ERR] state_store: new allocation inserted into state store with id: %v and state: %v", 2677 alloc.ID, alloc.ClientStatus) 2678 } 2679 } else if existingAlloc.ClientStatus != 
alloc.ClientStatus { 2680 // Incrementing the client of the bin of the current state 2681 switch alloc.ClientStatus { 2682 case structs.AllocClientStatusRunning: 2683 tgSummary.Running += 1 2684 case structs.AllocClientStatusFailed: 2685 tgSummary.Failed += 1 2686 case structs.AllocClientStatusPending: 2687 tgSummary.Starting += 1 2688 case structs.AllocClientStatusComplete: 2689 tgSummary.Complete += 1 2690 case structs.AllocClientStatusLost: 2691 tgSummary.Lost += 1 2692 } 2693 2694 // Decrementing the count of the bin of the last state 2695 switch existingAlloc.ClientStatus { 2696 case structs.AllocClientStatusRunning: 2697 tgSummary.Running -= 1 2698 case structs.AllocClientStatusPending: 2699 tgSummary.Starting -= 1 2700 case structs.AllocClientStatusLost: 2701 tgSummary.Lost -= 1 2702 case structs.AllocClientStatusFailed, structs.AllocClientStatusComplete: 2703 default: 2704 s.logger.Printf("[ERR] state_store: invalid old state of allocation with id: %v, and state: %v", 2705 existingAlloc.ID, existingAlloc.ClientStatus) 2706 } 2707 summaryChanged = true 2708 } 2709 jobSummary.Summary[alloc.TaskGroup] = tgSummary 2710 2711 if summaryChanged { 2712 jobSummary.ModifyIndex = index 2713 2714 // Update the indexes table for job summary 2715 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 2716 return fmt.Errorf("index update failed: %v", err) 2717 } 2718 2719 if err := txn.Insert("job_summary", jobSummary); err != nil { 2720 return fmt.Errorf("updating job summary failed: %v", err) 2721 } 2722 } 2723 2724 return nil 2725 } 2726 2727 // addEphemeralDiskToTaskGroups adds missing EphemeralDisk objects to TaskGroups 2728 func (s *StateStore) addEphemeralDiskToTaskGroups(job *structs.Job) { 2729 for _, tg := range job.TaskGroups { 2730 var diskMB int 2731 for _, task := range tg.Tasks { 2732 if task.Resources != nil { 2733 diskMB += task.Resources.DiskMB 2734 task.Resources.DiskMB = 0 2735 } 2736 } 2737 if tg.EphemeralDisk != nil { 2738 
continue 2739 } 2740 tg.EphemeralDisk = &structs.EphemeralDisk{ 2741 SizeMB: diskMB, 2742 } 2743 } 2744 } 2745 2746 // StateSnapshot is used to provide a point-in-time snapshot 2747 type StateSnapshot struct { 2748 StateStore 2749 } 2750 2751 // StateRestore is used to optimize the performance when 2752 // restoring state by only using a single large transaction 2753 // instead of thousands of sub transactions 2754 type StateRestore struct { 2755 txn *memdb.Txn 2756 } 2757 2758 // Abort is used to abort the restore operation 2759 func (s *StateRestore) Abort() { 2760 s.txn.Abort() 2761 } 2762 2763 // Commit is used to commit the restore operation 2764 func (s *StateRestore) Commit() { 2765 s.txn.Commit() 2766 } 2767 2768 // NodeRestore is used to restore a node 2769 func (r *StateRestore) NodeRestore(node *structs.Node) error { 2770 if err := r.txn.Insert("nodes", node); err != nil { 2771 return fmt.Errorf("node insert failed: %v", err) 2772 } 2773 return nil 2774 } 2775 2776 // JobRestore is used to restore a job 2777 func (r *StateRestore) JobRestore(job *structs.Job) error { 2778 // Create the EphemeralDisk if it's nil by adding up DiskMB from task resources. 
2779 // COMPAT 0.4.1 -> 0.5 2780 r.addEphemeralDiskToTaskGroups(job) 2781 2782 if err := r.txn.Insert("jobs", job); err != nil { 2783 return fmt.Errorf("job insert failed: %v", err) 2784 } 2785 return nil 2786 } 2787 2788 // EvalRestore is used to restore an evaluation 2789 func (r *StateRestore) EvalRestore(eval *structs.Evaluation) error { 2790 if err := r.txn.Insert("evals", eval); err != nil { 2791 return fmt.Errorf("eval insert failed: %v", err) 2792 } 2793 return nil 2794 } 2795 2796 // AllocRestore is used to restore an allocation 2797 func (r *StateRestore) AllocRestore(alloc *structs.Allocation) error { 2798 // Set the shared resources if it's not present 2799 // COMPAT 0.4.1 -> 0.5 2800 if alloc.SharedResources == nil { 2801 alloc.SharedResources = &structs.Resources{ 2802 DiskMB: alloc.Resources.DiskMB, 2803 } 2804 } 2805 2806 // Create the EphemeralDisk if it's nil by adding up DiskMB from task resources. 2807 if alloc.Job != nil { 2808 r.addEphemeralDiskToTaskGroups(alloc.Job) 2809 } 2810 2811 if err := r.txn.Insert("allocs", alloc); err != nil { 2812 return fmt.Errorf("alloc insert failed: %v", err) 2813 } 2814 return nil 2815 } 2816 2817 // IndexRestore is used to restore an index 2818 func (r *StateRestore) IndexRestore(idx *IndexEntry) error { 2819 if err := r.txn.Insert("index", idx); err != nil { 2820 return fmt.Errorf("index insert failed: %v", err) 2821 } 2822 return nil 2823 } 2824 2825 // PeriodicLaunchRestore is used to restore a periodic launch. 
2826 func (r *StateRestore) PeriodicLaunchRestore(launch *structs.PeriodicLaunch) error { 2827 if err := r.txn.Insert("periodic_launch", launch); err != nil { 2828 return fmt.Errorf("periodic launch insert failed: %v", err) 2829 } 2830 return nil 2831 } 2832 2833 // JobSummaryRestore is used to restore a job summary 2834 func (r *StateRestore) JobSummaryRestore(jobSummary *structs.JobSummary) error { 2835 if err := r.txn.Insert("job_summary", jobSummary); err != nil { 2836 return fmt.Errorf("job summary insert failed: %v", err) 2837 } 2838 return nil 2839 } 2840 2841 // JobVersionRestore is used to restore a job version 2842 func (r *StateRestore) JobVersionRestore(version *structs.Job) error { 2843 if err := r.txn.Insert("job_version", version); err != nil { 2844 return fmt.Errorf("job version insert failed: %v", err) 2845 } 2846 return nil 2847 } 2848 2849 // DeploymentRestore is used to restore a deployment 2850 func (r *StateRestore) DeploymentRestore(deployment *structs.Deployment) error { 2851 if err := r.txn.Insert("deployment", deployment); err != nil { 2852 return fmt.Errorf("deployment insert failed: %v", err) 2853 } 2854 return nil 2855 } 2856 2857 // VaultAccessorRestore is used to restore a vault accessor 2858 func (r *StateRestore) VaultAccessorRestore(accessor *structs.VaultAccessor) error { 2859 if err := r.txn.Insert("vault_accessors", accessor); err != nil { 2860 return fmt.Errorf("vault accessor insert failed: %v", err) 2861 } 2862 return nil 2863 } 2864 2865 // addEphemeralDiskToTaskGroups adds missing EphemeralDisk objects to TaskGroups 2866 func (r *StateRestore) addEphemeralDiskToTaskGroups(job *structs.Job) { 2867 for _, tg := range job.TaskGroups { 2868 if tg.EphemeralDisk != nil { 2869 continue 2870 } 2871 var sizeMB int 2872 for _, task := range tg.Tasks { 2873 if task.Resources != nil { 2874 sizeMB += task.Resources.DiskMB 2875 task.Resources.DiskMB = 0 2876 } 2877 } 2878 tg.EphemeralDisk = &structs.EphemeralDisk{ 2879 SizeMB: sizeMB, 
2880 } 2881 } 2882 }