github.com/blixtra/nomad@v0.7.2-0.20171221000451-da9a1d7bb050/nomad/state/state_store.go (about) 1 package state 2 3 import ( 4 "context" 5 "fmt" 6 "io" 7 "log" 8 "sort" 9 10 "github.com/hashicorp/go-memdb" 11 multierror "github.com/hashicorp/go-multierror" 12 "github.com/hashicorp/nomad/helper" 13 "github.com/hashicorp/nomad/nomad/structs" 14 ) 15 16 // IndexEntry is used with the "index" table 17 // for managing the latest Raft index affecting a table. 18 type IndexEntry struct { 19 Key string 20 Value uint64 21 } 22 23 // StateStoreConfig is used to configure a new state store 24 type StateStoreConfig struct { 25 // LogOutput is used to configure the output of the state store's logs 26 LogOutput io.Writer 27 28 // Region is the region of the server embedding the state store. 29 Region string 30 } 31 32 // The StateStore is responsible for maintaining all the Nomad 33 // state. It is manipulated by the FSM which maintains consistency 34 // through the use of Raft. The goals of the StateStore are to provide 35 // high concurrency for read operations without blocking writes, and 36 // to provide write availability in the face of reads. EVERY object 37 // returned as a result of a read against the state store should be 38 // considered a constant and NEVER modified in place. 39 type StateStore struct { 40 logger *log.Logger 41 db *memdb.MemDB 42 43 // config is the passed in configuration 44 config *StateStoreConfig 45 46 // abandonCh is used to signal watchers that this state store has been 47 // abandoned (usually during a restore). This is only ever closed. 48 abandonCh chan struct{} 49 } 50 51 // NewStateStore is used to create a new state store 52 func NewStateStore(config *StateStoreConfig) (*StateStore, error) { 53 // Create the MemDB 54 db, err := memdb.NewMemDB(stateStoreSchema()) 55 if err != nil { 56 return nil, fmt.Errorf("state store setup failed: %v", err) 57 } 58 59 // Create the state store 60 s := &StateStore{ 61 logger: log.New(config.LogOutput, "", log.LstdFlags), 62 db: db, 63 config: config, 64 abandonCh: make(chan struct{}), 65 } 66 return s, nil 67 } 68 69 // Config returns the state store configuration. 70 func (s *StateStore) Config() *StateStoreConfig { 71 return s.config 72 } 73 74 // Snapshot is used to create a point in time snapshot. Because 75 // we use MemDB, we just need to snapshot the state of the underlying 76 // database. 77 func (s *StateStore) Snapshot() (*StateSnapshot, error) { 78 snap := &StateSnapshot{ 79 StateStore: StateStore{ 80 logger: s.logger, 81 config: s.config, 82 db: s.db.Snapshot(), 83 }, 84 } 85 return snap, nil 86 } 87 88 // Restore is used to optimize the efficiency of rebuilding 89 // state by minimizing the number of transactions and checking 90 // overhead. 91 func (s *StateStore) Restore() (*StateRestore, error) { 92 txn := s.db.Txn(true) 93 r := &StateRestore{ 94 txn: txn, 95 } 96 return r, nil 97 } 98 99 // AbandonCh returns a channel you can wait on to know if the state store was 100 // abandoned. 101 func (s *StateStore) AbandonCh() <-chan struct{} { 102 return s.abandonCh 103 } 104 105 // Abandon is used to signal that the given state store has been abandoned. 106 // Calling this more than one time will panic. 107 func (s *StateStore) Abandon() { 108 close(s.abandonCh) 109 } 110 111 // QueryFn is the definition of a function that can be used to implement a basic 112 // blocking query against the state store. 113 type QueryFn func(memdb.WatchSet, *StateStore) (resp interface{}, index uint64, err error) 114 115 // BlockingQuery takes a query function and runs the function until the minimum 116 // query index is met or until the passed context is cancelled. 117 func (s *StateStore) BlockingQuery(query QueryFn, minIndex uint64, ctx context.Context) ( 118 resp interface{}, index uint64, err error) { 119 120 RUN_QUERY: 121 // We capture the state store and its abandon channel but pass a snapshot to 122 // the blocking query function. We operate on the snapshot to allow separate 123 // calls to the state store not all wrapped within the same transaction. 124 abandonCh := s.AbandonCh() 125 snap, _ := s.Snapshot() 126 stateSnap := &snap.StateStore 127 128 // We can skip all watch tracking if this isn't a blocking query. 129 var ws memdb.WatchSet 130 if minIndex > 0 { 131 ws = memdb.NewWatchSet() 132 133 // This channel will be closed if a snapshot is restored and the 134 // whole state store is abandoned. 135 ws.Add(abandonCh) 136 } 137 138 resp, index, err = query(ws, stateSnap) 139 if err != nil { 140 return nil, index, err 141 } 142 143 // We haven't reached the min-index yet. 144 if minIndex > 0 && index <= minIndex { 145 if err := ws.WatchCtx(ctx); err != nil { 146 return nil, index, err 147 } 148 149 goto RUN_QUERY 150 } 151 152 return resp, index, nil 153 } 154 155 // UpsertPlanResults is used to upsert the results of a plan. 156 func (s *StateStore) UpsertPlanResults(index uint64, results *structs.ApplyPlanResultsRequest) error { 157 txn := s.db.Txn(true) 158 defer txn.Abort() 159 160 // Upsert the newly created or updated deployment 161 if results.Deployment != nil { 162 if err := s.upsertDeploymentImpl(index, results.Deployment, txn); err != nil { 163 return err 164 } 165 } 166 167 // Update the status of deployments effected by the plan. 168 if len(results.DeploymentUpdates) != 0 { 169 s.upsertDeploymentUpdates(index, results.DeploymentUpdates, txn) 170 } 171 172 // Attach the job to all the allocations. It is pulled out in the payload to 173 // avoid the redundancy of encoding, but should be denormalized prior to 174 // being inserted into MemDB. 175 structs.DenormalizeAllocationJobs(results.Job, results.Alloc) 176 177 // Calculate the total resources of allocations. It is pulled out in the 178 // payload to avoid encoding something that can be computed, but should be 179 // denormalized prior to being inserted into MemDB. 180 for _, alloc := range results.Alloc { 181 if alloc.Resources != nil { 182 continue 183 } 184 185 alloc.Resources = new(structs.Resources) 186 for _, task := range alloc.TaskResources { 187 alloc.Resources.Add(task) 188 } 189 190 // Add the shared resources 191 alloc.Resources.Add(alloc.SharedResources) 192 } 193 194 // Upsert the allocations 195 if err := s.upsertAllocsImpl(index, results.Alloc, txn); err != nil { 196 return err 197 } 198 199 // COMPAT: Nomad versions before 0.7.1 did not include the eval ID when 200 // applying the plan. Thus while we are upgrading, we ignore updating the 201 // modify index of evaluations from older plans. 202 if results.EvalID != "" { 203 // Update the modify index of the eval id 204 if err := s.updateEvalModifyIndex(txn, index, results.EvalID); err != nil { 205 return err 206 } 207 } 208 209 txn.Commit() 210 return nil 211 } 212 213 // upsertDeploymentUpdates updates the deployments given the passed status 214 // updates. 215 func (s *StateStore) upsertDeploymentUpdates(index uint64, updates []*structs.DeploymentStatusUpdate, txn *memdb.Txn) error { 216 for _, u := range updates { 217 if err := s.updateDeploymentStatusImpl(index, u, txn); err != nil { 218 return err 219 } 220 } 221 222 return nil 223 } 224 225 // UpsertJobSummary upserts a job summary into the state store. 226 func (s *StateStore) UpsertJobSummary(index uint64, jobSummary *structs.JobSummary) error { 227 txn := s.db.Txn(true) 228 defer txn.Abort() 229 230 // COMPAT 0.7: Upgrade old objects that do not have namespaces 231 if jobSummary.Namespace == "" { 232 jobSummary.Namespace = structs.DefaultNamespace 233 } 234 235 // Check if the job summary already exists 236 existing, err := txn.First("job_summary", "id", jobSummary.Namespace, jobSummary.JobID) 237 if err != nil { 238 return fmt.Errorf("job summary lookup failed: %v", err) 239 } 240 241 // Setup the indexes correctly 242 if existing != nil { 243 jobSummary.CreateIndex = existing.(*structs.JobSummary).CreateIndex 244 jobSummary.ModifyIndex = index 245 } else { 246 jobSummary.CreateIndex = index 247 jobSummary.ModifyIndex = index 248 } 249 250 // Update the index 251 if err := txn.Insert("job_summary", jobSummary); err != nil { 252 return err 253 } 254 255 // Update the indexes table for job summary 256 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 257 return fmt.Errorf("index update failed: %v", err) 258 } 259 260 txn.Commit() 261 return nil 262 } 263 264 // DeleteJobSummary deletes the job summary with the given ID. This is for 265 // testing purposes only. 266 func (s *StateStore) DeleteJobSummary(index uint64, namespace, id string) error { 267 txn := s.db.Txn(true) 268 defer txn.Abort() 269 270 // COMPAT 0.7: Upgrade old objects that do not have namespaces 271 if namespace == "" { 272 namespace = structs.DefaultNamespace 273 } 274 275 // Delete the job summary 276 if _, err := txn.DeleteAll("job_summary", "id", namespace, id); err != nil { 277 return fmt.Errorf("deleting job summary failed: %v", err) 278 } 279 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 280 return fmt.Errorf("index update failed: %v", err) 281 } 282 txn.Commit() 283 return nil 284 } 285 286 // UpsertDeployment is used to insert a new deployment. If cancelPrior is set to 287 // true, all prior deployments for the same job will be cancelled. 288 func (s *StateStore) UpsertDeployment(index uint64, deployment *structs.Deployment) error { 289 txn := s.db.Txn(true) 290 defer txn.Abort() 291 if err := s.upsertDeploymentImpl(index, deployment, txn); err != nil { 292 return err 293 } 294 txn.Commit() 295 return nil 296 } 297 298 func (s *StateStore) upsertDeploymentImpl(index uint64, deployment *structs.Deployment, txn *memdb.Txn) error { 299 // Check if the deployment already exists 300 existing, err := txn.First("deployment", "id", deployment.ID) 301 if err != nil { 302 return fmt.Errorf("deployment lookup failed: %v", err) 303 } 304 305 // COMPAT 0.7: Upgrade old objects that do not have namespaces 306 if deployment.Namespace == "" { 307 deployment.Namespace = structs.DefaultNamespace 308 } 309 310 // Setup the indexes correctly 311 if existing != nil { 312 deployment.CreateIndex = existing.(*structs.Deployment).CreateIndex 313 deployment.ModifyIndex = index 314 } else { 315 deployment.CreateIndex = index 316 deployment.ModifyIndex = index 317 } 318 319 // Insert the deployment 320 if err := txn.Insert("deployment", deployment); err != nil { 321 return err 322 } 323 324 // Update the indexes table for deployment 325 if err := txn.Insert("index", &IndexEntry{"deployment", index}); err != nil { 326 return fmt.Errorf("index update failed: %v", err) 327 } 328 329 // If the deployment is being marked as complete, set the job to stable. 330 if deployment.Status == structs.DeploymentStatusSuccessful { 331 if err := s.updateJobStabilityImpl(index, deployment.Namespace, deployment.JobID, deployment.JobVersion, true, txn); err != nil { 332 return fmt.Errorf("failed to update job stability: %v", err) 333 } 334 } 335 336 return nil 337 } 338 339 func (s *StateStore) Deployments(ws memdb.WatchSet) (memdb.ResultIterator, error) { 340 txn := s.db.Txn(false) 341 342 // Walk the entire deployments table 343 iter, err := txn.Get("deployment", "id") 344 if err != nil { 345 return nil, err 346 } 347 348 ws.Add(iter.WatchCh()) 349 return iter, nil 350 } 351 352 func (s *StateStore) DeploymentsByNamespace(ws memdb.WatchSet, namespace string) (memdb.ResultIterator, error) { 353 txn := s.db.Txn(false) 354 355 // Walk the entire deployments table 356 iter, err := txn.Get("deployment", "namespace", namespace) 357 if err != nil { 358 return nil, err 359 } 360 361 ws.Add(iter.WatchCh()) 362 return iter, nil 363 } 364 365 func (s *StateStore) DeploymentsByIDPrefix(ws memdb.WatchSet, namespace, deploymentID string) (memdb.ResultIterator, error) { 366 txn := s.db.Txn(false) 367 368 // Walk the entire deployments table 369 iter, err := txn.Get("deployment", "id_prefix", deploymentID) 370 if err != nil { 371 return nil, err 372 } 373 374 ws.Add(iter.WatchCh()) 375 376 // Wrap the iterator in a filter 377 wrap := memdb.NewFilterIterator(iter, deploymentNamespaceFilter(namespace)) 378 return wrap, nil 379 } 380 381 // deploymentNamespaceFilter returns a filter function that filters all 382 // deployment not in the given namespace. 383 func deploymentNamespaceFilter(namespace string) func(interface{}) bool { 384 return func(raw interface{}) bool { 385 d, ok := raw.(*structs.Deployment) 386 if !ok { 387 return true 388 } 389 390 return d.Namespace != namespace 391 } 392 } 393 394 func (s *StateStore) DeploymentByID(ws memdb.WatchSet, deploymentID string) (*structs.Deployment, error) { 395 txn := s.db.Txn(false) 396 return s.deploymentByIDImpl(ws, deploymentID, txn) 397 } 398 399 func (s *StateStore) deploymentByIDImpl(ws memdb.WatchSet, deploymentID string, txn *memdb.Txn) (*structs.Deployment, error) { 400 watchCh, existing, err := txn.FirstWatch("deployment", "id", deploymentID) 401 if err != nil { 402 return nil, fmt.Errorf("deployment lookup failed: %v", err) 403 } 404 ws.Add(watchCh) 405 406 if existing != nil { 407 return existing.(*structs.Deployment), nil 408 } 409 410 return nil, nil 411 } 412 413 func (s *StateStore) DeploymentsByJobID(ws memdb.WatchSet, namespace, jobID string) ([]*structs.Deployment, error) { 414 txn := s.db.Txn(false) 415 416 // COMPAT 0.7: Upgrade old objects that do not have namespaces 417 if namespace == "" { 418 namespace = structs.DefaultNamespace 419 } 420 421 // Get an iterator over the deployments 422 iter, err := txn.Get("deployment", "job", namespace, jobID) 423 if err != nil { 424 return nil, err 425 } 426 427 ws.Add(iter.WatchCh()) 428 429 var out []*structs.Deployment 430 for { 431 raw := iter.Next() 432 if raw == nil { 433 break 434 } 435 436 d := raw.(*structs.Deployment) 437 out = append(out, d) 438 } 439 440 return out, nil 441 } 442 443 // LatestDeploymentByJobID returns the latest deployment for the given job. The 444 // latest is determined strictly by CreateIndex. 445 func (s *StateStore) LatestDeploymentByJobID(ws memdb.WatchSet, namespace, jobID string) (*structs.Deployment, error) { 446 txn := s.db.Txn(false) 447 448 // COMPAT 0.7: Upgrade old objects that do not have namespaces 449 if namespace == "" { 450 namespace = structs.DefaultNamespace 451 } 452 453 // Get an iterator over the deployments 454 iter, err := txn.Get("deployment", "job", namespace, jobID) 455 if err != nil { 456 return nil, err 457 } 458 459 ws.Add(iter.WatchCh()) 460 461 var out *structs.Deployment 462 for { 463 raw := iter.Next() 464 if raw == nil { 465 break 466 } 467 468 d := raw.(*structs.Deployment) 469 if out == nil || out.CreateIndex < d.CreateIndex { 470 out = d 471 } 472 } 473 474 return out, nil 475 } 476 477 // DeleteDeployment is used to delete a set of deployments by ID 478 func (s *StateStore) DeleteDeployment(index uint64, deploymentIDs []string) error { 479 txn := s.db.Txn(true) 480 defer txn.Abort() 481 482 if len(deploymentIDs) == 0 { 483 return nil 484 } 485 486 for _, deploymentID := range deploymentIDs { 487 // Lookup the deployment 488 existing, err := txn.First("deployment", "id", deploymentID) 489 if err != nil { 490 return fmt.Errorf("deployment lookup failed: %v", err) 491 } 492 if existing == nil { 493 return fmt.Errorf("deployment not found") 494 } 495 496 // Delete the deployment 497 if err := txn.Delete("deployment", existing); err != nil { 498 return fmt.Errorf("deployment delete failed: %v", err) 499 } 500 } 501 502 if err := txn.Insert("index", &IndexEntry{"deployment", index}); err != nil { 503 return fmt.Errorf("index update failed: %v", err) 504 } 505 506 txn.Commit() 507 return nil 508 } 509 510 // UpsertNode is used to register a node or update a node definition 511 // This is assumed to be triggered by the client, so we retain the value 512 // of drain which is set by the scheduler. 513 func (s *StateStore) UpsertNode(index uint64, node *structs.Node) error { 514 txn := s.db.Txn(true) 515 defer txn.Abort() 516 517 // Check if the node already exists 518 existing, err := txn.First("nodes", "id", node.ID) 519 if err != nil { 520 return fmt.Errorf("node lookup failed: %v", err) 521 } 522 523 // Setup the indexes correctly 524 if existing != nil { 525 exist := existing.(*structs.Node) 526 node.CreateIndex = exist.CreateIndex 527 node.ModifyIndex = index 528 node.Drain = exist.Drain // Retain the drain mode 529 } else { 530 node.CreateIndex = index 531 node.ModifyIndex = index 532 } 533 534 // Insert the node 535 if err := txn.Insert("nodes", node); err != nil { 536 return fmt.Errorf("node insert failed: %v", err) 537 } 538 if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil { 539 return fmt.Errorf("index update failed: %v", err) 540 } 541 542 txn.Commit() 543 return nil 544 } 545 546 // DeleteNode is used to deregister a node 547 func (s *StateStore) DeleteNode(index uint64, nodeID string) error { 548 txn := s.db.Txn(true) 549 defer txn.Abort() 550 551 // Lookup the node 552 existing, err := txn.First("nodes", "id", nodeID) 553 if err != nil { 554 return fmt.Errorf("node lookup failed: %v", err) 555 } 556 if existing == nil { 557 return fmt.Errorf("node not found") 558 } 559 560 // Delete the node 561 if err := txn.Delete("nodes", existing); err != nil { 562 return fmt.Errorf("node delete failed: %v", err) 563 } 564 if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil { 565 return fmt.Errorf("index update failed: %v", err) 566 } 567 568 txn.Commit() 569 return nil 570 } 571 572 // UpdateNodeStatus is used to update the status of a node 573 func (s *StateStore) UpdateNodeStatus(index uint64, nodeID, status string) error { 574 txn := s.db.Txn(true) 575 defer txn.Abort() 576 577 // Lookup the node 578 existing, err := txn.First("nodes", "id", nodeID) 579 if err != nil { 580 return fmt.Errorf("node lookup failed: %v", err) 581 } 582 if existing == nil { 583 return fmt.Errorf("node not found") 584 } 585 586 // Copy the existing node 587 existingNode := existing.(*structs.Node) 588 copyNode := new(structs.Node) 589 *copyNode = *existingNode 590 591 // Update the status in the copy 592 copyNode.Status = status 593 copyNode.ModifyIndex = index 594 595 // Insert the node 596 if err := txn.Insert("nodes", copyNode); err != nil { 597 return fmt.Errorf("node update failed: %v", err) 598 } 599 if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil { 600 return fmt.Errorf("index update failed: %v", err) 601 } 602 603 txn.Commit() 604 return nil 605 } 606 607 // UpdateNodeDrain is used to update the drain of a node 608 func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, drain bool) error { 609 txn := s.db.Txn(true) 610 defer txn.Abort() 611 612 // Lookup the node 613 existing, err := txn.First("nodes", "id", nodeID) 614 if err != nil { 615 return fmt.Errorf("node lookup failed: %v", err) 616 } 617 if existing == nil { 618 return fmt.Errorf("node not found") 619 } 620 621 // Copy the existing node 622 existingNode := existing.(*structs.Node) 623 copyNode := new(structs.Node) 624 *copyNode = *existingNode 625 626 // Update the drain in the copy 627 copyNode.Drain = drain 628 copyNode.ModifyIndex = index 629 630 // Insert the node 631 if err := txn.Insert("nodes", copyNode); err != nil { 632 return fmt.Errorf("node update failed: %v", err) 633 } 634 if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil { 635 return fmt.Errorf("index update failed: %v", err) 636 } 637 638 txn.Commit() 639 return nil 640 } 641 642 // NodeByID is used to lookup a node by ID 643 func (s *StateStore) NodeByID(ws memdb.WatchSet, nodeID string) (*structs.Node, error) { 644 txn := s.db.Txn(false) 645 646 watchCh, existing, err := txn.FirstWatch("nodes", "id", nodeID) 647 if err != nil { 648 return nil, fmt.Errorf("node lookup failed: %v", err) 649 } 650 ws.Add(watchCh) 651 652 if existing != nil { 653 return existing.(*structs.Node), nil 654 } 655 return nil, nil 656 } 657 658 // NodesByIDPrefix is used to lookup nodes by prefix 659 func (s *StateStore) NodesByIDPrefix(ws memdb.WatchSet, nodeID string) (memdb.ResultIterator, error) { 660 txn := s.db.Txn(false) 661 662 iter, err := txn.Get("nodes", "id_prefix", nodeID) 663 if err != nil { 664 return nil, fmt.Errorf("node lookup failed: %v", err) 665 } 666 ws.Add(iter.WatchCh()) 667 668 return iter, nil 669 } 670 671 // NodeBySecretID is used to lookup a node by SecretID 672 func (s *StateStore) NodeBySecretID(ws memdb.WatchSet, secretID string) (*structs.Node, error) { 673 txn := s.db.Txn(false) 674 675 watchCh, existing, err := txn.FirstWatch("nodes", "secret_id", secretID) 676 if err != nil { 677 return nil, fmt.Errorf("node lookup by SecretID failed: %v", err) 678 } 679 ws.Add(watchCh) 680 681 if existing != nil { 682 return existing.(*structs.Node), nil 683 } 684 return nil, nil 685 } 686 687 // Nodes returns an iterator over all the nodes 688 func (s *StateStore) Nodes(ws memdb.WatchSet) (memdb.ResultIterator, error) { 689 txn := s.db.Txn(false) 690 691 // Walk the entire nodes table 692 iter, err := txn.Get("nodes", "id") 693 if err != nil { 694 return nil, err 695 } 696 ws.Add(iter.WatchCh()) 697 return iter, nil 698 } 699 700 // UpsertJob is used to register a job or update a job definition 701 func (s *StateStore) UpsertJob(index uint64, job *structs.Job) error { 702 txn := s.db.Txn(true) 703 defer txn.Abort() 704 if err := s.upsertJobImpl(index, job, false, txn); err != nil { 705 return err 706 } 707 txn.Commit() 708 return nil 709 } 710 711 // upsertJobImpl is the implementation for registering a job or updating a job definition 712 func (s *StateStore) upsertJobImpl(index uint64, job *structs.Job, keepVersion bool, txn *memdb.Txn) error { 713 // COMPAT 0.7: Upgrade old objects that do not have namespaces 714 if job.Namespace == "" { 715 job.Namespace = structs.DefaultNamespace 716 } 717 718 // Assert the namespace exists 719 if exists, err := s.namespaceExists(txn, job.Namespace); err != nil { 720 return err 721 } else if !exists { 722 return fmt.Errorf("job %q is in non-existent namespace %q", job.ID, job.Namespace) 723 } 724 725 // Check if the job already exists 726 existing, err := txn.First("jobs", "id", job.Namespace, job.ID) 727 if err != nil { 728 return fmt.Errorf("job lookup failed: %v", err) 729 } 730 731 // Setup the indexes correctly 732 if existing != nil { 733 job.CreateIndex = existing.(*structs.Job).CreateIndex 734 job.ModifyIndex = index 735 736 // Bump the version unless asked to keep it. This should only be done 737 // when changing an internal field such as Stable. A spec change should 738 // always come with a version bump 739 if !keepVersion { 740 job.JobModifyIndex = index 741 job.Version = existing.(*structs.Job).Version + 1 742 } 743 744 // Compute the job status 745 var err error 746 job.Status, err = s.getJobStatus(txn, job, false) 747 if err != nil { 748 return fmt.Errorf("setting job status for %q failed: %v", job.ID, err) 749 } 750 } else { 751 job.CreateIndex = index 752 job.ModifyIndex = index 753 job.JobModifyIndex = index 754 job.Version = 0 755 756 if err := s.setJobStatus(index, txn, job, false, ""); err != nil { 757 return fmt.Errorf("setting job status for %q failed: %v", job.ID, err) 758 } 759 760 // Have to get the job again since it could have been updated 761 updated, err := txn.First("jobs", "id", job.Namespace, job.ID) 762 if err != nil { 763 return fmt.Errorf("job lookup failed: %v", err) 764 } 765 if updated != nil { 766 job = updated.(*structs.Job) 767 } 768 } 769 770 if err := s.updateSummaryWithJob(index, job, txn); err != nil { 771 return fmt.Errorf("unable to create job summary: %v", err) 772 } 773 774 if err := s.upsertJobVersion(index, job, txn); err != nil { 775 return fmt.Errorf("unable to upsert job into job_version table: %v", err) 776 } 777 778 // Create the EphemeralDisk if it's nil by adding up DiskMB from task resources. 779 // COMPAT 0.4.1 -> 0.5 780 s.addEphemeralDiskToTaskGroups(job) 781 782 // Insert the job 783 if err := txn.Insert("jobs", job); err != nil { 784 return fmt.Errorf("job insert failed: %v", err) 785 } 786 if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil { 787 return fmt.Errorf("index update failed: %v", err) 788 } 789 790 return nil 791 } 792 793 // DeleteJob is used to deregister a job 794 func (s *StateStore) DeleteJob(index uint64, namespace, jobID string) error { 795 txn := s.db.Txn(true) 796 defer txn.Abort() 797 798 // COMPAT 0.7: Upgrade old objects that do not have namespaces 799 if namespace == "" { 800 namespace = structs.DefaultNamespace 801 } 802 803 // Lookup the node 804 existing, err := txn.First("jobs", "id", namespace, jobID) 805 if err != nil { 806 return fmt.Errorf("job lookup failed: %v", err) 807 } 808 if existing == nil { 809 return fmt.Errorf("job not found") 810 } 811 812 // Check if we should update a parent job summary 813 job := existing.(*structs.Job) 814 if job.ParentID != "" { 815 summaryRaw, err := txn.First("job_summary", "id", namespace, job.ParentID) 816 if err != nil { 817 return fmt.Errorf("unable to retrieve summary for parent job: %v", err) 818 } 819 820 // Only continue if the summary exists. It could not exist if the parent 821 // job was removed 822 if summaryRaw != nil { 823 existing := summaryRaw.(*structs.JobSummary) 824 pSummary := existing.Copy() 825 if pSummary.Children != nil { 826 827 modified := false 828 switch job.Status { 829 case structs.JobStatusPending: 830 pSummary.Children.Pending-- 831 pSummary.Children.Dead++ 832 modified = true 833 case structs.JobStatusRunning: 834 pSummary.Children.Running-- 835 pSummary.Children.Dead++ 836 modified = true 837 case structs.JobStatusDead: 838 default: 839 return fmt.Errorf("unknown old job status %q", job.Status) 840 } 841 842 if modified { 843 // Update the modify index 844 pSummary.ModifyIndex = index 845 846 // COMPAT 0.7: Upgrade old objects that do not have namespaces 847 if pSummary.Namespace == "" { 848 pSummary.Namespace = structs.DefaultNamespace 849 } 850 851 // Insert the summary 852 if err := txn.Insert("job_summary", pSummary); err != nil { 853 return fmt.Errorf("job summary insert failed: %v", err) 854 } 855 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 856 return fmt.Errorf("index update failed: %v", err) 857 } 858 } 859 } 860 } 861 } 862 863 // Delete the job 864 if err := txn.Delete("jobs", existing); err != nil { 865 return fmt.Errorf("job delete failed: %v", err) 866 } 867 if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil { 868 return fmt.Errorf("index update failed: %v", err) 869 } 870 871 // Delete the job versions 872 if err := s.deleteJobVersions(index, job, txn); err != nil { 873 return err 874 } 875 876 // Delete the job summary 877 if _, err = txn.DeleteAll("job_summary", "id", namespace, jobID); err != nil { 878 return fmt.Errorf("deleing job summary failed: %v", err) 879 } 880 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 881 return fmt.Errorf("index update failed: %v", err) 882 } 883 884 txn.Commit() 885 return nil 886 } 887 888 // deleteJobVersions deletes all versions of the given job. 889 func (s *StateStore) deleteJobVersions(index uint64, job *structs.Job, txn *memdb.Txn) error { 890 // COMPAT 0.7: Upgrade old objects that do not have namespaces 891 if job.Namespace == "" { 892 job.Namespace = structs.DefaultNamespace 893 } 894 895 iter, err := txn.Get("job_version", "id_prefix", job.Namespace, job.ID) 896 if err != nil { 897 return err 898 } 899 900 for { 901 raw := iter.Next() 902 if raw == nil { 903 break 904 } 905 906 // Ensure the ID is an exact match 907 j := raw.(*structs.Job) 908 if j.ID != job.ID { 909 continue 910 } 911 912 if _, err = txn.DeleteAll("job_version", "id", j.Namespace, j.ID, j.Version); err != nil { 913 return fmt.Errorf("deleting job versions failed: %v", err) 914 } 915 } 916 917 if err := txn.Insert("index", &IndexEntry{"job_version", index}); err != nil { 918 return fmt.Errorf("index update failed: %v", err) 919 } 920 921 return nil 922 } 923 924 // upsertJobVersion inserts a job into its historic version table and limits the 925 // number of job versions that are tracked. 926 func (s *StateStore) upsertJobVersion(index uint64, job *structs.Job, txn *memdb.Txn) error { 927 // COMPAT 0.7: Upgrade old objects that do not have namespaces 928 if job.Namespace == "" { 929 job.Namespace = structs.DefaultNamespace 930 } 931 932 // Insert the job 933 if err := txn.Insert("job_version", job); err != nil { 934 return fmt.Errorf("failed to insert job into job_version table: %v", err) 935 } 936 937 if err := txn.Insert("index", &IndexEntry{"job_version", index}); err != nil { 938 return fmt.Errorf("index update failed: %v", err) 939 } 940 941 // Get all the historic jobs for this ID 942 all, err := s.jobVersionByID(txn, nil, job.Namespace, job.ID) 943 if err != nil { 944 return fmt.Errorf("failed to look up job versions for %q: %v", job.ID, err) 945 } 946 947 // If we are below the limit there is no GCing to be done 948 if len(all) <= structs.JobTrackedVersions { 949 return nil 950 } 951 952 // We have to delete a historic job to make room. 953 // Find index of the highest versioned stable job 954 stableIdx := -1 955 for i, j := range all { 956 if j.Stable { 957 stableIdx = i 958 break 959 } 960 } 961 962 // If the stable job is the oldest version, do a swap to bring it into the 963 // keep set. 964 max := structs.JobTrackedVersions 965 if stableIdx == max { 966 all[max-1], all[max] = all[max], all[max-1] 967 } 968 969 // Delete the job outside of the set that are being kept. 970 d := all[max] 971 if err := txn.Delete("job_version", d); err != nil { 972 return fmt.Errorf("failed to delete job %v (%d) from job_version", d.ID, d.Version) 973 } 974 975 return nil 976 } 977 978 // JobByID is used to lookup a job by its ID. JobByID returns the current/latest job 979 // version. 980 func (s *StateStore) JobByID(ws memdb.WatchSet, namespace, id string) (*structs.Job, error) { 981 txn := s.db.Txn(false) 982 983 // COMPAT 0.7: Upgrade old objects that do not have namespaces 984 if namespace == "" { 985 namespace = structs.DefaultNamespace 986 } 987 988 watchCh, existing, err := txn.FirstWatch("jobs", "id", namespace, id) 989 if err != nil { 990 return nil, fmt.Errorf("job lookup failed: %v", err) 991 } 992 ws.Add(watchCh) 993 994 if existing != nil { 995 return existing.(*structs.Job), nil 996 } 997 return nil, nil 998 } 999 1000 // JobsByIDPrefix is used to lookup a job by prefix 1001 func (s *StateStore) JobsByIDPrefix(ws memdb.WatchSet, namespace, id string) (memdb.ResultIterator, error) { 1002 txn := s.db.Txn(false) 1003 1004 // COMPAT 0.7: Upgrade old objects that do not have namespaces 1005 if namespace == "" { 1006 namespace = structs.DefaultNamespace 1007 } 1008 1009 iter, err := txn.Get("jobs", "id_prefix", namespace, id) 1010 if err != nil { 1011 return nil, fmt.Errorf("job lookup failed: %v", err) 1012 } 1013 1014 ws.Add(iter.WatchCh()) 1015 1016 return iter, nil 1017 } 1018 1019 // JobVersionsByID returns all the tracked versions of a job. 1020 func (s *StateStore) JobVersionsByID(ws memdb.WatchSet, namespace, id string) ([]*structs.Job, error) { 1021 txn := s.db.Txn(false) 1022 1023 // COMPAT 0.7: Upgrade old objects that do not have namespaces 1024 if namespace == "" { 1025 namespace = structs.DefaultNamespace 1026 } 1027 1028 return s.jobVersionByID(txn, &ws, namespace, id) 1029 } 1030 1031 // jobVersionByID is the underlying implementation for retrieving all tracked 1032 // versions of a job and is called under an existing transaction. A watch set 1033 // can optionally be passed in to add the job histories to the watch set. 1034 func (s *StateStore) jobVersionByID(txn *memdb.Txn, ws *memdb.WatchSet, namespace, id string) ([]*structs.Job, error) { 1035 // COMPAT 0.7: Upgrade old objects that do not have namespaces 1036 if namespace == "" { 1037 namespace = structs.DefaultNamespace 1038 } 1039 1040 // Get all the historic jobs for this ID 1041 iter, err := txn.Get("job_version", "id_prefix", namespace, id) 1042 if err != nil { 1043 return nil, err 1044 } 1045 1046 if ws != nil { 1047 ws.Add(iter.WatchCh()) 1048 } 1049 1050 var all []*structs.Job 1051 for { 1052 raw := iter.Next() 1053 if raw == nil { 1054 break 1055 } 1056 1057 // Ensure the ID is an exact match 1058 j := raw.(*structs.Job) 1059 if j.ID != id { 1060 continue 1061 } 1062 1063 all = append(all, j) 1064 } 1065 1066 // Sort in reverse order so that the highest version is first 1067 sort.Slice(all, func(i, j int) bool { 1068 return all[i].Version > all[j].Version 1069 }) 1070 1071 return all, nil 1072 } 1073 1074 // JobByIDAndVersion returns the job identified by its ID and Version. The 1075 // passed watchset may be nil. 1076 func (s *StateStore) JobByIDAndVersion(ws memdb.WatchSet, namespace, id string, version uint64) (*structs.Job, error) { 1077 // COMPAT 0.7: Upgrade old objects that do not have namespaces 1078 if namespace == "" { 1079 namespace = structs.DefaultNamespace 1080 } 1081 txn := s.db.Txn(false) 1082 return s.jobByIDAndVersionImpl(ws, namespace, id, version, txn) 1083 } 1084 1085 // jobByIDAndVersionImpl returns the job identified by its ID and Version. The 1086 // passed watchset may be nil. 1087 func (s *StateStore) jobByIDAndVersionImpl(ws memdb.WatchSet, namespace, id string, 1088 version uint64, txn *memdb.Txn) (*structs.Job, error) { 1089 // COMPAT 0.7: Upgrade old objects that do not have namespaces 1090 if namespace == "" { 1091 namespace = structs.DefaultNamespace 1092 } 1093 1094 watchCh, existing, err := txn.FirstWatch("job_version", "id", namespace, id, version) 1095 if err != nil { 1096 return nil, err 1097 } 1098 1099 if ws != nil { 1100 ws.Add(watchCh) 1101 } 1102 1103 if existing != nil { 1104 job := existing.(*structs.Job) 1105 return job, nil 1106 } 1107 1108 return nil, nil 1109 } 1110 1111 func (s *StateStore) JobVersions(ws memdb.WatchSet) (memdb.ResultIterator, error) { 1112 txn := s.db.Txn(false) 1113 1114 // Walk the entire deployments table 1115 iter, err := txn.Get("job_version", "id") 1116 if err != nil { 1117 return nil, err 1118 } 1119 1120 ws.Add(iter.WatchCh()) 1121 return iter, nil 1122 } 1123 1124 // Jobs returns an iterator over all the jobs 1125 func (s *StateStore) Jobs(ws memdb.WatchSet) (memdb.ResultIterator, error) { 1126 txn := s.db.Txn(false) 1127 1128 // Walk the entire jobs table 1129 iter, err := txn.Get("jobs", "id") 1130 if err != nil { 1131 return nil, err 1132 } 1133 1134 ws.Add(iter.WatchCh()) 1135 1136 return iter, nil 1137 } 1138 1139 // JobsByNamespace returns an iterator over all the jobs for the given namespace 1140 func (s *StateStore) JobsByNamespace(ws memdb.WatchSet, namespace string) (memdb.ResultIterator, error) { 1141 txn := s.db.Txn(false) 1142 return s.jobsByNamespaceImpl(ws, namespace, txn) 1143 } 1144 1145 // jobsByNamespaceImpl returns an iterator over all the jobs for the given namespace 1146 func (s *StateStore) jobsByNamespaceImpl(ws memdb.WatchSet, namespace string, txn *memdb.Txn) (memdb.ResultIterator, error) { 1147 // Walk the entire jobs table 1148 iter, err := txn.Get("jobs", "id_prefix", namespace, "") 1149 if err != nil { 1150 return nil, err 1151 } 1152 1153 ws.Add(iter.WatchCh()) 1154 1155 return iter, nil 1156 } 1157 1158 // JobsByPeriodic returns an iterator over all the periodic or non-periodic jobs. 1159 func (s *StateStore) JobsByPeriodic(ws memdb.WatchSet, periodic bool) (memdb.ResultIterator, error) { 1160 txn := s.db.Txn(false) 1161 1162 iter, err := txn.Get("jobs", "periodic", periodic) 1163 if err != nil { 1164 return nil, err 1165 } 1166 1167 ws.Add(iter.WatchCh()) 1168 1169 return iter, nil 1170 } 1171 1172 // JobsByScheduler returns an iterator over all the jobs with the specific 1173 // scheduler type. 1174 func (s *StateStore) JobsByScheduler(ws memdb.WatchSet, schedulerType string) (memdb.ResultIterator, error) { 1175 txn := s.db.Txn(false) 1176 1177 // Return an iterator for jobs with the specific type. 1178 iter, err := txn.Get("jobs", "type", schedulerType) 1179 if err != nil { 1180 return nil, err 1181 } 1182 1183 ws.Add(iter.WatchCh()) 1184 1185 return iter, nil 1186 } 1187 1188 // JobsByGC returns an iterator over all jobs eligible or uneligible for garbage 1189 // collection. 1190 func (s *StateStore) JobsByGC(ws memdb.WatchSet, gc bool) (memdb.ResultIterator, error) { 1191 txn := s.db.Txn(false) 1192 1193 iter, err := txn.Get("jobs", "gc", gc) 1194 if err != nil { 1195 return nil, err 1196 } 1197 1198 ws.Add(iter.WatchCh()) 1199 1200 return iter, nil 1201 } 1202 1203 // JobSummary returns a job summary object which matches a specific id. 1204 func (s *StateStore) JobSummaryByID(ws memdb.WatchSet, namespace, jobID string) (*structs.JobSummary, error) { 1205 txn := s.db.Txn(false) 1206 1207 // COMPAT 0.7: Upgrade old objects that do not have namespaces 1208 if namespace == "" { 1209 namespace = structs.DefaultNamespace 1210 } 1211 1212 watchCh, existing, err := txn.FirstWatch("job_summary", "id", namespace, jobID) 1213 if err != nil { 1214 return nil, err 1215 } 1216 1217 ws.Add(watchCh) 1218 1219 if existing != nil { 1220 summary := existing.(*structs.JobSummary) 1221 return summary, nil 1222 } 1223 1224 return nil, nil 1225 } 1226 1227 // JobSummaries walks the entire job summary table and returns all the job 1228 // summary objects 1229 func (s *StateStore) JobSummaries(ws memdb.WatchSet) (memdb.ResultIterator, error) { 1230 txn := s.db.Txn(false) 1231 1232 iter, err := txn.Get("job_summary", "id") 1233 if err != nil { 1234 return nil, err 1235 } 1236 1237 ws.Add(iter.WatchCh()) 1238 1239 return iter, nil 1240 } 1241 1242 // JobSummaryByPrefix is used to look up Job Summary by id prefix 1243 func (s *StateStore) JobSummaryByPrefix(ws memdb.WatchSet, namespace, id string) (memdb.ResultIterator, error) { 1244 txn := s.db.Txn(false) 1245 1246 // COMPAT 0.7: Upgrade old objects that do not have namespaces 1247 if namespace == "" { 1248 namespace = structs.DefaultNamespace 1249 } 1250 1251 iter, err := txn.Get("job_summary", "id_prefix", namespace, id) 1252 if err != nil { 1253 return nil, fmt.Errorf("eval lookup failed: %v", err) 1254 } 1255 1256 ws.Add(iter.WatchCh()) 1257 1258 return iter, nil 1259 } 1260 1261 // UpsertPeriodicLaunch is used to register a launch or update it. 1262 func (s *StateStore) UpsertPeriodicLaunch(index uint64, launch *structs.PeriodicLaunch) error { 1263 txn := s.db.Txn(true) 1264 defer txn.Abort() 1265 1266 // COMPAT 0.7: Upgrade old objects that do not have namespaces 1267 if launch.Namespace == "" { 1268 launch.Namespace = structs.DefaultNamespace 1269 } 1270 1271 // Check if the job already exists 1272 existing, err := txn.First("periodic_launch", "id", launch.Namespace, launch.ID) 1273 if err != nil { 1274 return fmt.Errorf("periodic launch lookup failed: %v", err) 1275 } 1276 1277 // Setup the indexes correctly 1278 if existing != nil { 1279 launch.CreateIndex = existing.(*structs.PeriodicLaunch).CreateIndex 1280 launch.ModifyIndex = index 1281 } else { 1282 launch.CreateIndex = index 1283 launch.ModifyIndex = index 1284 } 1285 1286 // Insert the job 1287 if err := txn.Insert("periodic_launch", launch); err != nil { 1288 return fmt.Errorf("launch insert failed: %v", err) 1289 } 1290 if err := txn.Insert("index", &IndexEntry{"periodic_launch", index}); err != nil { 1291 return fmt.Errorf("index update failed: %v", err) 1292 } 1293 1294 txn.Commit() 1295 return nil 1296 } 1297 1298 // DeletePeriodicLaunch is used to delete the periodic launch 1299 func (s *StateStore) DeletePeriodicLaunch(index uint64, namespace, jobID string) error { 1300 txn := s.db.Txn(true) 1301 defer txn.Abort() 1302 1303 // COMPAT 0.7: Upgrade old objects that do not have namespaces 1304 if namespace == "" { 1305 namespace = structs.DefaultNamespace 1306 } 1307 1308 // Lookup the launch 1309 existing, err := txn.First("periodic_launch", "id", namespace, jobID) 1310 if err != nil { 1311 return fmt.Errorf("launch lookup failed: %v", err) 1312 } 1313 if existing == nil { 1314 return fmt.Errorf("launch not found") 1315 } 1316 1317 // Delete the launch 1318 if err := txn.Delete("periodic_launch", existing); err != nil { 1319 return fmt.Errorf("launch delete failed: %v", err) 1320 } 1321 if err := txn.Insert("index", &IndexEntry{"periodic_launch", index}); err != nil { 1322 return fmt.Errorf("index update failed: %v", err) 1323 } 1324 1325 txn.Commit() 1326 return nil 1327 } 1328 1329 // PeriodicLaunchByID is used to lookup a periodic launch by the periodic job 1330 // ID. 1331 func (s *StateStore) PeriodicLaunchByID(ws memdb.WatchSet, namespace, id string) (*structs.PeriodicLaunch, error) { 1332 txn := s.db.Txn(false) 1333 1334 // COMPAT 0.7: Upgrade old objects that do not have namespaces 1335 if namespace == "" { 1336 namespace = structs.DefaultNamespace 1337 } 1338 1339 watchCh, existing, err := txn.FirstWatch("periodic_launch", "id", namespace, id) 1340 if err != nil { 1341 return nil, fmt.Errorf("periodic launch lookup failed: %v", err) 1342 } 1343 1344 ws.Add(watchCh) 1345 1346 if existing != nil { 1347 return existing.(*structs.PeriodicLaunch), nil 1348 } 1349 return nil, nil 1350 } 1351 1352 // PeriodicLaunches returns an iterator over all the periodic launches 1353 func (s *StateStore) PeriodicLaunches(ws memdb.WatchSet) (memdb.ResultIterator, error) { 1354 txn := s.db.Txn(false) 1355 1356 // Walk the entire table 1357 iter, err := txn.Get("periodic_launch", "id") 1358 if err != nil { 1359 return nil, err 1360 } 1361 1362 ws.Add(iter.WatchCh()) 1363 1364 return iter, nil 1365 } 1366 1367 // UpsertEvals is used to upsert a set of evaluations 1368 func (s *StateStore) UpsertEvals(index uint64, evals []*structs.Evaluation) error { 1369 txn := s.db.Txn(true) 1370 defer txn.Abort() 1371 1372 // Do a nested upsert 1373 jobs := make(map[structs.NamespacedID]string, len(evals)) 1374 for _, eval := range evals { 1375 if err := s.nestedUpsertEval(txn, index, eval); err != nil { 1376 return err 1377 } 1378 1379 tuple := structs.NamespacedID{ 1380 ID: eval.JobID, 1381 Namespace: eval.Namespace, 1382 } 1383 jobs[tuple] = "" 1384 } 1385 1386 // Set the job's status 1387 if err := s.setJobStatuses(index, txn, jobs, false); err != nil { 1388 return fmt.Errorf("setting job status failed: %v", err) 1389 } 1390 1391 txn.Commit() 1392 return nil 1393 } 1394 1395 // nestedUpsertEvaluation is used to nest an evaluation upsert within a transaction 1396 func (s *StateStore) nestedUpsertEval(txn *memdb.Txn, index uint64, eval *structs.Evaluation) error { 1397 // Lookup the evaluation 1398 existing, err := txn.First("evals", "id", eval.ID) 1399 if err != nil { 1400 return fmt.Errorf("eval lookup failed: %v", err) 1401 } 1402 1403 // COMPAT 0.7: Upgrade old objects that do not have namespaces 1404 if eval.Namespace == "" { 1405 eval.Namespace = structs.DefaultNamespace 1406 } 1407 1408 // Update the indexes 1409 if existing != nil { 1410 eval.CreateIndex = existing.(*structs.Evaluation).CreateIndex 1411 eval.ModifyIndex = index 1412 } else { 1413 eval.CreateIndex = index 1414 eval.ModifyIndex = index 1415 } 1416 1417 // Update the job summary 1418 summaryRaw, err := txn.First("job_summary", "id", eval.Namespace, eval.JobID) 1419 if err != nil { 1420 return fmt.Errorf("job summary lookup failed: %v", err) 1421 } 1422 if summaryRaw != nil { 1423 js := summaryRaw.(*structs.JobSummary).Copy() 1424 hasSummaryChanged := false 1425 for tg, num := range eval.QueuedAllocations { 1426 if summary, ok := js.Summary[tg]; ok { 1427 if summary.Queued != num { 1428 summary.Queued = num 1429 js.Summary[tg] = summary 1430 hasSummaryChanged = true 1431 } 1432 } else { 1433 s.logger.Printf("[ERR] state_store: unable to update queued for job %q and task group %q", eval.JobID, tg) 1434 } 1435 } 1436 1437 // Insert the job summary 1438 if hasSummaryChanged { 1439 // COMPAT 0.7: Upgrade old objects that do not have namespaces 1440 if js.Namespace == "" { 1441 js.Namespace = structs.DefaultNamespace 1442 } 1443 1444 js.ModifyIndex = index 1445 if err := txn.Insert("job_summary", js); err != nil { 1446 return fmt.Errorf("job summary insert failed: %v", err) 1447 } 1448 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 1449 return fmt.Errorf("index update failed: %v", err) 1450 } 1451 } 1452 } 1453 1454 // Check if the job has any blocked evaluations and cancel them 1455 if eval.Status == structs.EvalStatusComplete && len(eval.FailedTGAllocs) == 0 { 1456 // Get the blocked evaluation for a job if it exists 1457 iter, err := txn.Get("evals", "job", eval.Namespace, eval.JobID, structs.EvalStatusBlocked) 1458 if err != nil { 1459 return fmt.Errorf("failed to get blocked evals for job %q in namespace %q: %v", eval.JobID, eval.Namespace, err) 1460 } 1461 1462 var blocked []*structs.Evaluation 1463 for { 1464 raw := iter.Next() 1465 if raw == nil { 1466 break 1467 } 1468 blocked = append(blocked, raw.(*structs.Evaluation)) 1469 } 1470 1471 // Go through and update the evals 1472 for _, eval := range blocked { 1473 newEval := eval.Copy() 1474 newEval.Status = structs.EvalStatusCancelled 1475 newEval.StatusDescription = fmt.Sprintf("evaluation %q successful", newEval.ID) 1476 newEval.ModifyIndex = index 1477 1478 // COMPAT 0.7: Upgrade old objects that do not have namespaces 1479 if newEval.Namespace == "" { 1480 newEval.Namespace = structs.DefaultNamespace 1481 } 1482 1483 if err := txn.Insert("evals", newEval); err != nil { 1484 return fmt.Errorf("eval insert failed: %v", err) 1485 } 1486 } 1487 } 1488 1489 // Insert the eval 1490 if err := txn.Insert("evals", eval); err != nil { 1491 return fmt.Errorf("eval insert failed: %v", err) 1492 } 1493 if err := txn.Insert("index", &IndexEntry{"evals", index}); err != nil { 1494 return fmt.Errorf("index update failed: %v", err) 1495 } 1496 return nil 1497 } 1498 1499 // updateEvalModifyIndex is used to update the modify index of an evaluation that has been 1500 // through a scheduler pass. This is done as part of plan apply. It ensures that when a subsequent 1501 // scheduler workers process a re-queued evaluation it sees any partial updates from the plan apply. 1502 func (s *StateStore) updateEvalModifyIndex(txn *memdb.Txn, index uint64, evalID string) error { 1503 // Lookup the evaluation 1504 existing, err := txn.First("evals", "id", evalID) 1505 if err != nil { 1506 return fmt.Errorf("eval lookup failed: %v", err) 1507 } 1508 if existing == nil { 1509 err := fmt.Errorf("unable to find eval id %q", evalID) 1510 s.logger.Printf("[ERR] state_store: %v", err) 1511 return err 1512 } 1513 eval := existing.(*structs.Evaluation).Copy() 1514 // Update the indexes 1515 eval.ModifyIndex = index 1516 1517 // Insert the eval 1518 if err := txn.Insert("evals", eval); err != nil { 1519 return fmt.Errorf("eval insert failed: %v", err) 1520 } 1521 if err := txn.Insert("index", &IndexEntry{"evals", index}); err != nil { 1522 return fmt.Errorf("index update failed: %v", err) 1523 } 1524 return nil 1525 } 1526 1527 // DeleteEval is used to delete an evaluation 1528 func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) error { 1529 txn := s.db.Txn(true) 1530 defer txn.Abort() 1531 1532 jobs := make(map[structs.NamespacedID]string, len(evals)) 1533 for _, eval := range evals { 1534 existing, err := txn.First("evals", "id", eval) 1535 if err != nil { 1536 return fmt.Errorf("eval lookup failed: %v", err) 1537 } 1538 if existing == nil { 1539 continue 1540 } 1541 if err := txn.Delete("evals", existing); err != nil { 1542 return fmt.Errorf("eval delete failed: %v", err) 1543 } 1544 eval := existing.(*structs.Evaluation) 1545 1546 tuple := structs.NamespacedID{ 1547 ID: eval.JobID, 1548 Namespace: eval.Namespace, 1549 } 1550 jobs[tuple] = "" 1551 } 1552 1553 for _, alloc := range allocs { 1554 raw, err := txn.First("allocs", "id", alloc) 1555 if err != nil { 1556 return fmt.Errorf("alloc lookup failed: %v", err) 1557 } 1558 if raw == nil { 1559 continue 1560 } 1561 if err := txn.Delete("allocs", raw); err != nil { 1562 return fmt.Errorf("alloc delete failed: %v", err) 1563 } 1564 } 1565 1566 // Update the indexes 1567 if err := txn.Insert("index", &IndexEntry{"evals", index}); err != nil { 1568 return fmt.Errorf("index update failed: %v", err) 1569 } 1570 if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { 1571 return fmt.Errorf("index update failed: %v", err) 1572 } 1573 1574 // Set the job's status 1575 if err := s.setJobStatuses(index, txn, jobs, true); err != nil { 1576 return fmt.Errorf("setting job status failed: %v", err) 1577 } 1578 1579 txn.Commit() 1580 return nil 1581 } 1582 1583 // EvalByID is used to lookup an eval by its ID 1584 func (s *StateStore) EvalByID(ws memdb.WatchSet, id string) (*structs.Evaluation, error) { 1585 txn := s.db.Txn(false) 1586 1587 watchCh, existing, err := txn.FirstWatch("evals", "id", id) 1588 if err != nil { 1589 return nil, fmt.Errorf("eval lookup failed: %v", err) 1590 } 1591 1592 ws.Add(watchCh) 1593 1594 if existing != nil { 1595 return existing.(*structs.Evaluation), nil 1596 } 1597 return nil, nil 1598 } 1599 1600 // EvalsByIDPrefix is used to lookup evaluations by prefix in a particular 1601 // namespace 1602 func (s *StateStore) EvalsByIDPrefix(ws memdb.WatchSet, namespace, id string) (memdb.ResultIterator, error) { 1603 txn := s.db.Txn(false) 1604 1605 // Get an iterator over all evals by the id prefix 1606 iter, err := txn.Get("evals", "id_prefix", id) 1607 if err != nil { 1608 return nil, fmt.Errorf("eval lookup failed: %v", err) 1609 } 1610 1611 ws.Add(iter.WatchCh()) 1612 1613 // COMPAT 0.7: Upgrade old objects that do not have namespaces 1614 if namespace == "" { 1615 namespace = structs.DefaultNamespace 1616 } 1617 1618 // Wrap the iterator in a filter 1619 wrap := memdb.NewFilterIterator(iter, evalNamespaceFilter(namespace)) 1620 return wrap, nil 1621 } 1622 1623 // evalNamespaceFilter returns a filter function that filters all evaluations 1624 // not in the given namespace. 1625 func evalNamespaceFilter(namespace string) func(interface{}) bool { 1626 return func(raw interface{}) bool { 1627 eval, ok := raw.(*structs.Evaluation) 1628 if !ok { 1629 return true 1630 } 1631 1632 return eval.Namespace != namespace 1633 } 1634 } 1635 1636 // EvalsByJob returns all the evaluations by job id 1637 func (s *StateStore) EvalsByJob(ws memdb.WatchSet, namespace, jobID string) ([]*structs.Evaluation, error) { 1638 txn := s.db.Txn(false) 1639 1640 // COMPAT 0.7: Upgrade old objects that do not have namespaces 1641 if namespace == "" { 1642 namespace = structs.DefaultNamespace 1643 } 1644 1645 // Get an iterator over the node allocations 1646 iter, err := txn.Get("evals", "job_prefix", namespace, jobID) 1647 if err != nil { 1648 return nil, err 1649 } 1650 1651 ws.Add(iter.WatchCh()) 1652 1653 var out []*structs.Evaluation 1654 for { 1655 raw := iter.Next() 1656 if raw == nil { 1657 break 1658 } 1659 1660 e := raw.(*structs.Evaluation) 1661 1662 // Filter non-exact matches 1663 if e.JobID != jobID { 1664 continue 1665 } 1666 1667 out = append(out, e) 1668 } 1669 return out, nil 1670 } 1671 1672 // Evals returns an iterator over all the evaluations 1673 func (s *StateStore) Evals(ws memdb.WatchSet) (memdb.ResultIterator, error) { 1674 txn := s.db.Txn(false) 1675 1676 // Walk the entire table 1677 iter, err := txn.Get("evals", "id") 1678 if err != nil { 1679 return nil, err 1680 } 1681 1682 ws.Add(iter.WatchCh()) 1683 1684 return iter, nil 1685 } 1686 1687 // EvalsByNamespace returns an iterator over all the evaluations in the given 1688 // namespace 1689 func (s *StateStore) EvalsByNamespace(ws memdb.WatchSet, namespace string) (memdb.ResultIterator, error) { 1690 txn := s.db.Txn(false) 1691 1692 // Walk the entire table 1693 iter, err := txn.Get("evals", "namespace", namespace) 1694 if err != nil { 1695 return nil, err 1696 } 1697 1698 ws.Add(iter.WatchCh()) 1699 1700 return iter, nil 1701 } 1702 1703 // UpdateAllocsFromClient is used to update an allocation based on input 1704 // from a client. While the schedulers are the authority on the allocation for 1705 // most things, some updates are authoritative from the client. Specifically, 1706 // the desired state comes from the schedulers, while the actual state comes 1707 // from clients. 1708 func (s *StateStore) UpdateAllocsFromClient(index uint64, allocs []*structs.Allocation) error { 1709 txn := s.db.Txn(true) 1710 defer txn.Abort() 1711 1712 // Handle each of the updated allocations 1713 for _, alloc := range allocs { 1714 if err := s.nestedUpdateAllocFromClient(txn, index, alloc); err != nil { 1715 return err 1716 } 1717 } 1718 1719 // Update the indexes 1720 if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { 1721 return fmt.Errorf("index update failed: %v", err) 1722 } 1723 1724 txn.Commit() 1725 return nil 1726 } 1727 1728 // nestedUpdateAllocFromClient is used to nest an update of an allocation with client status 1729 func (s *StateStore) nestedUpdateAllocFromClient(txn *memdb.Txn, index uint64, alloc *structs.Allocation) error { 1730 // Look for existing alloc 1731 existing, err := txn.First("allocs", "id", alloc.ID) 1732 if err != nil { 1733 return fmt.Errorf("alloc lookup failed: %v", err) 1734 } 1735 1736 // Nothing to do if this does not exist 1737 if existing == nil { 1738 return nil 1739 } 1740 exist := existing.(*structs.Allocation) 1741 1742 // Copy everything from the existing allocation 1743 copyAlloc := exist.Copy() 1744 1745 // COMPAT 0.7: Upgrade old objects that do not have namespaces 1746 if copyAlloc.Namespace == "" { 1747 copyAlloc.Namespace = structs.DefaultNamespace 1748 } 1749 1750 // Pull in anything the client is the authority on 1751 copyAlloc.ClientStatus = alloc.ClientStatus 1752 copyAlloc.ClientDescription = alloc.ClientDescription 1753 copyAlloc.TaskStates = alloc.TaskStates 1754 copyAlloc.DeploymentStatus = alloc.DeploymentStatus 1755 1756 // Update the modify index 1757 copyAlloc.ModifyIndex = index 1758 1759 // Update the modify time 1760 copyAlloc.ModifyTime = alloc.ModifyTime 1761 1762 if err := s.updateDeploymentWithAlloc(index, copyAlloc, exist, txn); err != nil { 1763 return fmt.Errorf("error updating deployment: %v", err) 1764 } 1765 1766 if err := s.updateSummaryWithAlloc(index, copyAlloc, exist, txn); err != nil { 1767 return fmt.Errorf("error updating job summary: %v", err) 1768 } 1769 1770 if err := s.updateEntWithAlloc(index, copyAlloc, exist, txn); err != nil { 1771 return err 1772 } 1773 1774 // Update the allocation 1775 if err := txn.Insert("allocs", copyAlloc); err != nil { 1776 return fmt.Errorf("alloc insert failed: %v", err) 1777 } 1778 1779 // Set the job's status 1780 forceStatus := "" 1781 if !copyAlloc.TerminalStatus() { 1782 forceStatus = structs.JobStatusRunning 1783 } 1784 1785 tuple := structs.NamespacedID{ 1786 ID: exist.JobID, 1787 Namespace: exist.Namespace, 1788 } 1789 jobs := map[structs.NamespacedID]string{tuple: forceStatus} 1790 1791 if err := s.setJobStatuses(index, txn, jobs, false); err != nil { 1792 return fmt.Errorf("setting job status failed: %v", err) 1793 } 1794 return nil 1795 } 1796 1797 // UpsertAllocs is used to evict a set of allocations and allocate new ones at 1798 // the same time. 1799 func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) error { 1800 txn := s.db.Txn(true) 1801 defer txn.Abort() 1802 if err := s.upsertAllocsImpl(index, allocs, txn); err != nil { 1803 return err 1804 } 1805 txn.Commit() 1806 return nil 1807 } 1808 1809 // upsertAllocs is the actual implementation of UpsertAllocs so that it may be 1810 // used with an existing transaction. 1811 func (s *StateStore) upsertAllocsImpl(index uint64, allocs []*structs.Allocation, txn *memdb.Txn) error { 1812 // Handle the allocations 1813 jobs := make(map[structs.NamespacedID]string, 1) 1814 for _, alloc := range allocs { 1815 existing, err := txn.First("allocs", "id", alloc.ID) 1816 if err != nil { 1817 return fmt.Errorf("alloc lookup failed: %v", err) 1818 } 1819 exist, _ := existing.(*structs.Allocation) 1820 1821 if exist == nil { 1822 alloc.CreateIndex = index 1823 alloc.ModifyIndex = index 1824 alloc.AllocModifyIndex = index 1825 1826 // Issue https://github.com/hashicorp/nomad/issues/2583 uncovered 1827 // the a race between a forced garbage collection and the scheduler 1828 // marking an allocation as terminal. The issue is that the 1829 // allocation from the scheduler has its job normalized and the FSM 1830 // will only denormalize if the allocation is not terminal. However 1831 // if the allocation is garbage collected, that will result in a 1832 // allocation being upserted for the first time without a job 1833 // attached. By returning an error here, it will cause the FSM to 1834 // error, causing the plan_apply to error and thus causing the 1835 // evaluation to be failed. This will force an index refresh that 1836 // should solve this issue. 1837 if alloc.Job == nil { 1838 return fmt.Errorf("attempting to upsert allocation %q without a job", alloc.ID) 1839 } 1840 } else { 1841 alloc.CreateIndex = exist.CreateIndex 1842 alloc.ModifyIndex = index 1843 alloc.AllocModifyIndex = index 1844 1845 // Keep the clients task states 1846 alloc.TaskStates = exist.TaskStates 1847 1848 // If the scheduler is marking this allocation as lost we do not 1849 // want to reuse the status of the existing allocation. 1850 if alloc.ClientStatus != structs.AllocClientStatusLost { 1851 alloc.ClientStatus = exist.ClientStatus 1852 alloc.ClientDescription = exist.ClientDescription 1853 } 1854 1855 // The job has been denormalized so re-attach the original job 1856 if alloc.Job == nil { 1857 alloc.Job = exist.Job 1858 } 1859 } 1860 1861 // COMPAT 0.7: Upgrade old objects that do not have namespaces 1862 if alloc.Namespace == "" { 1863 alloc.Namespace = structs.DefaultNamespace 1864 } 1865 1866 // OPTIMIZATION: 1867 // These should be given a map of new to old allocation and the updates 1868 // should be one on all changes. The current implementation causes O(n) 1869 // lookups/copies/insertions rather than O(1) 1870 if err := s.updateDeploymentWithAlloc(index, alloc, exist, txn); err != nil { 1871 return fmt.Errorf("error updating deployment: %v", err) 1872 } 1873 1874 if err := s.updateSummaryWithAlloc(index, alloc, exist, txn); err != nil { 1875 return fmt.Errorf("error updating job summary: %v", err) 1876 } 1877 1878 if err := s.updateEntWithAlloc(index, alloc, exist, txn); err != nil { 1879 return err 1880 } 1881 1882 // Create the EphemeralDisk if it's nil by adding up DiskMB from task resources. 1883 // COMPAT 0.4.1 -> 0.5 1884 if alloc.Job != nil { 1885 s.addEphemeralDiskToTaskGroups(alloc.Job) 1886 } 1887 1888 if err := txn.Insert("allocs", alloc); err != nil { 1889 return fmt.Errorf("alloc insert failed: %v", err) 1890 } 1891 1892 // If the allocation is running, force the job to running status. 1893 forceStatus := "" 1894 if !alloc.TerminalStatus() { 1895 forceStatus = structs.JobStatusRunning 1896 } 1897 1898 tuple := structs.NamespacedID{ 1899 ID: alloc.JobID, 1900 Namespace: alloc.Namespace, 1901 } 1902 jobs[tuple] = forceStatus 1903 } 1904 1905 // Update the indexes 1906 if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { 1907 return fmt.Errorf("index update failed: %v", err) 1908 } 1909 1910 // Set the job's status 1911 if err := s.setJobStatuses(index, txn, jobs, false); err != nil { 1912 return fmt.Errorf("setting job status failed: %v", err) 1913 } 1914 1915 return nil 1916 } 1917 1918 // AllocByID is used to lookup an allocation by its ID 1919 func (s *StateStore) AllocByID(ws memdb.WatchSet, id string) (*structs.Allocation, error) { 1920 txn := s.db.Txn(false) 1921 1922 watchCh, existing, err := txn.FirstWatch("allocs", "id", id) 1923 if err != nil { 1924 return nil, fmt.Errorf("alloc lookup failed: %v", err) 1925 } 1926 1927 ws.Add(watchCh) 1928 1929 if existing != nil { 1930 return existing.(*structs.Allocation), nil 1931 } 1932 return nil, nil 1933 } 1934 1935 // AllocsByIDPrefix is used to lookup allocs by prefix 1936 func (s *StateStore) AllocsByIDPrefix(ws memdb.WatchSet, namespace, id string) (memdb.ResultIterator, error) { 1937 txn := s.db.Txn(false) 1938 1939 iter, err := txn.Get("allocs", "id_prefix", id) 1940 if err != nil { 1941 return nil, fmt.Errorf("alloc lookup failed: %v", err) 1942 } 1943 1944 ws.Add(iter.WatchCh()) 1945 1946 // Wrap the iterator in a filter 1947 wrap := memdb.NewFilterIterator(iter, allocNamespaceFilter(namespace)) 1948 return wrap, nil 1949 } 1950 1951 // allocNamespaceFilter returns a filter function that filters all allocations 1952 // not in the given namespace. 1953 func allocNamespaceFilter(namespace string) func(interface{}) bool { 1954 return func(raw interface{}) bool { 1955 alloc, ok := raw.(*structs.Allocation) 1956 if !ok { 1957 return true 1958 } 1959 1960 return alloc.Namespace != namespace 1961 } 1962 } 1963 1964 // AllocsByNode returns all the allocations by node 1965 func (s *StateStore) AllocsByNode(ws memdb.WatchSet, node string) ([]*structs.Allocation, error) { 1966 txn := s.db.Txn(false) 1967 1968 // Get an iterator over the node allocations, using only the 1969 // node prefix which ignores the terminal status 1970 iter, err := txn.Get("allocs", "node_prefix", node) 1971 if err != nil { 1972 return nil, err 1973 } 1974 1975 ws.Add(iter.WatchCh()) 1976 1977 var out []*structs.Allocation 1978 for { 1979 raw := iter.Next() 1980 if raw == nil { 1981 break 1982 } 1983 out = append(out, raw.(*structs.Allocation)) 1984 } 1985 return out, nil 1986 } 1987 1988 // AllocsByNode returns all the allocations by node and terminal status 1989 func (s *StateStore) AllocsByNodeTerminal(ws memdb.WatchSet, node string, terminal bool) ([]*structs.Allocation, error) { 1990 txn := s.db.Txn(false) 1991 1992 // Get an iterator over the node allocations 1993 iter, err := txn.Get("allocs", "node", node, terminal) 1994 if err != nil { 1995 return nil, err 1996 } 1997 1998 ws.Add(iter.WatchCh()) 1999 2000 var out []*structs.Allocation 2001 for { 2002 raw := iter.Next() 2003 if raw == nil { 2004 break 2005 } 2006 out = append(out, raw.(*structs.Allocation)) 2007 } 2008 return out, nil 2009 } 2010 2011 // AllocsByJob returns all the allocations by job id 2012 func (s *StateStore) AllocsByJob(ws memdb.WatchSet, namespace, jobID string, all bool) ([]*structs.Allocation, error) { 2013 txn := s.db.Txn(false) 2014 2015 // COMPAT 0.7: Upgrade old objects that do not have namespaces 2016 if namespace == "" { 2017 namespace = structs.DefaultNamespace 2018 } 2019 2020 // Get the job 2021 var job *structs.Job 2022 rawJob, err := txn.First("jobs", "id", namespace, jobID) 2023 if err != nil { 2024 return nil, err 2025 } 2026 if rawJob != nil { 2027 job = rawJob.(*structs.Job) 2028 } 2029 2030 // Get an iterator over the node allocations 2031 iter, err := txn.Get("allocs", "job", namespace, jobID) 2032 if err != nil { 2033 return nil, err 2034 } 2035 2036 ws.Add(iter.WatchCh()) 2037 2038 var out []*structs.Allocation 2039 for { 2040 raw := iter.Next() 2041 if raw == nil { 2042 break 2043 } 2044 2045 alloc := raw.(*structs.Allocation) 2046 // If the allocation belongs to a job with the same ID but a different 2047 // create index and we are not getting all the allocations whose Jobs 2048 // matches the same Job ID then we skip it 2049 if !all && job != nil && alloc.Job.CreateIndex != job.CreateIndex { 2050 continue 2051 } 2052 out = append(out, raw.(*structs.Allocation)) 2053 } 2054 return out, nil 2055 } 2056 2057 // AllocsByEval returns all the allocations by eval id 2058 func (s *StateStore) AllocsByEval(ws memdb.WatchSet, evalID string) ([]*structs.Allocation, error) { 2059 txn := s.db.Txn(false) 2060 2061 // Get an iterator over the eval allocations 2062 iter, err := txn.Get("allocs", "eval", evalID) 2063 if err != nil { 2064 return nil, err 2065 } 2066 2067 ws.Add(iter.WatchCh()) 2068 2069 var out []*structs.Allocation 2070 for { 2071 raw := iter.Next() 2072 if raw == nil { 2073 break 2074 } 2075 out = append(out, raw.(*structs.Allocation)) 2076 } 2077 return out, nil 2078 } 2079 2080 // AllocsByDeployment returns all the allocations by deployment id 2081 func (s *StateStore) AllocsByDeployment(ws memdb.WatchSet, deploymentID string) ([]*structs.Allocation, error) { 2082 txn := s.db.Txn(false) 2083 2084 // Get an iterator over the deployments allocations 2085 iter, err := txn.Get("allocs", "deployment", deploymentID) 2086 if err != nil { 2087 return nil, err 2088 } 2089 2090 ws.Add(iter.WatchCh()) 2091 2092 var out []*structs.Allocation 2093 for { 2094 raw := iter.Next() 2095 if raw == nil { 2096 break 2097 } 2098 out = append(out, raw.(*structs.Allocation)) 2099 } 2100 return out, nil 2101 } 2102 2103 // Allocs returns an iterator over all the evaluations 2104 func (s *StateStore) Allocs(ws memdb.WatchSet) (memdb.ResultIterator, error) { 2105 txn := s.db.Txn(false) 2106 2107 // Walk the entire table 2108 iter, err := txn.Get("allocs", "id") 2109 if err != nil { 2110 return nil, err 2111 } 2112 2113 ws.Add(iter.WatchCh()) 2114 2115 return iter, nil 2116 } 2117 2118 // AllocsByNamespace returns an iterator over all the allocations in the 2119 // namespace 2120 func (s *StateStore) AllocsByNamespace(ws memdb.WatchSet, namespace string) (memdb.ResultIterator, error) { 2121 txn := s.db.Txn(false) 2122 return s.allocsByNamespaceImpl(ws, txn, namespace) 2123 } 2124 2125 // allocsByNamespaceImpl returns an iterator over all the allocations in the 2126 // namespace 2127 func (s *StateStore) allocsByNamespaceImpl(ws memdb.WatchSet, txn *memdb.Txn, namespace string) (memdb.ResultIterator, error) { 2128 // Walk the entire table 2129 iter, err := txn.Get("allocs", "namespace", namespace) 2130 if err != nil { 2131 return nil, err 2132 } 2133 2134 ws.Add(iter.WatchCh()) 2135 2136 return iter, nil 2137 } 2138 2139 // UpsertVaultAccessors is used to register a set of Vault Accessors 2140 func (s *StateStore) UpsertVaultAccessor(index uint64, accessors []*structs.VaultAccessor) error { 2141 txn := s.db.Txn(true) 2142 defer txn.Abort() 2143 2144 for _, accessor := range accessors { 2145 // Set the create index 2146 accessor.CreateIndex = index 2147 2148 // Insert the accessor 2149 if err := txn.Insert("vault_accessors", accessor); err != nil { 2150 return fmt.Errorf("accessor insert failed: %v", err) 2151 } 2152 } 2153 2154 if err := txn.Insert("index", &IndexEntry{"vault_accessors", index}); err != nil { 2155 return fmt.Errorf("index update failed: %v", err) 2156 } 2157 2158 txn.Commit() 2159 return nil 2160 } 2161 2162 // DeleteVaultAccessors is used to delete a set of Vault Accessors 2163 func (s *StateStore) DeleteVaultAccessors(index uint64, accessors []*structs.VaultAccessor) error { 2164 txn := s.db.Txn(true) 2165 defer txn.Abort() 2166 2167 // Lookup the accessor 2168 for _, accessor := range accessors { 2169 // Delete the accessor 2170 if err := txn.Delete("vault_accessors", accessor); err != nil { 2171 return fmt.Errorf("accessor delete failed: %v", err) 2172 } 2173 } 2174 2175 if err := txn.Insert("index", &IndexEntry{"vault_accessors", index}); err != nil { 2176 return fmt.Errorf("index update failed: %v", err) 2177 } 2178 2179 txn.Commit() 2180 return nil 2181 } 2182 2183 // VaultAccessor returns the given Vault accessor 2184 func (s *StateStore) VaultAccessor(ws memdb.WatchSet, accessor string) (*structs.VaultAccessor, error) { 2185 txn := s.db.Txn(false) 2186 2187 watchCh, existing, err := txn.FirstWatch("vault_accessors", "id", accessor) 2188 if err != nil { 2189 return nil, fmt.Errorf("accessor lookup failed: %v", err) 2190 } 2191 2192 ws.Add(watchCh) 2193 2194 if existing != nil { 2195 return existing.(*structs.VaultAccessor), nil 2196 } 2197 2198 return nil, nil 2199 } 2200 2201 // VaultAccessors returns an iterator of Vault accessors. 2202 func (s *StateStore) VaultAccessors(ws memdb.WatchSet) (memdb.ResultIterator, error) { 2203 txn := s.db.Txn(false) 2204 2205 iter, err := txn.Get("vault_accessors", "id") 2206 if err != nil { 2207 return nil, err 2208 } 2209 2210 ws.Add(iter.WatchCh()) 2211 2212 return iter, nil 2213 } 2214 2215 // VaultAccessorsByAlloc returns all the Vault accessors by alloc id 2216 func (s *StateStore) VaultAccessorsByAlloc(ws memdb.WatchSet, allocID string) ([]*structs.VaultAccessor, error) { 2217 txn := s.db.Txn(false) 2218 2219 // Get an iterator over the accessors 2220 iter, err := txn.Get("vault_accessors", "alloc_id", allocID) 2221 if err != nil { 2222 return nil, err 2223 } 2224 2225 ws.Add(iter.WatchCh()) 2226 2227 var out []*structs.VaultAccessor 2228 for { 2229 raw := iter.Next() 2230 if raw == nil { 2231 break 2232 } 2233 out = append(out, raw.(*structs.VaultAccessor)) 2234 } 2235 return out, nil 2236 } 2237 2238 // VaultAccessorsByNode returns all the Vault accessors by node id 2239 func (s *StateStore) VaultAccessorsByNode(ws memdb.WatchSet, nodeID string) ([]*structs.VaultAccessor, error) { 2240 txn := s.db.Txn(false) 2241 2242 // Get an iterator over the accessors 2243 iter, err := txn.Get("vault_accessors", "node_id", nodeID) 2244 if err != nil { 2245 return nil, err 2246 } 2247 2248 ws.Add(iter.WatchCh()) 2249 2250 var out []*structs.VaultAccessor 2251 for { 2252 raw := iter.Next() 2253 if raw == nil { 2254 break 2255 } 2256 out = append(out, raw.(*structs.VaultAccessor)) 2257 } 2258 return out, nil 2259 } 2260 2261 // UpdateDeploymentStatus is used to make deployment status updates and 2262 // potentially make a evaluation 2263 func (s *StateStore) UpdateDeploymentStatus(index uint64, req *structs.DeploymentStatusUpdateRequest) error { 2264 txn := s.db.Txn(true) 2265 defer txn.Abort() 2266 2267 if err := s.updateDeploymentStatusImpl(index, req.DeploymentUpdate, txn); err != nil { 2268 return err 2269 } 2270 2271 // Upsert the job if necessary 2272 if req.Job != nil { 2273 if err := s.upsertJobImpl(index, req.Job, false, txn); err != nil { 2274 return err 2275 } 2276 } 2277 2278 // Upsert the optional eval 2279 if req.Eval != nil { 2280 if err := s.nestedUpsertEval(txn, index, req.Eval); err != nil { 2281 return err 2282 } 2283 } 2284 2285 txn.Commit() 2286 return nil 2287 } 2288 2289 // updateDeploymentStatusImpl is used to make deployment status updates 2290 func (s *StateStore) updateDeploymentStatusImpl(index uint64, u *structs.DeploymentStatusUpdate, txn *memdb.Txn) error { 2291 // Retrieve deployment 2292 ws := memdb.NewWatchSet() 2293 deployment, err := s.deploymentByIDImpl(ws, u.DeploymentID, txn) 2294 if err != nil { 2295 return err 2296 } else if deployment == nil { 2297 return fmt.Errorf("Deployment ID %q couldn't be updated as it does not exist", u.DeploymentID) 2298 } else if !deployment.Active() { 2299 return fmt.Errorf("Deployment %q has terminal status %q:", deployment.ID, deployment.Status) 2300 } 2301 2302 // Apply the new status 2303 copy := deployment.Copy() 2304 copy.Status = u.Status 2305 copy.StatusDescription = u.StatusDescription 2306 copy.ModifyIndex = index 2307 2308 // COMPAT 0.7: Upgrade old objects that do not have namespaces 2309 if copy.Namespace == "" { 2310 copy.Namespace = structs.DefaultNamespace 2311 } 2312 2313 // Insert the deployment 2314 if err := txn.Insert("deployment", copy); err != nil { 2315 return err 2316 } 2317 2318 // Update the index 2319 if err := txn.Insert("index", &IndexEntry{"deployment", index}); err != nil { 2320 return fmt.Errorf("index update failed: %v", err) 2321 } 2322 2323 // If the deployment is being marked as complete, set the job to stable. 2324 if copy.Status == structs.DeploymentStatusSuccessful { 2325 if err := s.updateJobStabilityImpl(index, copy.Namespace, copy.JobID, copy.JobVersion, true, txn); err != nil { 2326 return fmt.Errorf("failed to update job stability: %v", err) 2327 } 2328 } 2329 2330 return nil 2331 } 2332 2333 // UpdateJobStability updates the stability of the given job and version to the 2334 // desired status. 2335 func (s *StateStore) UpdateJobStability(index uint64, namespace, jobID string, jobVersion uint64, stable bool) error { 2336 txn := s.db.Txn(true) 2337 defer txn.Abort() 2338 2339 // COMPAT 0.7: Upgrade old objects that do not have namespaces 2340 if namespace == "" { 2341 namespace = structs.DefaultNamespace 2342 } 2343 2344 if err := s.updateJobStabilityImpl(index, namespace, jobID, jobVersion, stable, txn); err != nil { 2345 return err 2346 } 2347 2348 txn.Commit() 2349 return nil 2350 } 2351 2352 // updateJobStabilityImpl updates the stability of the given job and version 2353 func (s *StateStore) updateJobStabilityImpl(index uint64, namespace, jobID string, jobVersion uint64, stable bool, txn *memdb.Txn) error { 2354 // COMPAT 0.7: Upgrade old objects that do not have namespaces 2355 if namespace == "" { 2356 namespace = structs.DefaultNamespace 2357 } 2358 2359 // Get the job that is referenced 2360 job, err := s.jobByIDAndVersionImpl(nil, namespace, jobID, jobVersion, txn) 2361 if err != nil { 2362 return err 2363 } 2364 2365 // Has already been cleared, nothing to do 2366 if job == nil { 2367 return nil 2368 } 2369 2370 // If the job already has the desired stability, nothing to do 2371 if job.Stable == stable { 2372 return nil 2373 } 2374 2375 copy := job.Copy() 2376 copy.Stable = stable 2377 return s.upsertJobImpl(index, copy, true, txn) 2378 } 2379 2380 // UpdateDeploymentPromotion is used to promote canaries in a deployment and 2381 // potentially make a evaluation 2382 func (s *StateStore) UpdateDeploymentPromotion(index uint64, req *structs.ApplyDeploymentPromoteRequest) error { 2383 txn := s.db.Txn(true) 2384 defer txn.Abort() 2385 2386 // Retrieve deployment and ensure it is not terminal and is active 2387 ws := memdb.NewWatchSet() 2388 deployment, err := s.deploymentByIDImpl(ws, req.DeploymentID, txn) 2389 if err != nil { 2390 return err 2391 } else if deployment == nil { 2392 return fmt.Errorf("Deployment ID %q couldn't be updated as it does not exist", req.DeploymentID) 2393 } else if !deployment.Active() { 2394 return fmt.Errorf("Deployment %q has terminal status %q:", deployment.ID, deployment.Status) 2395 } 2396 2397 // Retrieve effected allocations 2398 iter, err := txn.Get("allocs", "deployment", req.DeploymentID) 2399 if err != nil { 2400 return err 2401 } 2402 2403 groupIndex := make(map[string]struct{}, len(req.Groups)) 2404 for _, g := range req.Groups { 2405 groupIndex[g] = struct{}{} 2406 } 2407 2408 canaryIndex := make(map[string]struct{}, len(deployment.TaskGroups)) 2409 for _, state := range deployment.TaskGroups { 2410 for _, c := range state.PlacedCanaries { 2411 canaryIndex[c] = struct{}{} 2412 } 2413 } 2414 2415 haveCanaries := false 2416 var unhealthyErr multierror.Error 2417 for { 2418 raw := iter.Next() 2419 if raw == nil { 2420 break 2421 } 2422 2423 alloc := raw.(*structs.Allocation) 2424 2425 // Check that the alloc is a canary 2426 if _, ok := canaryIndex[alloc.ID]; !ok { 2427 continue 2428 } 2429 2430 // Check that the canary is part of a group being promoted 2431 if _, ok := groupIndex[alloc.TaskGroup]; !req.All && !ok { 2432 continue 2433 } 2434 2435 // Ensure the canaries are healthy 2436 if !alloc.DeploymentStatus.IsHealthy() { 2437 multierror.Append(&unhealthyErr, fmt.Errorf("Canary allocation %q for group %q is not healthy", alloc.ID, alloc.TaskGroup)) 2438 continue 2439 } 2440 2441 haveCanaries = true 2442 } 2443 2444 if err := unhealthyErr.ErrorOrNil(); err != nil { 2445 return err 2446 } 2447 2448 if !haveCanaries { 2449 return fmt.Errorf("no canaries to promote") 2450 } 2451 2452 // Update deployment 2453 copy := deployment.Copy() 2454 copy.ModifyIndex = index 2455 for tg, status := range copy.TaskGroups { 2456 _, ok := groupIndex[tg] 2457 if !req.All && !ok { 2458 continue 2459 } 2460 2461 status.Promoted = true 2462 } 2463 2464 // If the deployment no longer needs promotion, update its status 2465 if !copy.RequiresPromotion() && copy.Status == structs.DeploymentStatusRunning { 2466 copy.StatusDescription = structs.DeploymentStatusDescriptionRunning 2467 } 2468 2469 // Insert the deployment 2470 if err := s.upsertDeploymentImpl(index, copy, txn); err != nil { 2471 return err 2472 } 2473 2474 // Upsert the optional eval 2475 if req.Eval != nil { 2476 if err := s.nestedUpsertEval(txn, index, req.Eval); err != nil { 2477 return err 2478 } 2479 } 2480 2481 txn.Commit() 2482 return nil 2483 } 2484 2485 // UpdateDeploymentAllocHealth is used to update the health of allocations as 2486 // part of the deployment and potentially make a evaluation 2487 func (s *StateStore) UpdateDeploymentAllocHealth(index uint64, req *structs.ApplyDeploymentAllocHealthRequest) error { 2488 txn := s.db.Txn(true) 2489 defer txn.Abort() 2490 2491 // Retrieve deployment and ensure it is not terminal and is active 2492 ws := memdb.NewWatchSet() 2493 deployment, err := s.deploymentByIDImpl(ws, req.DeploymentID, txn) 2494 if err != nil { 2495 return err 2496 } else if deployment == nil { 2497 return fmt.Errorf("Deployment ID %q couldn't be updated as it does not exist", req.DeploymentID) 2498 } else if !deployment.Active() { 2499 return fmt.Errorf("Deployment %q has terminal status %q:", deployment.ID, deployment.Status) 2500 } 2501 2502 // Update the health status of each allocation 2503 if total := len(req.HealthyAllocationIDs) + len(req.UnhealthyAllocationIDs); total != 0 { 2504 setAllocHealth := func(id string, healthy bool) error { 2505 existing, err := txn.First("allocs", "id", id) 2506 if err != nil { 2507 return fmt.Errorf("alloc %q lookup failed: %v", id, err) 2508 } 2509 if existing == nil { 2510 return fmt.Errorf("unknown alloc %q", id) 2511 } 2512 2513 old := existing.(*structs.Allocation) 2514 if old.DeploymentID != req.DeploymentID { 2515 return fmt.Errorf("alloc %q is not part of deployment %q", id, req.DeploymentID) 2516 } 2517 2518 // Set the health 2519 copy := old.Copy() 2520 if copy.DeploymentStatus == nil { 2521 copy.DeploymentStatus = &structs.AllocDeploymentStatus{} 2522 } 2523 copy.DeploymentStatus.Healthy = helper.BoolToPtr(healthy) 2524 copy.DeploymentStatus.ModifyIndex = index 2525 2526 if err := s.updateDeploymentWithAlloc(index, copy, old, txn); err != nil { 2527 return fmt.Errorf("error updating deployment: %v", err) 2528 } 2529 2530 if err := txn.Insert("allocs", copy); err != nil { 2531 return fmt.Errorf("alloc insert failed: %v", err) 2532 } 2533 2534 return nil 2535 } 2536 2537 for _, id := range req.HealthyAllocationIDs { 2538 if err := setAllocHealth(id, true); err != nil { 2539 return err 2540 } 2541 } 2542 for _, id := range req.UnhealthyAllocationIDs { 2543 if err := setAllocHealth(id, false); err != nil { 2544 return err 2545 } 2546 } 2547 2548 // Update the indexes 2549 if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { 2550 return fmt.Errorf("index update failed: %v", err) 2551 } 2552 } 2553 2554 // Update the deployment status as needed. 2555 if req.DeploymentUpdate != nil { 2556 if err := s.updateDeploymentStatusImpl(index, req.DeploymentUpdate, txn); err != nil { 2557 return err 2558 } 2559 } 2560 2561 // Upsert the job if necessary 2562 if req.Job != nil { 2563 if err := s.upsertJobImpl(index, req.Job, false, txn); err != nil { 2564 return err 2565 } 2566 } 2567 2568 // Upsert the optional eval 2569 if req.Eval != nil { 2570 if err := s.nestedUpsertEval(txn, index, req.Eval); err != nil { 2571 return err 2572 } 2573 } 2574 2575 txn.Commit() 2576 return nil 2577 } 2578 2579 // LastIndex returns the greatest index value for all indexes 2580 func (s *StateStore) LatestIndex() (uint64, error) { 2581 indexes, err := s.Indexes() 2582 if err != nil { 2583 return 0, err 2584 } 2585 2586 var max uint64 = 0 2587 for { 2588 raw := indexes.Next() 2589 if raw == nil { 2590 break 2591 } 2592 2593 // Prepare the request struct 2594 idx := raw.(*IndexEntry) 2595 2596 // Determine the max 2597 if idx.Value > max { 2598 max = idx.Value 2599 } 2600 } 2601 2602 return max, nil 2603 } 2604 2605 // Index finds the matching index value 2606 func (s *StateStore) Index(name string) (uint64, error) { 2607 txn := s.db.Txn(false) 2608 2609 // Lookup the first matching index 2610 out, err := txn.First("index", "id", name) 2611 if err != nil { 2612 return 0, err 2613 } 2614 if out == nil { 2615 return 0, nil 2616 } 2617 return out.(*IndexEntry).Value, nil 2618 } 2619 2620 // RemoveIndex is a helper method to remove an index for testing purposes 2621 func (s *StateStore) RemoveIndex(name string) error { 2622 txn := s.db.Txn(true) 2623 defer txn.Abort() 2624 2625 if _, err := txn.DeleteAll("index", "id", name); err != nil { 2626 return err 2627 } 2628 2629 txn.Commit() 2630 return nil 2631 } 2632 2633 // Indexes returns an iterator over all the indexes 2634 func (s *StateStore) Indexes() (memdb.ResultIterator, error) { 2635 txn := s.db.Txn(false) 2636 2637 // Walk the entire nodes table 2638 iter, err := txn.Get("index", "id") 2639 if err != nil { 2640 return nil, err 2641 } 2642 return iter, nil 2643 } 2644 2645 // ReconcileJobSummaries re-creates summaries for all jobs present in the state 2646 // store 2647 func (s *StateStore) ReconcileJobSummaries(index uint64) error { 2648 txn := s.db.Txn(true) 2649 defer txn.Abort() 2650 2651 // Get all the jobs 2652 iter, err := txn.Get("jobs", "id") 2653 if err != nil { 2654 return err 2655 } 2656 for { 2657 rawJob := iter.Next() 2658 if rawJob == nil { 2659 break 2660 } 2661 job := rawJob.(*structs.Job) 2662 2663 // Create a job summary for the job 2664 summary := &structs.JobSummary{ 2665 JobID: job.ID, 2666 Namespace: job.Namespace, 2667 Summary: make(map[string]structs.TaskGroupSummary), 2668 } 2669 for _, tg := range job.TaskGroups { 2670 summary.Summary[tg.Name] = structs.TaskGroupSummary{} 2671 } 2672 2673 // COMPAT 0.7: Upgrade old objects that do not have namespaces 2674 if job.Namespace == "" { 2675 job.Namespace = structs.DefaultNamespace 2676 } 2677 2678 // Find all the allocations for the jobs 2679 iterAllocs, err := txn.Get("allocs", "job", job.Namespace, job.ID) 2680 if err != nil { 2681 return err 2682 } 2683 2684 // Calculate the summary for the job 2685 for { 2686 rawAlloc := iterAllocs.Next() 2687 if rawAlloc == nil { 2688 break 2689 } 2690 alloc := rawAlloc.(*structs.Allocation) 2691 2692 // Ignore the allocation if it doesn't belong to the currently 2693 // registered job. The allocation is checked because of issue #2304 2694 if alloc.Job == nil || alloc.Job.CreateIndex != job.CreateIndex { 2695 continue 2696 } 2697 2698 tg := summary.Summary[alloc.TaskGroup] 2699 switch alloc.ClientStatus { 2700 case structs.AllocClientStatusFailed: 2701 tg.Failed += 1 2702 case structs.AllocClientStatusLost: 2703 tg.Lost += 1 2704 case structs.AllocClientStatusComplete: 2705 tg.Complete += 1 2706 case structs.AllocClientStatusRunning: 2707 tg.Running += 1 2708 case structs.AllocClientStatusPending: 2709 tg.Starting += 1 2710 default: 2711 s.logger.Printf("[ERR] state_store: invalid client status: %v in allocation %q", alloc.ClientStatus, alloc.ID) 2712 } 2713 summary.Summary[alloc.TaskGroup] = tg 2714 } 2715 2716 // Set the create index of the summary same as the job's create index 2717 // and the modify index to the current index 2718 summary.CreateIndex = job.CreateIndex 2719 summary.ModifyIndex = index 2720 2721 // Insert the job summary 2722 if err := txn.Insert("job_summary", summary); err != nil { 2723 return fmt.Errorf("error inserting job summary: %v", err) 2724 } 2725 } 2726 2727 // Update the indexes table for job summary 2728 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 2729 return fmt.Errorf("index update failed: %v", err) 2730 } 2731 txn.Commit() 2732 return nil 2733 } 2734 2735 // setJobStatuses is a helper for calling setJobStatus on multiple jobs by ID. 2736 // It takes a map of job IDs to an optional forceStatus string. It returns an 2737 // error if the job doesn't exist or setJobStatus fails. 2738 func (s *StateStore) setJobStatuses(index uint64, txn *memdb.Txn, 2739 jobs map[structs.NamespacedID]string, evalDelete bool) error { 2740 for tuple, forceStatus := range jobs { 2741 // COMPAT 0.7: Upgrade old objects that do not have namespaces 2742 if tuple.Namespace == "" { 2743 tuple.Namespace = structs.DefaultNamespace 2744 } 2745 2746 existing, err := txn.First("jobs", "id", tuple.Namespace, tuple.ID) 2747 if err != nil { 2748 return fmt.Errorf("job lookup failed: %v", err) 2749 } 2750 2751 if existing == nil { 2752 continue 2753 } 2754 2755 if err := s.setJobStatus(index, txn, existing.(*structs.Job), evalDelete, forceStatus); err != nil { 2756 return err 2757 } 2758 } 2759 2760 return nil 2761 } 2762 2763 // setJobStatus sets the status of the job by looking up associated evaluations 2764 // and allocations. evalDelete should be set to true if setJobStatus is being 2765 // called because an evaluation is being deleted (potentially because of garbage 2766 // collection). If forceStatus is non-empty, the job's status will be set to the 2767 // passed status. 2768 func (s *StateStore) setJobStatus(index uint64, txn *memdb.Txn, 2769 job *structs.Job, evalDelete bool, forceStatus string) error { 2770 2771 // Capture the current status so we can check if there is a change 2772 oldStatus := job.Status 2773 if index == job.CreateIndex { 2774 oldStatus = "" 2775 } 2776 newStatus := forceStatus 2777 2778 // If forceStatus is not set, compute the jobs status. 2779 if forceStatus == "" { 2780 var err error 2781 newStatus, err = s.getJobStatus(txn, job, evalDelete) 2782 if err != nil { 2783 return err 2784 } 2785 } 2786 2787 // Fast-path if nothing has changed. 2788 if oldStatus == newStatus { 2789 return nil 2790 } 2791 2792 // Copy and update the existing job 2793 updated := job.Copy() 2794 updated.Status = newStatus 2795 updated.ModifyIndex = index 2796 2797 // COMPAT 0.7: Upgrade old objects that do not have namespaces 2798 if updated.Namespace == "" { 2799 updated.Namespace = structs.DefaultNamespace 2800 } 2801 2802 // Insert the job 2803 if err := txn.Insert("jobs", updated); err != nil { 2804 return fmt.Errorf("job insert failed: %v", err) 2805 } 2806 if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil { 2807 return fmt.Errorf("index update failed: %v", err) 2808 } 2809 2810 // Update the children summary 2811 if updated.ParentID != "" { 2812 // Try to update the summary of the parent job summary 2813 summaryRaw, err := txn.First("job_summary", "id", updated.Namespace, updated.ParentID) 2814 if err != nil { 2815 return fmt.Errorf("unable to retrieve summary for parent job: %v", err) 2816 } 2817 2818 // Only continue if the summary exists. It could not exist if the parent 2819 // job was removed 2820 if summaryRaw != nil { 2821 existing := summaryRaw.(*structs.JobSummary) 2822 pSummary := existing.Copy() 2823 if pSummary.Children == nil { 2824 pSummary.Children = new(structs.JobChildrenSummary) 2825 } 2826 2827 // COMPAT 0.7: Upgrade old objects that do not have namespaces 2828 if pSummary.Namespace == "" { 2829 pSummary.Namespace = structs.DefaultNamespace 2830 } 2831 2832 // Determine the transition and update the correct fields 2833 children := pSummary.Children 2834 2835 // Decrement old status 2836 if oldStatus != "" { 2837 switch oldStatus { 2838 case structs.JobStatusPending: 2839 children.Pending-- 2840 case structs.JobStatusRunning: 2841 children.Running-- 2842 case structs.JobStatusDead: 2843 children.Dead-- 2844 default: 2845 return fmt.Errorf("unknown old job status %q", oldStatus) 2846 } 2847 } 2848 2849 // Increment new status 2850 switch newStatus { 2851 case structs.JobStatusPending: 2852 children.Pending++ 2853 case structs.JobStatusRunning: 2854 children.Running++ 2855 case structs.JobStatusDead: 2856 children.Dead++ 2857 default: 2858 return fmt.Errorf("unknown new job status %q", newStatus) 2859 } 2860 2861 // Update the index 2862 pSummary.ModifyIndex = index 2863 2864 // Insert the summary 2865 if err := txn.Insert("job_summary", pSummary); err != nil { 2866 return fmt.Errorf("job summary insert failed: %v", err) 2867 } 2868 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 2869 return fmt.Errorf("index update failed: %v", err) 2870 } 2871 } 2872 } 2873 2874 return nil 2875 } 2876 2877 func (s *StateStore) getJobStatus(txn *memdb.Txn, job *structs.Job, evalDelete bool) (string, error) { 2878 // COMPAT 0.7: Upgrade old objects that do not have namespaces 2879 if job.Namespace == "" { 2880 job.Namespace = structs.DefaultNamespace 2881 } 2882 2883 // System, Periodic and Parameterized jobs are running until explicitly 2884 // stopped 2885 if job.Type == structs.JobTypeSystem || job.IsParameterized() || job.IsPeriodic() { 2886 if job.Stop { 2887 return structs.JobStatusDead, nil 2888 } 2889 2890 return structs.JobStatusRunning, nil 2891 } 2892 2893 allocs, err := txn.Get("allocs", "job", job.Namespace, job.ID) 2894 if err != nil { 2895 return "", err 2896 } 2897 2898 // If there is a non-terminal allocation, the job is running. 2899 hasAlloc := false 2900 for alloc := allocs.Next(); alloc != nil; alloc = allocs.Next() { 2901 hasAlloc = true 2902 if !alloc.(*structs.Allocation).TerminalStatus() { 2903 return structs.JobStatusRunning, nil 2904 } 2905 } 2906 2907 evals, err := txn.Get("evals", "job_prefix", job.Namespace, job.ID) 2908 if err != nil { 2909 return "", err 2910 } 2911 2912 hasEval := false 2913 for raw := evals.Next(); raw != nil; raw = evals.Next() { 2914 e := raw.(*structs.Evaluation) 2915 2916 // Filter non-exact matches 2917 if e.JobID != job.ID { 2918 continue 2919 } 2920 2921 hasEval = true 2922 if !e.TerminalStatus() { 2923 return structs.JobStatusPending, nil 2924 } 2925 } 2926 2927 // The job is dead if all the allocations and evals are terminal or if there 2928 // are no evals because of garbage collection. 2929 if evalDelete || hasEval || hasAlloc { 2930 return structs.JobStatusDead, nil 2931 } 2932 2933 return structs.JobStatusPending, nil 2934 } 2935 2936 // updateSummaryWithJob creates or updates job summaries when new jobs are 2937 // upserted or existing ones are updated 2938 func (s *StateStore) updateSummaryWithJob(index uint64, job *structs.Job, 2939 txn *memdb.Txn) error { 2940 2941 // COMPAT 0.7: Upgrade old objects that do not have namespaces 2942 if job.Namespace == "" { 2943 job.Namespace = structs.DefaultNamespace 2944 } 2945 2946 // Update the job summary 2947 summaryRaw, err := txn.First("job_summary", "id", job.Namespace, job.ID) 2948 if err != nil { 2949 return fmt.Errorf("job summary lookup failed: %v", err) 2950 } 2951 2952 // Get the summary or create if necessary 2953 var summary *structs.JobSummary 2954 hasSummaryChanged := false 2955 if summaryRaw != nil { 2956 summary = summaryRaw.(*structs.JobSummary).Copy() 2957 } else { 2958 summary = &structs.JobSummary{ 2959 JobID: job.ID, 2960 Namespace: job.Namespace, 2961 Summary: make(map[string]structs.TaskGroupSummary), 2962 Children: new(structs.JobChildrenSummary), 2963 CreateIndex: index, 2964 } 2965 hasSummaryChanged = true 2966 } 2967 2968 for _, tg := range job.TaskGroups { 2969 if _, ok := summary.Summary[tg.Name]; !ok { 2970 newSummary := structs.TaskGroupSummary{ 2971 Complete: 0, 2972 Failed: 0, 2973 Running: 0, 2974 Starting: 0, 2975 } 2976 summary.Summary[tg.Name] = newSummary 2977 hasSummaryChanged = true 2978 } 2979 } 2980 2981 // The job summary has changed, so update the modify index. 2982 if hasSummaryChanged { 2983 summary.ModifyIndex = index 2984 2985 // COMPAT 0.7: Upgrade old objects that do not have namespaces 2986 if summary.Namespace == "" { 2987 summary.Namespace = structs.DefaultNamespace 2988 } 2989 2990 // Update the indexes table for job summary 2991 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 2992 return fmt.Errorf("index update failed: %v", err) 2993 } 2994 if err := txn.Insert("job_summary", summary); err != nil { 2995 return err 2996 } 2997 } 2998 2999 return nil 3000 } 3001 3002 // updateDeploymentWithAlloc is used to update the deployment state associated 3003 // with the given allocation. The passed alloc may be updated if the deployment 3004 // status has changed to capture the modify index at which it has changed. 3005 func (s *StateStore) updateDeploymentWithAlloc(index uint64, alloc, existing *structs.Allocation, txn *memdb.Txn) error { 3006 // Nothing to do if the allocation is not associated with a deployment 3007 if alloc.DeploymentID == "" { 3008 return nil 3009 } 3010 3011 // Get the deployment 3012 ws := memdb.NewWatchSet() 3013 deployment, err := s.deploymentByIDImpl(ws, alloc.DeploymentID, txn) 3014 if err != nil { 3015 return err 3016 } 3017 if deployment == nil { 3018 return nil 3019 } 3020 3021 // Retrieve the deployment state object 3022 _, ok := deployment.TaskGroups[alloc.TaskGroup] 3023 if !ok { 3024 // If the task group isn't part of the deployment, the task group wasn't 3025 // part of a rolling update so nothing to do 3026 return nil 3027 } 3028 3029 // Do not modify in-place. Instead keep track of what must be done 3030 placed := 0 3031 healthy := 0 3032 unhealthy := 0 3033 3034 // If there was no existing allocation, this is a placement and we increment 3035 // the placement 3036 existingHealthSet := existing != nil && existing.DeploymentStatus != nil && existing.DeploymentStatus.Healthy != nil 3037 allocHealthSet := alloc.DeploymentStatus != nil && alloc.DeploymentStatus.Healthy != nil 3038 if existing == nil || existing.DeploymentID != alloc.DeploymentID { 3039 placed++ 3040 } else if !existingHealthSet && allocHealthSet { 3041 if *alloc.DeploymentStatus.Healthy { 3042 healthy++ 3043 } else { 3044 unhealthy++ 3045 } 3046 } else if existingHealthSet && allocHealthSet { 3047 // See if it has gone from healthy to unhealthy 3048 if *existing.DeploymentStatus.Healthy && !*alloc.DeploymentStatus.Healthy { 3049 healthy-- 3050 unhealthy++ 3051 } 3052 } 3053 3054 // Nothing to do 3055 if placed == 0 && healthy == 0 && unhealthy == 0 { 3056 return nil 3057 } 3058 3059 // Update the allocation's deployment status modify index 3060 if alloc.DeploymentStatus != nil && healthy+unhealthy != 0 { 3061 alloc.DeploymentStatus.ModifyIndex = index 3062 } 3063 3064 // Create a copy of the deployment object 3065 deploymentCopy := deployment.Copy() 3066 deploymentCopy.ModifyIndex = index 3067 3068 state := deploymentCopy.TaskGroups[alloc.TaskGroup] 3069 state.PlacedAllocs += placed 3070 state.HealthyAllocs += healthy 3071 state.UnhealthyAllocs += unhealthy 3072 3073 // Upsert the deployment 3074 if err := s.upsertDeploymentImpl(index, deploymentCopy, txn); err != nil { 3075 return err 3076 } 3077 3078 return nil 3079 } 3080 3081 // updateSummaryWithAlloc updates the job summary when allocations are updated 3082 // or inserted 3083 func (s *StateStore) updateSummaryWithAlloc(index uint64, alloc *structs.Allocation, 3084 existingAlloc *structs.Allocation, txn *memdb.Txn) error { 3085 3086 // We don't have to update the summary if the job is missing 3087 if alloc.Job == nil { 3088 return nil 3089 } 3090 // COMPAT 0.7: Upgrade old objects that do not have namespaces 3091 if alloc.Namespace == "" { 3092 alloc.Namespace = structs.DefaultNamespace 3093 } 3094 3095 summaryRaw, err := txn.First("job_summary", "id", alloc.Namespace, alloc.JobID) 3096 if err != nil { 3097 return fmt.Errorf("unable to lookup job summary for job id %q in namespace %q: %v", alloc.JobID, alloc.Namespace, err) 3098 } 3099 3100 if summaryRaw == nil { 3101 // Check if the job is de-registered 3102 rawJob, err := txn.First("jobs", "id", alloc.Namespace, alloc.JobID) 3103 if err != nil { 3104 return fmt.Errorf("unable to query job: %v", err) 3105 } 3106 3107 // If the job is de-registered then we skip updating it's summary 3108 if rawJob == nil { 3109 return nil 3110 } 3111 3112 return fmt.Errorf("job summary for job %q in namespace %q is not present", alloc.JobID, alloc.Namespace) 3113 } 3114 3115 // Get a copy of the existing summary 3116 jobSummary := summaryRaw.(*structs.JobSummary).Copy() 3117 3118 // Not updating the job summary because the allocation doesn't belong to the 3119 // currently registered job 3120 if jobSummary.CreateIndex != alloc.Job.CreateIndex { 3121 return nil 3122 } 3123 3124 tgSummary, ok := jobSummary.Summary[alloc.TaskGroup] 3125 if !ok { 3126 return fmt.Errorf("unable to find task group in the job summary: %v", alloc.TaskGroup) 3127 } 3128 3129 summaryChanged := false 3130 if existingAlloc == nil { 3131 switch alloc.DesiredStatus { 3132 case structs.AllocDesiredStatusStop, structs.AllocDesiredStatusEvict: 3133 s.logger.Printf("[ERR] state_store: new allocation inserted into state store with id: %v and state: %v", 3134 alloc.ID, alloc.DesiredStatus) 3135 } 3136 switch alloc.ClientStatus { 3137 case structs.AllocClientStatusPending: 3138 tgSummary.Starting += 1 3139 if tgSummary.Queued > 0 { 3140 tgSummary.Queued -= 1 3141 } 3142 summaryChanged = true 3143 case structs.AllocClientStatusRunning, structs.AllocClientStatusFailed, 3144 structs.AllocClientStatusComplete: 3145 s.logger.Printf("[ERR] state_store: new allocation inserted into state store with id: %v and state: %v", 3146 alloc.ID, alloc.ClientStatus) 3147 } 3148 } else if existingAlloc.ClientStatus != alloc.ClientStatus { 3149 // Incrementing the client of the bin of the current state 3150 switch alloc.ClientStatus { 3151 case structs.AllocClientStatusRunning: 3152 tgSummary.Running += 1 3153 case structs.AllocClientStatusFailed: 3154 tgSummary.Failed += 1 3155 case structs.AllocClientStatusPending: 3156 tgSummary.Starting += 1 3157 case structs.AllocClientStatusComplete: 3158 tgSummary.Complete += 1 3159 case structs.AllocClientStatusLost: 3160 tgSummary.Lost += 1 3161 } 3162 3163 // Decrementing the count of the bin of the last state 3164 switch existingAlloc.ClientStatus { 3165 case structs.AllocClientStatusRunning: 3166 tgSummary.Running -= 1 3167 case structs.AllocClientStatusPending: 3168 tgSummary.Starting -= 1 3169 case structs.AllocClientStatusLost: 3170 tgSummary.Lost -= 1 3171 case structs.AllocClientStatusFailed, structs.AllocClientStatusComplete: 3172 default: 3173 s.logger.Printf("[ERR] state_store: invalid old state of allocation with id: %v, and state: %v", 3174 existingAlloc.ID, existingAlloc.ClientStatus) 3175 } 3176 summaryChanged = true 3177 } 3178 jobSummary.Summary[alloc.TaskGroup] = tgSummary 3179 3180 if summaryChanged { 3181 jobSummary.ModifyIndex = index 3182 3183 // COMPAT 0.7: Upgrade old objects that do not have namespaces 3184 if jobSummary.Namespace == "" { 3185 jobSummary.Namespace = structs.DefaultNamespace 3186 } 3187 3188 // Update the indexes table for job summary 3189 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 3190 return fmt.Errorf("index update failed: %v", err) 3191 } 3192 3193 if err := txn.Insert("job_summary", jobSummary); err != nil { 3194 return fmt.Errorf("updating job summary failed: %v", err) 3195 } 3196 } 3197 3198 return nil 3199 } 3200 3201 // addEphemeralDiskToTaskGroups adds missing EphemeralDisk objects to TaskGroups 3202 func (s *StateStore) addEphemeralDiskToTaskGroups(job *structs.Job) { 3203 for _, tg := range job.TaskGroups { 3204 var diskMB int 3205 for _, task := range tg.Tasks { 3206 if task.Resources != nil { 3207 diskMB += task.Resources.DiskMB 3208 task.Resources.DiskMB = 0 3209 } 3210 } 3211 if tg.EphemeralDisk != nil { 3212 continue 3213 } 3214 tg.EphemeralDisk = &structs.EphemeralDisk{ 3215 SizeMB: diskMB, 3216 } 3217 } 3218 } 3219 3220 // UpsertACLPolicies is used to create or update a set of ACL policies 3221 func (s *StateStore) UpsertACLPolicies(index uint64, policies []*structs.ACLPolicy) error { 3222 txn := s.db.Txn(true) 3223 defer txn.Abort() 3224 3225 for _, policy := range policies { 3226 // Ensure the policy hash is non-nil. This should be done outside the state store 3227 // for performance reasons, but we check here for defense in depth. 3228 if len(policy.Hash) == 0 { 3229 policy.SetHash() 3230 } 3231 3232 // Check if the policy already exists 3233 existing, err := txn.First("acl_policy", "id", policy.Name) 3234 if err != nil { 3235 return fmt.Errorf("policy lookup failed: %v", err) 3236 } 3237 3238 // Update all the indexes 3239 if existing != nil { 3240 policy.CreateIndex = existing.(*structs.ACLPolicy).CreateIndex 3241 policy.ModifyIndex = index 3242 } else { 3243 policy.CreateIndex = index 3244 policy.ModifyIndex = index 3245 } 3246 3247 // Update the policy 3248 if err := txn.Insert("acl_policy", policy); err != nil { 3249 return fmt.Errorf("upserting policy failed: %v", err) 3250 } 3251 } 3252 3253 // Update the indexes tabl 3254 if err := txn.Insert("index", &IndexEntry{"acl_policy", index}); err != nil { 3255 return fmt.Errorf("index update failed: %v", err) 3256 } 3257 3258 txn.Commit() 3259 return nil 3260 } 3261 3262 // DeleteACLPolicies deletes the policies with the given names 3263 func (s *StateStore) DeleteACLPolicies(index uint64, names []string) error { 3264 txn := s.db.Txn(true) 3265 defer txn.Abort() 3266 3267 // Delete the policy 3268 for _, name := range names { 3269 if _, err := txn.DeleteAll("acl_policy", "id", name); err != nil { 3270 return fmt.Errorf("deleting acl policy failed: %v", err) 3271 } 3272 } 3273 if err := txn.Insert("index", &IndexEntry{"acl_policy", index}); err != nil { 3274 return fmt.Errorf("index update failed: %v", err) 3275 } 3276 txn.Commit() 3277 return nil 3278 } 3279 3280 // ACLPolicyByName is used to lookup a policy by name 3281 func (s *StateStore) ACLPolicyByName(ws memdb.WatchSet, name string) (*structs.ACLPolicy, error) { 3282 txn := s.db.Txn(false) 3283 3284 watchCh, existing, err := txn.FirstWatch("acl_policy", "id", name) 3285 if err != nil { 3286 return nil, fmt.Errorf("acl policy lookup failed: %v", err) 3287 } 3288 ws.Add(watchCh) 3289 3290 if existing != nil { 3291 return existing.(*structs.ACLPolicy), nil 3292 } 3293 return nil, nil 3294 } 3295 3296 // ACLPolicyByNamePrefix is used to lookup policies by prefix 3297 func (s *StateStore) ACLPolicyByNamePrefix(ws memdb.WatchSet, prefix string) (memdb.ResultIterator, error) { 3298 txn := s.db.Txn(false) 3299 3300 iter, err := txn.Get("acl_policy", "id_prefix", prefix) 3301 if err != nil { 3302 return nil, fmt.Errorf("acl policy lookup failed: %v", err) 3303 } 3304 ws.Add(iter.WatchCh()) 3305 3306 return iter, nil 3307 } 3308 3309 // ACLPolicies returns an iterator over all the acl policies 3310 func (s *StateStore) ACLPolicies(ws memdb.WatchSet) (memdb.ResultIterator, error) { 3311 txn := s.db.Txn(false) 3312 3313 // Walk the entire table 3314 iter, err := txn.Get("acl_policy", "id") 3315 if err != nil { 3316 return nil, err 3317 } 3318 ws.Add(iter.WatchCh()) 3319 return iter, nil 3320 } 3321 3322 // UpsertACLTokens is used to create or update a set of ACL tokens 3323 func (s *StateStore) UpsertACLTokens(index uint64, tokens []*structs.ACLToken) error { 3324 txn := s.db.Txn(true) 3325 defer txn.Abort() 3326 3327 for _, token := range tokens { 3328 // Ensure the policy hash is non-nil. This should be done outside the state store 3329 // for performance reasons, but we check here for defense in depth. 3330 if len(token.Hash) == 0 { 3331 token.SetHash() 3332 } 3333 3334 // Check if the token already exists 3335 existing, err := txn.First("acl_token", "id", token.AccessorID) 3336 if err != nil { 3337 return fmt.Errorf("token lookup failed: %v", err) 3338 } 3339 3340 // Update all the indexes 3341 if existing != nil { 3342 existTK := existing.(*structs.ACLToken) 3343 token.CreateIndex = existTK.CreateIndex 3344 token.ModifyIndex = index 3345 3346 // Do not allow SecretID or create time to change 3347 token.SecretID = existTK.SecretID 3348 token.CreateTime = existTK.CreateTime 3349 3350 } else { 3351 token.CreateIndex = index 3352 token.ModifyIndex = index 3353 } 3354 3355 // Update the token 3356 if err := txn.Insert("acl_token", token); err != nil { 3357 return fmt.Errorf("upserting token failed: %v", err) 3358 } 3359 } 3360 3361 // Update the indexes table 3362 if err := txn.Insert("index", &IndexEntry{"acl_token", index}); err != nil { 3363 return fmt.Errorf("index update failed: %v", err) 3364 } 3365 txn.Commit() 3366 return nil 3367 } 3368 3369 // DeleteACLTokens deletes the tokens with the given accessor ids 3370 func (s *StateStore) DeleteACLTokens(index uint64, ids []string) error { 3371 txn := s.db.Txn(true) 3372 defer txn.Abort() 3373 3374 // Delete the tokens 3375 for _, id := range ids { 3376 if _, err := txn.DeleteAll("acl_token", "id", id); err != nil { 3377 return fmt.Errorf("deleting acl token failed: %v", err) 3378 } 3379 } 3380 if err := txn.Insert("index", &IndexEntry{"acl_token", index}); err != nil { 3381 return fmt.Errorf("index update failed: %v", err) 3382 } 3383 txn.Commit() 3384 return nil 3385 } 3386 3387 // ACLTokenByAccessorID is used to lookup a token by accessor ID 3388 func (s *StateStore) ACLTokenByAccessorID(ws memdb.WatchSet, id string) (*structs.ACLToken, error) { 3389 txn := s.db.Txn(false) 3390 3391 watchCh, existing, err := txn.FirstWatch("acl_token", "id", id) 3392 if err != nil { 3393 return nil, fmt.Errorf("acl token lookup failed: %v", err) 3394 } 3395 ws.Add(watchCh) 3396 3397 if existing != nil { 3398 return existing.(*structs.ACLToken), nil 3399 } 3400 return nil, nil 3401 } 3402 3403 // ACLTokenBySecretID is used to lookup a token by secret ID 3404 func (s *StateStore) ACLTokenBySecretID(ws memdb.WatchSet, secretID string) (*structs.ACLToken, error) { 3405 txn := s.db.Txn(false) 3406 3407 watchCh, existing, err := txn.FirstWatch("acl_token", "secret", secretID) 3408 if err != nil { 3409 return nil, fmt.Errorf("acl token lookup failed: %v", err) 3410 } 3411 ws.Add(watchCh) 3412 3413 if existing != nil { 3414 return existing.(*structs.ACLToken), nil 3415 } 3416 return nil, nil 3417 } 3418 3419 // ACLTokenByAccessorIDPrefix is used to lookup tokens by prefix 3420 func (s *StateStore) ACLTokenByAccessorIDPrefix(ws memdb.WatchSet, prefix string) (memdb.ResultIterator, error) { 3421 txn := s.db.Txn(false) 3422 3423 iter, err := txn.Get("acl_token", "id_prefix", prefix) 3424 if err != nil { 3425 return nil, fmt.Errorf("acl token lookup failed: %v", err) 3426 } 3427 ws.Add(iter.WatchCh()) 3428 return iter, nil 3429 } 3430 3431 // ACLTokens returns an iterator over all the tokens 3432 func (s *StateStore) ACLTokens(ws memdb.WatchSet) (memdb.ResultIterator, error) { 3433 txn := s.db.Txn(false) 3434 3435 // Walk the entire table 3436 iter, err := txn.Get("acl_token", "id") 3437 if err != nil { 3438 return nil, err 3439 } 3440 ws.Add(iter.WatchCh()) 3441 return iter, nil 3442 } 3443 3444 // ACLTokensByGlobal returns an iterator over all the tokens filtered by global value 3445 func (s *StateStore) ACLTokensByGlobal(ws memdb.WatchSet, globalVal bool) (memdb.ResultIterator, error) { 3446 txn := s.db.Txn(false) 3447 3448 // Walk the entire table 3449 iter, err := txn.Get("acl_token", "global", globalVal) 3450 if err != nil { 3451 return nil, err 3452 } 3453 ws.Add(iter.WatchCh()) 3454 return iter, nil 3455 } 3456 3457 // CanBootstrapACLToken checks if bootstrapping is possible and returns the reset index 3458 func (s *StateStore) CanBootstrapACLToken() (bool, uint64, error) { 3459 txn := s.db.Txn(false) 3460 3461 // Lookup the bootstrap sentinel 3462 out, err := txn.First("index", "id", "acl_token_bootstrap") 3463 if err != nil { 3464 return false, 0, err 3465 } 3466 3467 // No entry, we haven't bootstrapped yet 3468 if out == nil { 3469 return true, 0, nil 3470 } 3471 3472 // Return the reset index if we've already bootstrapped 3473 return false, out.(*IndexEntry).Value, nil 3474 } 3475 3476 // BootstrapACLToken is used to create an initial ACL token 3477 func (s *StateStore) BootstrapACLTokens(index, resetIndex uint64, token *structs.ACLToken) error { 3478 txn := s.db.Txn(true) 3479 defer txn.Abort() 3480 3481 // Check if we have already done a bootstrap 3482 existing, err := txn.First("index", "id", "acl_token_bootstrap") 3483 if err != nil { 3484 return fmt.Errorf("bootstrap check failed: %v", err) 3485 } 3486 if existing != nil { 3487 if resetIndex == 0 { 3488 return fmt.Errorf("ACL bootstrap already done") 3489 } else if resetIndex != existing.(*IndexEntry).Value { 3490 return fmt.Errorf("Invalid reset index for ACL bootstrap") 3491 } 3492 } 3493 3494 // Update the Create/Modify time 3495 token.CreateIndex = index 3496 token.ModifyIndex = index 3497 3498 // Insert the token 3499 if err := txn.Insert("acl_token", token); err != nil { 3500 return fmt.Errorf("upserting token failed: %v", err) 3501 } 3502 3503 // Update the indexes table, prevents future bootstrap until reset 3504 if err := txn.Insert("index", &IndexEntry{"acl_token", index}); err != nil { 3505 return fmt.Errorf("index update failed: %v", err) 3506 } 3507 if err := txn.Insert("index", &IndexEntry{"acl_token_bootstrap", index}); err != nil { 3508 return fmt.Errorf("index update failed: %v", err) 3509 } 3510 txn.Commit() 3511 return nil 3512 } 3513 3514 // StateSnapshot is used to provide a point-in-time snapshot 3515 type StateSnapshot struct { 3516 StateStore 3517 } 3518 3519 // StateRestore is used to optimize the performance when 3520 // restoring state by only using a single large transaction 3521 // instead of thousands of sub transactions 3522 type StateRestore struct { 3523 txn *memdb.Txn 3524 } 3525 3526 // Abort is used to abort the restore operation 3527 func (s *StateRestore) Abort() { 3528 s.txn.Abort() 3529 } 3530 3531 // Commit is used to commit the restore operation 3532 func (s *StateRestore) Commit() { 3533 s.txn.Commit() 3534 } 3535 3536 // NodeRestore is used to restore a node 3537 func (r *StateRestore) NodeRestore(node *structs.Node) error { 3538 if err := r.txn.Insert("nodes", node); err != nil { 3539 return fmt.Errorf("node insert failed: %v", err) 3540 } 3541 return nil 3542 } 3543 3544 // JobRestore is used to restore a job 3545 func (r *StateRestore) JobRestore(job *structs.Job) error { 3546 // Create the EphemeralDisk if it's nil by adding up DiskMB from task resources. 3547 // COMPAT 0.4.1 -> 0.5 3548 r.addEphemeralDiskToTaskGroups(job) 3549 3550 if err := r.txn.Insert("jobs", job); err != nil { 3551 return fmt.Errorf("job insert failed: %v", err) 3552 } 3553 return nil 3554 } 3555 3556 // EvalRestore is used to restore an evaluation 3557 func (r *StateRestore) EvalRestore(eval *structs.Evaluation) error { 3558 if err := r.txn.Insert("evals", eval); err != nil { 3559 return fmt.Errorf("eval insert failed: %v", err) 3560 } 3561 return nil 3562 } 3563 3564 // AllocRestore is used to restore an allocation 3565 func (r *StateRestore) AllocRestore(alloc *structs.Allocation) error { 3566 // Set the shared resources if it's not present 3567 // COMPAT 0.4.1 -> 0.5 3568 if alloc.SharedResources == nil { 3569 alloc.SharedResources = &structs.Resources{ 3570 DiskMB: alloc.Resources.DiskMB, 3571 } 3572 } 3573 3574 // Create the EphemeralDisk if it's nil by adding up DiskMB from task resources. 3575 if alloc.Job != nil { 3576 r.addEphemeralDiskToTaskGroups(alloc.Job) 3577 } 3578 3579 if err := r.txn.Insert("allocs", alloc); err != nil { 3580 return fmt.Errorf("alloc insert failed: %v", err) 3581 } 3582 return nil 3583 } 3584 3585 // IndexRestore is used to restore an index 3586 func (r *StateRestore) IndexRestore(idx *IndexEntry) error { 3587 if err := r.txn.Insert("index", idx); err != nil { 3588 return fmt.Errorf("index insert failed: %v", err) 3589 } 3590 return nil 3591 } 3592 3593 // PeriodicLaunchRestore is used to restore a periodic launch. 3594 func (r *StateRestore) PeriodicLaunchRestore(launch *structs.PeriodicLaunch) error { 3595 if err := r.txn.Insert("periodic_launch", launch); err != nil { 3596 return fmt.Errorf("periodic launch insert failed: %v", err) 3597 } 3598 return nil 3599 } 3600 3601 // JobSummaryRestore is used to restore a job summary 3602 func (r *StateRestore) JobSummaryRestore(jobSummary *structs.JobSummary) error { 3603 if err := r.txn.Insert("job_summary", jobSummary); err != nil { 3604 return fmt.Errorf("job summary insert failed: %v", err) 3605 } 3606 return nil 3607 } 3608 3609 // JobVersionRestore is used to restore a job version 3610 func (r *StateRestore) JobVersionRestore(version *structs.Job) error { 3611 if err := r.txn.Insert("job_version", version); err != nil { 3612 return fmt.Errorf("job version insert failed: %v", err) 3613 } 3614 return nil 3615 } 3616 3617 // DeploymentRestore is used to restore a deployment 3618 func (r *StateRestore) DeploymentRestore(deployment *structs.Deployment) error { 3619 if err := r.txn.Insert("deployment", deployment); err != nil { 3620 return fmt.Errorf("deployment insert failed: %v", err) 3621 } 3622 return nil 3623 } 3624 3625 // VaultAccessorRestore is used to restore a vault accessor 3626 func (r *StateRestore) VaultAccessorRestore(accessor *structs.VaultAccessor) error { 3627 if err := r.txn.Insert("vault_accessors", accessor); err != nil { 3628 return fmt.Errorf("vault accessor insert failed: %v", err) 3629 } 3630 return nil 3631 } 3632 3633 // ACLPolicyRestore is used to restore an ACL policy 3634 func (r *StateRestore) ACLPolicyRestore(policy *structs.ACLPolicy) error { 3635 if err := r.txn.Insert("acl_policy", policy); err != nil { 3636 return fmt.Errorf("inserting acl policy failed: %v", err) 3637 } 3638 return nil 3639 } 3640 3641 // ACLTokenRestore is used to restore an ACL token 3642 func (r *StateRestore) ACLTokenRestore(token *structs.ACLToken) error { 3643 if err := r.txn.Insert("acl_token", token); err != nil { 3644 return fmt.Errorf("inserting acl token failed: %v", err) 3645 } 3646 return nil 3647 } 3648 3649 // addEphemeralDiskToTaskGroups adds missing EphemeralDisk objects to TaskGroups 3650 func (r *StateRestore) addEphemeralDiskToTaskGroups(job *structs.Job) { 3651 for _, tg := range job.TaskGroups { 3652 if tg.EphemeralDisk != nil { 3653 continue 3654 } 3655 var sizeMB int 3656 for _, task := range tg.Tasks { 3657 if task.Resources != nil { 3658 sizeMB += task.Resources.DiskMB 3659 task.Resources.DiskMB = 0 3660 } 3661 } 3662 tg.EphemeralDisk = &structs.EphemeralDisk{ 3663 SizeMB: sizeMB, 3664 } 3665 } 3666 }