github.com/hspak/nomad@v0.7.2-0.20180309000617-bc4ae22a39a5/nomad/state/state_store.go (about) 1 package state 2 3 import ( 4 "context" 5 "fmt" 6 "io" 7 "log" 8 "sort" 9 10 "github.com/hashicorp/go-memdb" 11 multierror "github.com/hashicorp/go-multierror" 12 "github.com/hashicorp/nomad/helper" 13 "github.com/hashicorp/nomad/nomad/structs" 14 ) 15 16 // IndexEntry is used with the "index" table 17 // for managing the latest Raft index affecting a table. 18 type IndexEntry struct { 19 Key string 20 Value uint64 21 } 22 23 // StateStoreConfig is used to configure a new state store 24 type StateStoreConfig struct { 25 // LogOutput is used to configure the output of the state store's logs 26 LogOutput io.Writer 27 28 // Region is the region of the server embedding the state store. 29 Region string 30 } 31 32 // The StateStore is responsible for maintaining all the Nomad 33 // state. It is manipulated by the FSM which maintains consistency 34 // through the use of Raft. The goals of the StateStore are to provide 35 // high concurrency for read operations without blocking writes, and 36 // to provide write availability in the face of reads. EVERY object 37 // returned as a result of a read against the state store should be 38 // considered a constant and NEVER modified in place. 39 type StateStore struct { 40 logger *log.Logger 41 db *memdb.MemDB 42 43 // config is the passed in configuration 44 config *StateStoreConfig 45 46 // abandonCh is used to signal watchers that this state store has been 47 // abandoned (usually during a restore). This is only ever closed. 
48 abandonCh chan struct{} 49 } 50 51 // NewStateStore is used to create a new state store 52 func NewStateStore(config *StateStoreConfig) (*StateStore, error) { 53 // Create the MemDB 54 db, err := memdb.NewMemDB(stateStoreSchema()) 55 if err != nil { 56 return nil, fmt.Errorf("state store setup failed: %v", err) 57 } 58 59 // Create the state store 60 s := &StateStore{ 61 logger: log.New(config.LogOutput, "", log.LstdFlags), 62 db: db, 63 config: config, 64 abandonCh: make(chan struct{}), 65 } 66 return s, nil 67 } 68 69 // Config returns the state store configuration. 70 func (s *StateStore) Config() *StateStoreConfig { 71 return s.config 72 } 73 74 // Snapshot is used to create a point in time snapshot. Because 75 // we use MemDB, we just need to snapshot the state of the underlying 76 // database. 77 func (s *StateStore) Snapshot() (*StateSnapshot, error) { 78 snap := &StateSnapshot{ 79 StateStore: StateStore{ 80 logger: s.logger, 81 config: s.config, 82 db: s.db.Snapshot(), 83 }, 84 } 85 return snap, nil 86 } 87 88 // Restore is used to optimize the efficiency of rebuilding 89 // state by minimizing the number of transactions and checking 90 // overhead. 91 func (s *StateStore) Restore() (*StateRestore, error) { 92 txn := s.db.Txn(true) 93 r := &StateRestore{ 94 txn: txn, 95 } 96 return r, nil 97 } 98 99 // AbandonCh returns a channel you can wait on to know if the state store was 100 // abandoned. 101 func (s *StateStore) AbandonCh() <-chan struct{} { 102 return s.abandonCh 103 } 104 105 // Abandon is used to signal that the given state store has been abandoned. 106 // Calling this more than one time will panic. 107 func (s *StateStore) Abandon() { 108 close(s.abandonCh) 109 } 110 111 // QueryFn is the definition of a function that can be used to implement a basic 112 // blocking query against the state store. 
113 type QueryFn func(memdb.WatchSet, *StateStore) (resp interface{}, index uint64, err error) 114 115 // BlockingQuery takes a query function and runs the function until the minimum 116 // query index is met or until the passed context is cancelled. 117 func (s *StateStore) BlockingQuery(query QueryFn, minIndex uint64, ctx context.Context) ( 118 resp interface{}, index uint64, err error) { 119 120 RUN_QUERY: 121 // We capture the state store and its abandon channel but pass a snapshot to 122 // the blocking query function. We operate on the snapshot to allow separate 123 // calls to the state store not all wrapped within the same transaction. 124 abandonCh := s.AbandonCh() 125 snap, _ := s.Snapshot() 126 stateSnap := &snap.StateStore 127 128 // We can skip all watch tracking if this isn't a blocking query. 129 var ws memdb.WatchSet 130 if minIndex > 0 { 131 ws = memdb.NewWatchSet() 132 133 // This channel will be closed if a snapshot is restored and the 134 // whole state store is abandoned. 135 ws.Add(abandonCh) 136 } 137 138 resp, index, err = query(ws, stateSnap) 139 if err != nil { 140 return nil, index, err 141 } 142 143 // We haven't reached the min-index yet. 144 if minIndex > 0 && index <= minIndex { 145 if err := ws.WatchCtx(ctx); err != nil { 146 return nil, index, err 147 } 148 149 goto RUN_QUERY 150 } 151 152 return resp, index, nil 153 } 154 155 // UpsertPlanResults is used to upsert the results of a plan. 156 func (s *StateStore) UpsertPlanResults(index uint64, results *structs.ApplyPlanResultsRequest) error { 157 txn := s.db.Txn(true) 158 defer txn.Abort() 159 160 // Upsert the newly created or updated deployment 161 if results.Deployment != nil { 162 if err := s.upsertDeploymentImpl(index, results.Deployment, txn); err != nil { 163 return err 164 } 165 } 166 167 // Update the status of deployments effected by the plan. 
168 if len(results.DeploymentUpdates) != 0 { 169 s.upsertDeploymentUpdates(index, results.DeploymentUpdates, txn) 170 } 171 172 // Attach the job to all the allocations. It is pulled out in the payload to 173 // avoid the redundancy of encoding, but should be denormalized prior to 174 // being inserted into MemDB. 175 structs.DenormalizeAllocationJobs(results.Job, results.Alloc) 176 177 // Calculate the total resources of allocations. It is pulled out in the 178 // payload to avoid encoding something that can be computed, but should be 179 // denormalized prior to being inserted into MemDB. 180 for _, alloc := range results.Alloc { 181 if alloc.Resources != nil { 182 continue 183 } 184 185 alloc.Resources = new(structs.Resources) 186 for _, task := range alloc.TaskResources { 187 alloc.Resources.Add(task) 188 } 189 190 // Add the shared resources 191 alloc.Resources.Add(alloc.SharedResources) 192 } 193 194 // Upsert the allocations 195 if err := s.upsertAllocsImpl(index, results.Alloc, txn); err != nil { 196 return err 197 } 198 199 // COMPAT: Nomad versions before 0.7.1 did not include the eval ID when 200 // applying the plan. Thus while we are upgrading, we ignore updating the 201 // modify index of evaluations from older plans. 202 if results.EvalID != "" { 203 // Update the modify index of the eval id 204 if err := s.updateEvalModifyIndex(txn, index, results.EvalID); err != nil { 205 return err 206 } 207 } 208 209 txn.Commit() 210 return nil 211 } 212 213 // upsertDeploymentUpdates updates the deployments given the passed status 214 // updates. 215 func (s *StateStore) upsertDeploymentUpdates(index uint64, updates []*structs.DeploymentStatusUpdate, txn *memdb.Txn) error { 216 for _, u := range updates { 217 if err := s.updateDeploymentStatusImpl(index, u, txn); err != nil { 218 return err 219 } 220 } 221 222 return nil 223 } 224 225 // UpsertJobSummary upserts a job summary into the state store. 
226 func (s *StateStore) UpsertJobSummary(index uint64, jobSummary *structs.JobSummary) error { 227 txn := s.db.Txn(true) 228 defer txn.Abort() 229 230 // COMPAT 0.7: Upgrade old objects that do not have namespaces 231 if jobSummary.Namespace == "" { 232 jobSummary.Namespace = structs.DefaultNamespace 233 } 234 235 // Check if the job summary already exists 236 existing, err := txn.First("job_summary", "id", jobSummary.Namespace, jobSummary.JobID) 237 if err != nil { 238 return fmt.Errorf("job summary lookup failed: %v", err) 239 } 240 241 // Setup the indexes correctly 242 if existing != nil { 243 jobSummary.CreateIndex = existing.(*structs.JobSummary).CreateIndex 244 jobSummary.ModifyIndex = index 245 } else { 246 jobSummary.CreateIndex = index 247 jobSummary.ModifyIndex = index 248 } 249 250 // Update the index 251 if err := txn.Insert("job_summary", jobSummary); err != nil { 252 return err 253 } 254 255 // Update the indexes table for job summary 256 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 257 return fmt.Errorf("index update failed: %v", err) 258 } 259 260 txn.Commit() 261 return nil 262 } 263 264 // DeleteJobSummary deletes the job summary with the given ID. This is for 265 // testing purposes only. 266 func (s *StateStore) DeleteJobSummary(index uint64, namespace, id string) error { 267 txn := s.db.Txn(true) 268 defer txn.Abort() 269 270 // COMPAT 0.7: Upgrade old objects that do not have namespaces 271 if namespace == "" { 272 namespace = structs.DefaultNamespace 273 } 274 275 // Delete the job summary 276 if _, err := txn.DeleteAll("job_summary", "id", namespace, id); err != nil { 277 return fmt.Errorf("deleting job summary failed: %v", err) 278 } 279 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 280 return fmt.Errorf("index update failed: %v", err) 281 } 282 txn.Commit() 283 return nil 284 } 285 286 // UpsertDeployment is used to insert a new deployment. 
If cancelPrior is set to 287 // true, all prior deployments for the same job will be cancelled. 288 func (s *StateStore) UpsertDeployment(index uint64, deployment *structs.Deployment) error { 289 txn := s.db.Txn(true) 290 defer txn.Abort() 291 if err := s.upsertDeploymentImpl(index, deployment, txn); err != nil { 292 return err 293 } 294 txn.Commit() 295 return nil 296 } 297 298 func (s *StateStore) upsertDeploymentImpl(index uint64, deployment *structs.Deployment, txn *memdb.Txn) error { 299 // Check if the deployment already exists 300 existing, err := txn.First("deployment", "id", deployment.ID) 301 if err != nil { 302 return fmt.Errorf("deployment lookup failed: %v", err) 303 } 304 305 // COMPAT 0.7: Upgrade old objects that do not have namespaces 306 if deployment.Namespace == "" { 307 deployment.Namespace = structs.DefaultNamespace 308 } 309 310 // Setup the indexes correctly 311 if existing != nil { 312 deployment.CreateIndex = existing.(*structs.Deployment).CreateIndex 313 deployment.ModifyIndex = index 314 } else { 315 deployment.CreateIndex = index 316 deployment.ModifyIndex = index 317 } 318 319 // Insert the deployment 320 if err := txn.Insert("deployment", deployment); err != nil { 321 return err 322 } 323 324 // Update the indexes table for deployment 325 if err := txn.Insert("index", &IndexEntry{"deployment", index}); err != nil { 326 return fmt.Errorf("index update failed: %v", err) 327 } 328 329 // If the deployment is being marked as complete, set the job to stable. 
330 if deployment.Status == structs.DeploymentStatusSuccessful { 331 if err := s.updateJobStabilityImpl(index, deployment.Namespace, deployment.JobID, deployment.JobVersion, true, txn); err != nil { 332 return fmt.Errorf("failed to update job stability: %v", err) 333 } 334 } 335 336 return nil 337 } 338 339 func (s *StateStore) Deployments(ws memdb.WatchSet) (memdb.ResultIterator, error) { 340 txn := s.db.Txn(false) 341 342 // Walk the entire deployments table 343 iter, err := txn.Get("deployment", "id") 344 if err != nil { 345 return nil, err 346 } 347 348 ws.Add(iter.WatchCh()) 349 return iter, nil 350 } 351 352 func (s *StateStore) DeploymentsByNamespace(ws memdb.WatchSet, namespace string) (memdb.ResultIterator, error) { 353 txn := s.db.Txn(false) 354 355 // Walk the entire deployments table 356 iter, err := txn.Get("deployment", "namespace", namespace) 357 if err != nil { 358 return nil, err 359 } 360 361 ws.Add(iter.WatchCh()) 362 return iter, nil 363 } 364 365 func (s *StateStore) DeploymentsByIDPrefix(ws memdb.WatchSet, namespace, deploymentID string) (memdb.ResultIterator, error) { 366 txn := s.db.Txn(false) 367 368 // Walk the entire deployments table 369 iter, err := txn.Get("deployment", "id_prefix", deploymentID) 370 if err != nil { 371 return nil, err 372 } 373 374 ws.Add(iter.WatchCh()) 375 376 // Wrap the iterator in a filter 377 wrap := memdb.NewFilterIterator(iter, deploymentNamespaceFilter(namespace)) 378 return wrap, nil 379 } 380 381 // deploymentNamespaceFilter returns a filter function that filters all 382 // deployment not in the given namespace. 
383 func deploymentNamespaceFilter(namespace string) func(interface{}) bool { 384 return func(raw interface{}) bool { 385 d, ok := raw.(*structs.Deployment) 386 if !ok { 387 return true 388 } 389 390 return d.Namespace != namespace 391 } 392 } 393 394 func (s *StateStore) DeploymentByID(ws memdb.WatchSet, deploymentID string) (*structs.Deployment, error) { 395 txn := s.db.Txn(false) 396 return s.deploymentByIDImpl(ws, deploymentID, txn) 397 } 398 399 func (s *StateStore) deploymentByIDImpl(ws memdb.WatchSet, deploymentID string, txn *memdb.Txn) (*structs.Deployment, error) { 400 watchCh, existing, err := txn.FirstWatch("deployment", "id", deploymentID) 401 if err != nil { 402 return nil, fmt.Errorf("deployment lookup failed: %v", err) 403 } 404 ws.Add(watchCh) 405 406 if existing != nil { 407 return existing.(*structs.Deployment), nil 408 } 409 410 return nil, nil 411 } 412 413 func (s *StateStore) DeploymentsByJobID(ws memdb.WatchSet, namespace, jobID string) ([]*structs.Deployment, error) { 414 txn := s.db.Txn(false) 415 416 // COMPAT 0.7: Upgrade old objects that do not have namespaces 417 if namespace == "" { 418 namespace = structs.DefaultNamespace 419 } 420 421 // Get an iterator over the deployments 422 iter, err := txn.Get("deployment", "job", namespace, jobID) 423 if err != nil { 424 return nil, err 425 } 426 427 ws.Add(iter.WatchCh()) 428 429 var out []*structs.Deployment 430 for { 431 raw := iter.Next() 432 if raw == nil { 433 break 434 } 435 436 d := raw.(*structs.Deployment) 437 out = append(out, d) 438 } 439 440 return out, nil 441 } 442 443 // LatestDeploymentByJobID returns the latest deployment for the given job. The 444 // latest is determined strictly by CreateIndex. 
445 func (s *StateStore) LatestDeploymentByJobID(ws memdb.WatchSet, namespace, jobID string) (*structs.Deployment, error) { 446 txn := s.db.Txn(false) 447 448 // COMPAT 0.7: Upgrade old objects that do not have namespaces 449 if namespace == "" { 450 namespace = structs.DefaultNamespace 451 } 452 453 // Get an iterator over the deployments 454 iter, err := txn.Get("deployment", "job", namespace, jobID) 455 if err != nil { 456 return nil, err 457 } 458 459 ws.Add(iter.WatchCh()) 460 461 var out *structs.Deployment 462 for { 463 raw := iter.Next() 464 if raw == nil { 465 break 466 } 467 468 d := raw.(*structs.Deployment) 469 if out == nil || out.CreateIndex < d.CreateIndex { 470 out = d 471 } 472 } 473 474 return out, nil 475 } 476 477 // DeleteDeployment is used to delete a set of deployments by ID 478 func (s *StateStore) DeleteDeployment(index uint64, deploymentIDs []string) error { 479 txn := s.db.Txn(true) 480 defer txn.Abort() 481 482 if len(deploymentIDs) == 0 { 483 return nil 484 } 485 486 for _, deploymentID := range deploymentIDs { 487 // Lookup the deployment 488 existing, err := txn.First("deployment", "id", deploymentID) 489 if err != nil { 490 return fmt.Errorf("deployment lookup failed: %v", err) 491 } 492 if existing == nil { 493 return fmt.Errorf("deployment not found") 494 } 495 496 // Delete the deployment 497 if err := txn.Delete("deployment", existing); err != nil { 498 return fmt.Errorf("deployment delete failed: %v", err) 499 } 500 } 501 502 if err := txn.Insert("index", &IndexEntry{"deployment", index}); err != nil { 503 return fmt.Errorf("index update failed: %v", err) 504 } 505 506 txn.Commit() 507 return nil 508 } 509 510 // UpsertNode is used to register a node or update a node definition 511 // This is assumed to be triggered by the client, so we retain the value 512 // of drain which is set by the scheduler. 
513 func (s *StateStore) UpsertNode(index uint64, node *structs.Node) error { 514 txn := s.db.Txn(true) 515 defer txn.Abort() 516 517 // Check if the node already exists 518 existing, err := txn.First("nodes", "id", node.ID) 519 if err != nil { 520 return fmt.Errorf("node lookup failed: %v", err) 521 } 522 523 // Setup the indexes correctly 524 if existing != nil { 525 exist := existing.(*structs.Node) 526 node.CreateIndex = exist.CreateIndex 527 node.ModifyIndex = index 528 node.Drain = exist.Drain // Retain the drain mode 529 } else { 530 node.CreateIndex = index 531 node.ModifyIndex = index 532 } 533 534 // Insert the node 535 if err := txn.Insert("nodes", node); err != nil { 536 return fmt.Errorf("node insert failed: %v", err) 537 } 538 if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil { 539 return fmt.Errorf("index update failed: %v", err) 540 } 541 542 txn.Commit() 543 return nil 544 } 545 546 // DeleteNode is used to deregister a node 547 func (s *StateStore) DeleteNode(index uint64, nodeID string) error { 548 txn := s.db.Txn(true) 549 defer txn.Abort() 550 551 // Lookup the node 552 existing, err := txn.First("nodes", "id", nodeID) 553 if err != nil { 554 return fmt.Errorf("node lookup failed: %v", err) 555 } 556 if existing == nil { 557 return fmt.Errorf("node not found") 558 } 559 560 // Delete the node 561 if err := txn.Delete("nodes", existing); err != nil { 562 return fmt.Errorf("node delete failed: %v", err) 563 } 564 if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil { 565 return fmt.Errorf("index update failed: %v", err) 566 } 567 568 txn.Commit() 569 return nil 570 } 571 572 // UpdateNodeStatus is used to update the status of a node 573 func (s *StateStore) UpdateNodeStatus(index uint64, nodeID, status string) error { 574 txn := s.db.Txn(true) 575 defer txn.Abort() 576 577 // Lookup the node 578 existing, err := txn.First("nodes", "id", nodeID) 579 if err != nil { 580 return fmt.Errorf("node lookup failed: 
%v", err) 581 } 582 if existing == nil { 583 return fmt.Errorf("node not found") 584 } 585 586 // Copy the existing node 587 existingNode := existing.(*structs.Node) 588 copyNode := new(structs.Node) 589 *copyNode = *existingNode 590 591 // Update the status in the copy 592 copyNode.Status = status 593 copyNode.ModifyIndex = index 594 595 // Insert the node 596 if err := txn.Insert("nodes", copyNode); err != nil { 597 return fmt.Errorf("node update failed: %v", err) 598 } 599 if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil { 600 return fmt.Errorf("index update failed: %v", err) 601 } 602 603 txn.Commit() 604 return nil 605 } 606 607 // UpdateNodeDrain is used to update the drain of a node 608 func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, drain bool) error { 609 txn := s.db.Txn(true) 610 defer txn.Abort() 611 612 // Lookup the node 613 existing, err := txn.First("nodes", "id", nodeID) 614 if err != nil { 615 return fmt.Errorf("node lookup failed: %v", err) 616 } 617 if existing == nil { 618 return fmt.Errorf("node not found") 619 } 620 621 // Copy the existing node 622 existingNode := existing.(*structs.Node) 623 copyNode := new(structs.Node) 624 *copyNode = *existingNode 625 626 // Update the drain in the copy 627 copyNode.Drain = drain 628 copyNode.ModifyIndex = index 629 630 // Insert the node 631 if err := txn.Insert("nodes", copyNode); err != nil { 632 return fmt.Errorf("node update failed: %v", err) 633 } 634 if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil { 635 return fmt.Errorf("index update failed: %v", err) 636 } 637 638 txn.Commit() 639 return nil 640 } 641 642 // NodeByID is used to lookup a node by ID 643 func (s *StateStore) NodeByID(ws memdb.WatchSet, nodeID string) (*structs.Node, error) { 644 txn := s.db.Txn(false) 645 646 watchCh, existing, err := txn.FirstWatch("nodes", "id", nodeID) 647 if err != nil { 648 return nil, fmt.Errorf("node lookup failed: %v", err) 649 } 650 
ws.Add(watchCh) 651 652 if existing != nil { 653 return existing.(*structs.Node), nil 654 } 655 return nil, nil 656 } 657 658 // NodesByIDPrefix is used to lookup nodes by prefix 659 func (s *StateStore) NodesByIDPrefix(ws memdb.WatchSet, nodeID string) (memdb.ResultIterator, error) { 660 txn := s.db.Txn(false) 661 662 iter, err := txn.Get("nodes", "id_prefix", nodeID) 663 if err != nil { 664 return nil, fmt.Errorf("node lookup failed: %v", err) 665 } 666 ws.Add(iter.WatchCh()) 667 668 return iter, nil 669 } 670 671 // NodeBySecretID is used to lookup a node by SecretID 672 func (s *StateStore) NodeBySecretID(ws memdb.WatchSet, secretID string) (*structs.Node, error) { 673 txn := s.db.Txn(false) 674 675 watchCh, existing, err := txn.FirstWatch("nodes", "secret_id", secretID) 676 if err != nil { 677 return nil, fmt.Errorf("node lookup by SecretID failed: %v", err) 678 } 679 ws.Add(watchCh) 680 681 if existing != nil { 682 return existing.(*structs.Node), nil 683 } 684 return nil, nil 685 } 686 687 // Nodes returns an iterator over all the nodes 688 func (s *StateStore) Nodes(ws memdb.WatchSet) (memdb.ResultIterator, error) { 689 txn := s.db.Txn(false) 690 691 // Walk the entire nodes table 692 iter, err := txn.Get("nodes", "id") 693 if err != nil { 694 return nil, err 695 } 696 ws.Add(iter.WatchCh()) 697 return iter, nil 698 } 699 700 // UpsertJob is used to register a job or update a job definition 701 func (s *StateStore) UpsertJob(index uint64, job *structs.Job) error { 702 txn := s.db.Txn(true) 703 defer txn.Abort() 704 if err := s.upsertJobImpl(index, job, false, txn); err != nil { 705 return err 706 } 707 txn.Commit() 708 return nil 709 } 710 711 // upsertJobImpl is the implementation for registering a job or updating a job definition 712 func (s *StateStore) upsertJobImpl(index uint64, job *structs.Job, keepVersion bool, txn *memdb.Txn) error { 713 // COMPAT 0.7: Upgrade old objects that do not have namespaces 714 if job.Namespace == "" { 715 job.Namespace = 
structs.DefaultNamespace 716 } 717 718 // Assert the namespace exists 719 if exists, err := s.namespaceExists(txn, job.Namespace); err != nil { 720 return err 721 } else if !exists { 722 return fmt.Errorf("job %q is in non-existent namespace %q", job.ID, job.Namespace) 723 } 724 725 // Check if the job already exists 726 existing, err := txn.First("jobs", "id", job.Namespace, job.ID) 727 if err != nil { 728 return fmt.Errorf("job lookup failed: %v", err) 729 } 730 731 // Setup the indexes correctly 732 if existing != nil { 733 job.CreateIndex = existing.(*structs.Job).CreateIndex 734 job.ModifyIndex = index 735 736 // Bump the version unless asked to keep it. This should only be done 737 // when changing an internal field such as Stable. A spec change should 738 // always come with a version bump 739 if !keepVersion { 740 job.JobModifyIndex = index 741 job.Version = existing.(*structs.Job).Version + 1 742 } 743 744 // Compute the job status 745 var err error 746 job.Status, err = s.getJobStatus(txn, job, false) 747 if err != nil { 748 return fmt.Errorf("setting job status for %q failed: %v", job.ID, err) 749 } 750 } else { 751 job.CreateIndex = index 752 job.ModifyIndex = index 753 job.JobModifyIndex = index 754 job.Version = 0 755 756 if err := s.setJobStatus(index, txn, job, false, ""); err != nil { 757 return fmt.Errorf("setting job status for %q failed: %v", job.ID, err) 758 } 759 760 // Have to get the job again since it could have been updated 761 updated, err := txn.First("jobs", "id", job.Namespace, job.ID) 762 if err != nil { 763 return fmt.Errorf("job lookup failed: %v", err) 764 } 765 if updated != nil { 766 job = updated.(*structs.Job) 767 } 768 } 769 770 if err := s.updateSummaryWithJob(index, job, txn); err != nil { 771 return fmt.Errorf("unable to create job summary: %v", err) 772 } 773 774 if err := s.upsertJobVersion(index, job, txn); err != nil { 775 return fmt.Errorf("unable to upsert job into job_version table: %v", err) 776 } 777 778 // Create 
the EphemeralDisk if it's nil by adding up DiskMB from task resources. 779 // COMPAT 0.4.1 -> 0.5 780 s.addEphemeralDiskToTaskGroups(job) 781 782 // Insert the job 783 if err := txn.Insert("jobs", job); err != nil { 784 return fmt.Errorf("job insert failed: %v", err) 785 } 786 if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil { 787 return fmt.Errorf("index update failed: %v", err) 788 } 789 790 return nil 791 } 792 793 // DeleteJob is used to deregister a job 794 func (s *StateStore) DeleteJob(index uint64, namespace, jobID string) error { 795 txn := s.db.Txn(true) 796 defer txn.Abort() 797 798 // COMPAT 0.7: Upgrade old objects that do not have namespaces 799 if namespace == "" { 800 namespace = structs.DefaultNamespace 801 } 802 803 // Lookup the node 804 existing, err := txn.First("jobs", "id", namespace, jobID) 805 if err != nil { 806 return fmt.Errorf("job lookup failed: %v", err) 807 } 808 if existing == nil { 809 return fmt.Errorf("job not found") 810 } 811 812 // Check if we should update a parent job summary 813 job := existing.(*structs.Job) 814 if job.ParentID != "" { 815 summaryRaw, err := txn.First("job_summary", "id", namespace, job.ParentID) 816 if err != nil { 817 return fmt.Errorf("unable to retrieve summary for parent job: %v", err) 818 } 819 820 // Only continue if the summary exists. 
It could not exist if the parent 821 // job was removed 822 if summaryRaw != nil { 823 existing := summaryRaw.(*structs.JobSummary) 824 pSummary := existing.Copy() 825 if pSummary.Children != nil { 826 827 modified := false 828 switch job.Status { 829 case structs.JobStatusPending: 830 pSummary.Children.Pending-- 831 pSummary.Children.Dead++ 832 modified = true 833 case structs.JobStatusRunning: 834 pSummary.Children.Running-- 835 pSummary.Children.Dead++ 836 modified = true 837 case structs.JobStatusDead: 838 default: 839 return fmt.Errorf("unknown old job status %q", job.Status) 840 } 841 842 if modified { 843 // Update the modify index 844 pSummary.ModifyIndex = index 845 846 // COMPAT 0.7: Upgrade old objects that do not have namespaces 847 if pSummary.Namespace == "" { 848 pSummary.Namespace = structs.DefaultNamespace 849 } 850 851 // Insert the summary 852 if err := txn.Insert("job_summary", pSummary); err != nil { 853 return fmt.Errorf("job summary insert failed: %v", err) 854 } 855 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 856 return fmt.Errorf("index update failed: %v", err) 857 } 858 } 859 } 860 } 861 } 862 863 // Delete the job 864 if err := txn.Delete("jobs", existing); err != nil { 865 return fmt.Errorf("job delete failed: %v", err) 866 } 867 if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil { 868 return fmt.Errorf("index update failed: %v", err) 869 } 870 871 // Delete the job versions 872 if err := s.deleteJobVersions(index, job, txn); err != nil { 873 return err 874 } 875 876 // Delete the job summary 877 if _, err = txn.DeleteAll("job_summary", "id", namespace, jobID); err != nil { 878 return fmt.Errorf("deleing job summary failed: %v", err) 879 } 880 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 881 return fmt.Errorf("index update failed: %v", err) 882 } 883 884 txn.Commit() 885 return nil 886 } 887 888 // deleteJobVersions deletes all versions of the given 
job. 889 func (s *StateStore) deleteJobVersions(index uint64, job *structs.Job, txn *memdb.Txn) error { 890 // COMPAT 0.7: Upgrade old objects that do not have namespaces 891 if job.Namespace == "" { 892 job.Namespace = structs.DefaultNamespace 893 } 894 895 iter, err := txn.Get("job_version", "id_prefix", job.Namespace, job.ID) 896 if err != nil { 897 return err 898 } 899 900 for { 901 raw := iter.Next() 902 if raw == nil { 903 break 904 } 905 906 // Ensure the ID is an exact match 907 j := raw.(*structs.Job) 908 if j.ID != job.ID { 909 continue 910 } 911 912 if _, err = txn.DeleteAll("job_version", "id", j.Namespace, j.ID, j.Version); err != nil { 913 return fmt.Errorf("deleting job versions failed: %v", err) 914 } 915 } 916 917 if err := txn.Insert("index", &IndexEntry{"job_version", index}); err != nil { 918 return fmt.Errorf("index update failed: %v", err) 919 } 920 921 return nil 922 } 923 924 // upsertJobVersion inserts a job into its historic version table and limits the 925 // number of job versions that are tracked. 
926 func (s *StateStore) upsertJobVersion(index uint64, job *structs.Job, txn *memdb.Txn) error { 927 // COMPAT 0.7: Upgrade old objects that do not have namespaces 928 if job.Namespace == "" { 929 job.Namespace = structs.DefaultNamespace 930 } 931 932 // Insert the job 933 if err := txn.Insert("job_version", job); err != nil { 934 return fmt.Errorf("failed to insert job into job_version table: %v", err) 935 } 936 937 if err := txn.Insert("index", &IndexEntry{"job_version", index}); err != nil { 938 return fmt.Errorf("index update failed: %v", err) 939 } 940 941 // Get all the historic jobs for this ID 942 all, err := s.jobVersionByID(txn, nil, job.Namespace, job.ID) 943 if err != nil { 944 return fmt.Errorf("failed to look up job versions for %q: %v", job.ID, err) 945 } 946 947 // If we are below the limit there is no GCing to be done 948 if len(all) <= structs.JobTrackedVersions { 949 return nil 950 } 951 952 // We have to delete a historic job to make room. 953 // Find index of the highest versioned stable job 954 stableIdx := -1 955 for i, j := range all { 956 if j.Stable { 957 stableIdx = i 958 break 959 } 960 } 961 962 // If the stable job is the oldest version, do a swap to bring it into the 963 // keep set. 964 max := structs.JobTrackedVersions 965 if stableIdx == max { 966 all[max-1], all[max] = all[max], all[max-1] 967 } 968 969 // Delete the job outside of the set that are being kept. 970 d := all[max] 971 if err := txn.Delete("job_version", d); err != nil { 972 return fmt.Errorf("failed to delete job %v (%d) from job_version", d.ID, d.Version) 973 } 974 975 return nil 976 } 977 978 // JobByID is used to lookup a job by its ID. JobByID returns the current/latest job 979 // version. 
980 func (s *StateStore) JobByID(ws memdb.WatchSet, namespace, id string) (*structs.Job, error) { 981 txn := s.db.Txn(false) 982 983 // COMPAT 0.7: Upgrade old objects that do not have namespaces 984 if namespace == "" { 985 namespace = structs.DefaultNamespace 986 } 987 988 watchCh, existing, err := txn.FirstWatch("jobs", "id", namespace, id) 989 if err != nil { 990 return nil, fmt.Errorf("job lookup failed: %v", err) 991 } 992 ws.Add(watchCh) 993 994 if existing != nil { 995 return existing.(*structs.Job), nil 996 } 997 return nil, nil 998 } 999 1000 // JobsByIDPrefix is used to lookup a job by prefix 1001 func (s *StateStore) JobsByIDPrefix(ws memdb.WatchSet, namespace, id string) (memdb.ResultIterator, error) { 1002 txn := s.db.Txn(false) 1003 1004 // COMPAT 0.7: Upgrade old objects that do not have namespaces 1005 if namespace == "" { 1006 namespace = structs.DefaultNamespace 1007 } 1008 1009 iter, err := txn.Get("jobs", "id_prefix", namespace, id) 1010 if err != nil { 1011 return nil, fmt.Errorf("job lookup failed: %v", err) 1012 } 1013 1014 ws.Add(iter.WatchCh()) 1015 1016 return iter, nil 1017 } 1018 1019 // JobVersionsByID returns all the tracked versions of a job. 1020 func (s *StateStore) JobVersionsByID(ws memdb.WatchSet, namespace, id string) ([]*structs.Job, error) { 1021 txn := s.db.Txn(false) 1022 1023 // COMPAT 0.7: Upgrade old objects that do not have namespaces 1024 if namespace == "" { 1025 namespace = structs.DefaultNamespace 1026 } 1027 1028 return s.jobVersionByID(txn, &ws, namespace, id) 1029 } 1030 1031 // jobVersionByID is the underlying implementation for retrieving all tracked 1032 // versions of a job and is called under an existing transaction. A watch set 1033 // can optionally be passed in to add the job histories to the watch set. 
1034 func (s *StateStore) jobVersionByID(txn *memdb.Txn, ws *memdb.WatchSet, namespace, id string) ([]*structs.Job, error) { 1035 // COMPAT 0.7: Upgrade old objects that do not have namespaces 1036 if namespace == "" { 1037 namespace = structs.DefaultNamespace 1038 } 1039 1040 // Get all the historic jobs for this ID 1041 iter, err := txn.Get("job_version", "id_prefix", namespace, id) 1042 if err != nil { 1043 return nil, err 1044 } 1045 1046 if ws != nil { 1047 ws.Add(iter.WatchCh()) 1048 } 1049 1050 var all []*structs.Job 1051 for { 1052 raw := iter.Next() 1053 if raw == nil { 1054 break 1055 } 1056 1057 // Ensure the ID is an exact match 1058 j := raw.(*structs.Job) 1059 if j.ID != id { 1060 continue 1061 } 1062 1063 all = append(all, j) 1064 } 1065 1066 // Sort in reverse order so that the highest version is first 1067 sort.Slice(all, func(i, j int) bool { 1068 return all[i].Version > all[j].Version 1069 }) 1070 1071 return all, nil 1072 } 1073 1074 // JobByIDAndVersion returns the job identified by its ID and Version. The 1075 // passed watchset may be nil. 1076 func (s *StateStore) JobByIDAndVersion(ws memdb.WatchSet, namespace, id string, version uint64) (*structs.Job, error) { 1077 // COMPAT 0.7: Upgrade old objects that do not have namespaces 1078 if namespace == "" { 1079 namespace = structs.DefaultNamespace 1080 } 1081 txn := s.db.Txn(false) 1082 return s.jobByIDAndVersionImpl(ws, namespace, id, version, txn) 1083 } 1084 1085 // jobByIDAndVersionImpl returns the job identified by its ID and Version. The 1086 // passed watchset may be nil. 
1087 func (s *StateStore) jobByIDAndVersionImpl(ws memdb.WatchSet, namespace, id string, 1088 version uint64, txn *memdb.Txn) (*structs.Job, error) { 1089 // COMPAT 0.7: Upgrade old objects that do not have namespaces 1090 if namespace == "" { 1091 namespace = structs.DefaultNamespace 1092 } 1093 1094 watchCh, existing, err := txn.FirstWatch("job_version", "id", namespace, id, version) 1095 if err != nil { 1096 return nil, err 1097 } 1098 1099 if ws != nil { 1100 ws.Add(watchCh) 1101 } 1102 1103 if existing != nil { 1104 job := existing.(*structs.Job) 1105 return job, nil 1106 } 1107 1108 return nil, nil 1109 } 1110 1111 func (s *StateStore) JobVersions(ws memdb.WatchSet) (memdb.ResultIterator, error) { 1112 txn := s.db.Txn(false) 1113 1114 // Walk the entire deployments table 1115 iter, err := txn.Get("job_version", "id") 1116 if err != nil { 1117 return nil, err 1118 } 1119 1120 ws.Add(iter.WatchCh()) 1121 return iter, nil 1122 } 1123 1124 // Jobs returns an iterator over all the jobs 1125 func (s *StateStore) Jobs(ws memdb.WatchSet) (memdb.ResultIterator, error) { 1126 txn := s.db.Txn(false) 1127 1128 // Walk the entire jobs table 1129 iter, err := txn.Get("jobs", "id") 1130 if err != nil { 1131 return nil, err 1132 } 1133 1134 ws.Add(iter.WatchCh()) 1135 1136 return iter, nil 1137 } 1138 1139 // JobsByNamespace returns an iterator over all the jobs for the given namespace 1140 func (s *StateStore) JobsByNamespace(ws memdb.WatchSet, namespace string) (memdb.ResultIterator, error) { 1141 txn := s.db.Txn(false) 1142 return s.jobsByNamespaceImpl(ws, namespace, txn) 1143 } 1144 1145 // jobsByNamespaceImpl returns an iterator over all the jobs for the given namespace 1146 func (s *StateStore) jobsByNamespaceImpl(ws memdb.WatchSet, namespace string, txn *memdb.Txn) (memdb.ResultIterator, error) { 1147 // Walk the entire jobs table 1148 iter, err := txn.Get("jobs", "id_prefix", namespace, "") 1149 if err != nil { 1150 return nil, err 1151 } 1152 1153 
ws.Add(iter.WatchCh()) 1154 1155 return iter, nil 1156 } 1157 1158 // JobsByPeriodic returns an iterator over all the periodic or non-periodic jobs. 1159 func (s *StateStore) JobsByPeriodic(ws memdb.WatchSet, periodic bool) (memdb.ResultIterator, error) { 1160 txn := s.db.Txn(false) 1161 1162 iter, err := txn.Get("jobs", "periodic", periodic) 1163 if err != nil { 1164 return nil, err 1165 } 1166 1167 ws.Add(iter.WatchCh()) 1168 1169 return iter, nil 1170 } 1171 1172 // JobsByScheduler returns an iterator over all the jobs with the specific 1173 // scheduler type. 1174 func (s *StateStore) JobsByScheduler(ws memdb.WatchSet, schedulerType string) (memdb.ResultIterator, error) { 1175 txn := s.db.Txn(false) 1176 1177 // Return an iterator for jobs with the specific type. 1178 iter, err := txn.Get("jobs", "type", schedulerType) 1179 if err != nil { 1180 return nil, err 1181 } 1182 1183 ws.Add(iter.WatchCh()) 1184 1185 return iter, nil 1186 } 1187 1188 // JobsByGC returns an iterator over all jobs eligible or uneligible for garbage 1189 // collection. 1190 func (s *StateStore) JobsByGC(ws memdb.WatchSet, gc bool) (memdb.ResultIterator, error) { 1191 txn := s.db.Txn(false) 1192 1193 iter, err := txn.Get("jobs", "gc", gc) 1194 if err != nil { 1195 return nil, err 1196 } 1197 1198 ws.Add(iter.WatchCh()) 1199 1200 return iter, nil 1201 } 1202 1203 // JobSummary returns a job summary object which matches a specific id. 
1204 func (s *StateStore) JobSummaryByID(ws memdb.WatchSet, namespace, jobID string) (*structs.JobSummary, error) { 1205 txn := s.db.Txn(false) 1206 1207 // COMPAT 0.7: Upgrade old objects that do not have namespaces 1208 if namespace == "" { 1209 namespace = structs.DefaultNamespace 1210 } 1211 1212 watchCh, existing, err := txn.FirstWatch("job_summary", "id", namespace, jobID) 1213 if err != nil { 1214 return nil, err 1215 } 1216 1217 ws.Add(watchCh) 1218 1219 if existing != nil { 1220 summary := existing.(*structs.JobSummary) 1221 return summary, nil 1222 } 1223 1224 return nil, nil 1225 } 1226 1227 // JobSummaries walks the entire job summary table and returns all the job 1228 // summary objects 1229 func (s *StateStore) JobSummaries(ws memdb.WatchSet) (memdb.ResultIterator, error) { 1230 txn := s.db.Txn(false) 1231 1232 iter, err := txn.Get("job_summary", "id") 1233 if err != nil { 1234 return nil, err 1235 } 1236 1237 ws.Add(iter.WatchCh()) 1238 1239 return iter, nil 1240 } 1241 1242 // JobSummaryByPrefix is used to look up Job Summary by id prefix 1243 func (s *StateStore) JobSummaryByPrefix(ws memdb.WatchSet, namespace, id string) (memdb.ResultIterator, error) { 1244 txn := s.db.Txn(false) 1245 1246 // COMPAT 0.7: Upgrade old objects that do not have namespaces 1247 if namespace == "" { 1248 namespace = structs.DefaultNamespace 1249 } 1250 1251 iter, err := txn.Get("job_summary", "id_prefix", namespace, id) 1252 if err != nil { 1253 return nil, fmt.Errorf("eval lookup failed: %v", err) 1254 } 1255 1256 ws.Add(iter.WatchCh()) 1257 1258 return iter, nil 1259 } 1260 1261 // UpsertPeriodicLaunch is used to register a launch or update it. 
1262 func (s *StateStore) UpsertPeriodicLaunch(index uint64, launch *structs.PeriodicLaunch) error { 1263 txn := s.db.Txn(true) 1264 defer txn.Abort() 1265 1266 // COMPAT 0.7: Upgrade old objects that do not have namespaces 1267 if launch.Namespace == "" { 1268 launch.Namespace = structs.DefaultNamespace 1269 } 1270 1271 // Check if the job already exists 1272 existing, err := txn.First("periodic_launch", "id", launch.Namespace, launch.ID) 1273 if err != nil { 1274 return fmt.Errorf("periodic launch lookup failed: %v", err) 1275 } 1276 1277 // Setup the indexes correctly 1278 if existing != nil { 1279 launch.CreateIndex = existing.(*structs.PeriodicLaunch).CreateIndex 1280 launch.ModifyIndex = index 1281 } else { 1282 launch.CreateIndex = index 1283 launch.ModifyIndex = index 1284 } 1285 1286 // Insert the job 1287 if err := txn.Insert("periodic_launch", launch); err != nil { 1288 return fmt.Errorf("launch insert failed: %v", err) 1289 } 1290 if err := txn.Insert("index", &IndexEntry{"periodic_launch", index}); err != nil { 1291 return fmt.Errorf("index update failed: %v", err) 1292 } 1293 1294 txn.Commit() 1295 return nil 1296 } 1297 1298 // DeletePeriodicLaunch is used to delete the periodic launch 1299 func (s *StateStore) DeletePeriodicLaunch(index uint64, namespace, jobID string) error { 1300 txn := s.db.Txn(true) 1301 defer txn.Abort() 1302 1303 // COMPAT 0.7: Upgrade old objects that do not have namespaces 1304 if namespace == "" { 1305 namespace = structs.DefaultNamespace 1306 } 1307 1308 // Lookup the launch 1309 existing, err := txn.First("periodic_launch", "id", namespace, jobID) 1310 if err != nil { 1311 return fmt.Errorf("launch lookup failed: %v", err) 1312 } 1313 if existing == nil { 1314 return fmt.Errorf("launch not found") 1315 } 1316 1317 // Delete the launch 1318 if err := txn.Delete("periodic_launch", existing); err != nil { 1319 return fmt.Errorf("launch delete failed: %v", err) 1320 } 1321 if err := txn.Insert("index", 
&IndexEntry{"periodic_launch", index}); err != nil { 1322 return fmt.Errorf("index update failed: %v", err) 1323 } 1324 1325 txn.Commit() 1326 return nil 1327 } 1328 1329 // PeriodicLaunchByID is used to lookup a periodic launch by the periodic job 1330 // ID. 1331 func (s *StateStore) PeriodicLaunchByID(ws memdb.WatchSet, namespace, id string) (*structs.PeriodicLaunch, error) { 1332 txn := s.db.Txn(false) 1333 1334 // COMPAT 0.7: Upgrade old objects that do not have namespaces 1335 if namespace == "" { 1336 namespace = structs.DefaultNamespace 1337 } 1338 1339 watchCh, existing, err := txn.FirstWatch("periodic_launch", "id", namespace, id) 1340 if err != nil { 1341 return nil, fmt.Errorf("periodic launch lookup failed: %v", err) 1342 } 1343 1344 ws.Add(watchCh) 1345 1346 if existing != nil { 1347 return existing.(*structs.PeriodicLaunch), nil 1348 } 1349 return nil, nil 1350 } 1351 1352 // PeriodicLaunches returns an iterator over all the periodic launches 1353 func (s *StateStore) PeriodicLaunches(ws memdb.WatchSet) (memdb.ResultIterator, error) { 1354 txn := s.db.Txn(false) 1355 1356 // Walk the entire table 1357 iter, err := txn.Get("periodic_launch", "id") 1358 if err != nil { 1359 return nil, err 1360 } 1361 1362 ws.Add(iter.WatchCh()) 1363 1364 return iter, nil 1365 } 1366 1367 // UpsertEvals is used to upsert a set of evaluations 1368 func (s *StateStore) UpsertEvals(index uint64, evals []*structs.Evaluation) error { 1369 txn := s.db.Txn(true) 1370 defer txn.Abort() 1371 1372 // Do a nested upsert 1373 jobs := make(map[structs.NamespacedID]string, len(evals)) 1374 for _, eval := range evals { 1375 if err := s.nestedUpsertEval(txn, index, eval); err != nil { 1376 return err 1377 } 1378 1379 tuple := structs.NamespacedID{ 1380 ID: eval.JobID, 1381 Namespace: eval.Namespace, 1382 } 1383 jobs[tuple] = "" 1384 } 1385 1386 // Set the job's status 1387 if err := s.setJobStatuses(index, txn, jobs, false); err != nil { 1388 return fmt.Errorf("setting job status 
failed: %v", err) 1389 } 1390 1391 txn.Commit() 1392 return nil 1393 } 1394 1395 // nestedUpsertEvaluation is used to nest an evaluation upsert within a transaction 1396 func (s *StateStore) nestedUpsertEval(txn *memdb.Txn, index uint64, eval *structs.Evaluation) error { 1397 // Lookup the evaluation 1398 existing, err := txn.First("evals", "id", eval.ID) 1399 if err != nil { 1400 return fmt.Errorf("eval lookup failed: %v", err) 1401 } 1402 1403 // COMPAT 0.7: Upgrade old objects that do not have namespaces 1404 if eval.Namespace == "" { 1405 eval.Namespace = structs.DefaultNamespace 1406 } 1407 1408 // Update the indexes 1409 if existing != nil { 1410 eval.CreateIndex = existing.(*structs.Evaluation).CreateIndex 1411 eval.ModifyIndex = index 1412 } else { 1413 eval.CreateIndex = index 1414 eval.ModifyIndex = index 1415 } 1416 1417 // Update the job summary 1418 summaryRaw, err := txn.First("job_summary", "id", eval.Namespace, eval.JobID) 1419 if err != nil { 1420 return fmt.Errorf("job summary lookup failed: %v", err) 1421 } 1422 if summaryRaw != nil { 1423 js := summaryRaw.(*structs.JobSummary).Copy() 1424 hasSummaryChanged := false 1425 for tg, num := range eval.QueuedAllocations { 1426 if summary, ok := js.Summary[tg]; ok { 1427 if summary.Queued != num { 1428 summary.Queued = num 1429 js.Summary[tg] = summary 1430 hasSummaryChanged = true 1431 } 1432 } else { 1433 s.logger.Printf("[ERR] state_store: unable to update queued for job %q and task group %q", eval.JobID, tg) 1434 } 1435 } 1436 1437 // Insert the job summary 1438 if hasSummaryChanged { 1439 // COMPAT 0.7: Upgrade old objects that do not have namespaces 1440 if js.Namespace == "" { 1441 js.Namespace = structs.DefaultNamespace 1442 } 1443 1444 js.ModifyIndex = index 1445 if err := txn.Insert("job_summary", js); err != nil { 1446 return fmt.Errorf("job summary insert failed: %v", err) 1447 } 1448 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 1449 return fmt.Errorf("index 
update failed: %v", err) 1450 } 1451 } 1452 } 1453 1454 // Check if the job has any blocked evaluations and cancel them 1455 if eval.Status == structs.EvalStatusComplete && len(eval.FailedTGAllocs) == 0 { 1456 // Get the blocked evaluation for a job if it exists 1457 iter, err := txn.Get("evals", "job", eval.Namespace, eval.JobID, structs.EvalStatusBlocked) 1458 if err != nil { 1459 return fmt.Errorf("failed to get blocked evals for job %q in namespace %q: %v", eval.JobID, eval.Namespace, err) 1460 } 1461 1462 var blocked []*structs.Evaluation 1463 for { 1464 raw := iter.Next() 1465 if raw == nil { 1466 break 1467 } 1468 blocked = append(blocked, raw.(*structs.Evaluation)) 1469 } 1470 1471 // Go through and update the evals 1472 for _, eval := range blocked { 1473 newEval := eval.Copy() 1474 newEval.Status = structs.EvalStatusCancelled 1475 newEval.StatusDescription = fmt.Sprintf("evaluation %q successful", newEval.ID) 1476 newEval.ModifyIndex = index 1477 1478 // COMPAT 0.7: Upgrade old objects that do not have namespaces 1479 if newEval.Namespace == "" { 1480 newEval.Namespace = structs.DefaultNamespace 1481 } 1482 1483 if err := txn.Insert("evals", newEval); err != nil { 1484 return fmt.Errorf("eval insert failed: %v", err) 1485 } 1486 } 1487 } 1488 1489 // Insert the eval 1490 if err := txn.Insert("evals", eval); err != nil { 1491 return fmt.Errorf("eval insert failed: %v", err) 1492 } 1493 if err := txn.Insert("index", &IndexEntry{"evals", index}); err != nil { 1494 return fmt.Errorf("index update failed: %v", err) 1495 } 1496 return nil 1497 } 1498 1499 // updateEvalModifyIndex is used to update the modify index of an evaluation that has been 1500 // through a scheduler pass. This is done as part of plan apply. It ensures that when a subsequent 1501 // scheduler workers process a re-queued evaluation it sees any partial updates from the plan apply. 
1502 func (s *StateStore) updateEvalModifyIndex(txn *memdb.Txn, index uint64, evalID string) error { 1503 // Lookup the evaluation 1504 existing, err := txn.First("evals", "id", evalID) 1505 if err != nil { 1506 return fmt.Errorf("eval lookup failed: %v", err) 1507 } 1508 if existing == nil { 1509 err := fmt.Errorf("unable to find eval id %q", evalID) 1510 s.logger.Printf("[ERR] state_store: %v", err) 1511 return err 1512 } 1513 eval := existing.(*structs.Evaluation).Copy() 1514 // Update the indexes 1515 eval.ModifyIndex = index 1516 1517 // Insert the eval 1518 if err := txn.Insert("evals", eval); err != nil { 1519 return fmt.Errorf("eval insert failed: %v", err) 1520 } 1521 if err := txn.Insert("index", &IndexEntry{"evals", index}); err != nil { 1522 return fmt.Errorf("index update failed: %v", err) 1523 } 1524 return nil 1525 } 1526 1527 // DeleteEval is used to delete an evaluation 1528 func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) error { 1529 txn := s.db.Txn(true) 1530 defer txn.Abort() 1531 1532 jobs := make(map[structs.NamespacedID]string, len(evals)) 1533 for _, eval := range evals { 1534 existing, err := txn.First("evals", "id", eval) 1535 if err != nil { 1536 return fmt.Errorf("eval lookup failed: %v", err) 1537 } 1538 if existing == nil { 1539 continue 1540 } 1541 if err := txn.Delete("evals", existing); err != nil { 1542 return fmt.Errorf("eval delete failed: %v", err) 1543 } 1544 eval := existing.(*structs.Evaluation) 1545 1546 tuple := structs.NamespacedID{ 1547 ID: eval.JobID, 1548 Namespace: eval.Namespace, 1549 } 1550 jobs[tuple] = "" 1551 } 1552 1553 for _, alloc := range allocs { 1554 raw, err := txn.First("allocs", "id", alloc) 1555 if err != nil { 1556 return fmt.Errorf("alloc lookup failed: %v", err) 1557 } 1558 if raw == nil { 1559 continue 1560 } 1561 if err := txn.Delete("allocs", raw); err != nil { 1562 return fmt.Errorf("alloc delete failed: %v", err) 1563 } 1564 } 1565 1566 // Update the indexes 1567 if 
err := txn.Insert("index", &IndexEntry{"evals", index}); err != nil { 1568 return fmt.Errorf("index update failed: %v", err) 1569 } 1570 if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { 1571 return fmt.Errorf("index update failed: %v", err) 1572 } 1573 1574 // Set the job's status 1575 if err := s.setJobStatuses(index, txn, jobs, true); err != nil { 1576 return fmt.Errorf("setting job status failed: %v", err) 1577 } 1578 1579 txn.Commit() 1580 return nil 1581 } 1582 1583 // EvalByID is used to lookup an eval by its ID 1584 func (s *StateStore) EvalByID(ws memdb.WatchSet, id string) (*structs.Evaluation, error) { 1585 txn := s.db.Txn(false) 1586 1587 watchCh, existing, err := txn.FirstWatch("evals", "id", id) 1588 if err != nil { 1589 return nil, fmt.Errorf("eval lookup failed: %v", err) 1590 } 1591 1592 ws.Add(watchCh) 1593 1594 if existing != nil { 1595 return existing.(*structs.Evaluation), nil 1596 } 1597 return nil, nil 1598 } 1599 1600 // EvalsByIDPrefix is used to lookup evaluations by prefix in a particular 1601 // namespace 1602 func (s *StateStore) EvalsByIDPrefix(ws memdb.WatchSet, namespace, id string) (memdb.ResultIterator, error) { 1603 txn := s.db.Txn(false) 1604 1605 // Get an iterator over all evals by the id prefix 1606 iter, err := txn.Get("evals", "id_prefix", id) 1607 if err != nil { 1608 return nil, fmt.Errorf("eval lookup failed: %v", err) 1609 } 1610 1611 ws.Add(iter.WatchCh()) 1612 1613 // COMPAT 0.7: Upgrade old objects that do not have namespaces 1614 if namespace == "" { 1615 namespace = structs.DefaultNamespace 1616 } 1617 1618 // Wrap the iterator in a filter 1619 wrap := memdb.NewFilterIterator(iter, evalNamespaceFilter(namespace)) 1620 return wrap, nil 1621 } 1622 1623 // evalNamespaceFilter returns a filter function that filters all evaluations 1624 // not in the given namespace. 
1625 func evalNamespaceFilter(namespace string) func(interface{}) bool { 1626 return func(raw interface{}) bool { 1627 eval, ok := raw.(*structs.Evaluation) 1628 if !ok { 1629 return true 1630 } 1631 1632 return eval.Namespace != namespace 1633 } 1634 } 1635 1636 // EvalsByJob returns all the evaluations by job id 1637 func (s *StateStore) EvalsByJob(ws memdb.WatchSet, namespace, jobID string) ([]*structs.Evaluation, error) { 1638 txn := s.db.Txn(false) 1639 1640 // COMPAT 0.7: Upgrade old objects that do not have namespaces 1641 if namespace == "" { 1642 namespace = structs.DefaultNamespace 1643 } 1644 1645 // Get an iterator over the node allocations 1646 iter, err := txn.Get("evals", "job_prefix", namespace, jobID) 1647 if err != nil { 1648 return nil, err 1649 } 1650 1651 ws.Add(iter.WatchCh()) 1652 1653 var out []*structs.Evaluation 1654 for { 1655 raw := iter.Next() 1656 if raw == nil { 1657 break 1658 } 1659 1660 e := raw.(*structs.Evaluation) 1661 1662 // Filter non-exact matches 1663 if e.JobID != jobID { 1664 continue 1665 } 1666 1667 out = append(out, e) 1668 } 1669 return out, nil 1670 } 1671 1672 // Evals returns an iterator over all the evaluations 1673 func (s *StateStore) Evals(ws memdb.WatchSet) (memdb.ResultIterator, error) { 1674 txn := s.db.Txn(false) 1675 1676 // Walk the entire table 1677 iter, err := txn.Get("evals", "id") 1678 if err != nil { 1679 return nil, err 1680 } 1681 1682 ws.Add(iter.WatchCh()) 1683 1684 return iter, nil 1685 } 1686 1687 // EvalsByNamespace returns an iterator over all the evaluations in the given 1688 // namespace 1689 func (s *StateStore) EvalsByNamespace(ws memdb.WatchSet, namespace string) (memdb.ResultIterator, error) { 1690 txn := s.db.Txn(false) 1691 1692 // Walk the entire table 1693 iter, err := txn.Get("evals", "namespace", namespace) 1694 if err != nil { 1695 return nil, err 1696 } 1697 1698 ws.Add(iter.WatchCh()) 1699 1700 return iter, nil 1701 } 1702 1703 // UpdateAllocsFromClient is used to update an 
allocation based on input 1704 // from a client. While the schedulers are the authority on the allocation for 1705 // most things, some updates are authoritative from the client. Specifically, 1706 // the desired state comes from the schedulers, while the actual state comes 1707 // from clients. 1708 func (s *StateStore) UpdateAllocsFromClient(index uint64, allocs []*structs.Allocation) error { 1709 txn := s.db.Txn(true) 1710 defer txn.Abort() 1711 1712 // Handle each of the updated allocations 1713 for _, alloc := range allocs { 1714 if err := s.nestedUpdateAllocFromClient(txn, index, alloc); err != nil { 1715 return err 1716 } 1717 } 1718 1719 // Update the indexes 1720 if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { 1721 return fmt.Errorf("index update failed: %v", err) 1722 } 1723 1724 txn.Commit() 1725 return nil 1726 } 1727 1728 // nestedUpdateAllocFromClient is used to nest an update of an allocation with client status 1729 func (s *StateStore) nestedUpdateAllocFromClient(txn *memdb.Txn, index uint64, alloc *structs.Allocation) error { 1730 // Look for existing alloc 1731 existing, err := txn.First("allocs", "id", alloc.ID) 1732 if err != nil { 1733 return fmt.Errorf("alloc lookup failed: %v", err) 1734 } 1735 1736 // Nothing to do if this does not exist 1737 if existing == nil { 1738 return nil 1739 } 1740 exist := existing.(*structs.Allocation) 1741 1742 // Copy everything from the existing allocation 1743 copyAlloc := exist.Copy() 1744 1745 // COMPAT 0.7: Upgrade old objects that do not have namespaces 1746 if copyAlloc.Namespace == "" { 1747 copyAlloc.Namespace = structs.DefaultNamespace 1748 } 1749 1750 // Pull in anything the client is the authority on 1751 copyAlloc.ClientStatus = alloc.ClientStatus 1752 copyAlloc.ClientDescription = alloc.ClientDescription 1753 copyAlloc.TaskStates = alloc.TaskStates 1754 copyAlloc.DeploymentStatus = alloc.DeploymentStatus 1755 1756 // Update the modify index 1757 copyAlloc.ModifyIndex = 
index 1758 1759 // Update the modify time 1760 copyAlloc.ModifyTime = alloc.ModifyTime 1761 1762 if err := s.updateDeploymentWithAlloc(index, copyAlloc, exist, txn); err != nil { 1763 return fmt.Errorf("error updating deployment: %v", err) 1764 } 1765 1766 if err := s.updateSummaryWithAlloc(index, copyAlloc, exist, txn); err != nil { 1767 return fmt.Errorf("error updating job summary: %v", err) 1768 } 1769 1770 if err := s.updateEntWithAlloc(index, copyAlloc, exist, txn); err != nil { 1771 return err 1772 } 1773 1774 // Update the allocation 1775 if err := txn.Insert("allocs", copyAlloc); err != nil { 1776 return fmt.Errorf("alloc insert failed: %v", err) 1777 } 1778 1779 // Set the job's status 1780 forceStatus := "" 1781 if !copyAlloc.TerminalStatus() { 1782 forceStatus = structs.JobStatusRunning 1783 } 1784 1785 tuple := structs.NamespacedID{ 1786 ID: exist.JobID, 1787 Namespace: exist.Namespace, 1788 } 1789 jobs := map[structs.NamespacedID]string{tuple: forceStatus} 1790 1791 if err := s.setJobStatuses(index, txn, jobs, false); err != nil { 1792 return fmt.Errorf("setting job status failed: %v", err) 1793 } 1794 return nil 1795 } 1796 1797 // UpsertAllocs is used to evict a set of allocations and allocate new ones at 1798 // the same time. 1799 func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) error { 1800 txn := s.db.Txn(true) 1801 defer txn.Abort() 1802 if err := s.upsertAllocsImpl(index, allocs, txn); err != nil { 1803 return err 1804 } 1805 txn.Commit() 1806 return nil 1807 } 1808 1809 // upsertAllocs is the actual implementation of UpsertAllocs so that it may be 1810 // used with an existing transaction. 
func (s *StateStore) upsertAllocsImpl(index uint64, allocs []*structs.Allocation, txn *memdb.Txn) error {
	// Handle the allocations. jobs collects, per job, the status to force
	// when the job statuses are recomputed at the end.
	jobs := make(map[structs.NamespacedID]string, 1)
	for _, alloc := range allocs {
		existing, err := txn.First("allocs", "id", alloc.ID)
		if err != nil {
			return fmt.Errorf("alloc lookup failed: %v", err)
		}
		// exist stays nil when the allocation is not stored yet
		exist, _ := existing.(*structs.Allocation)

		if exist == nil {
			alloc.CreateIndex = index
			alloc.ModifyIndex = index
			alloc.AllocModifyIndex = index

			// Issue https://github.com/hashicorp/nomad/issues/2583 uncovered
			// a race between a forced garbage collection and the scheduler
			// marking an allocation as terminal. The issue is that the
			// allocation from the scheduler has its job normalized and the FSM
			// will only denormalize if the allocation is not terminal. However
			// if the allocation is garbage collected, that will result in an
			// allocation being upserted for the first time without a job
			// attached. By returning an error here, it will cause the FSM to
			// error, causing the plan_apply to error and thus causing the
			// evaluation to be failed. This will force an index refresh that
			// should solve this issue.
			if alloc.Job == nil {
				return fmt.Errorf("attempting to upsert allocation %q without a job", alloc.ID)
			}
		} else {
			alloc.CreateIndex = exist.CreateIndex
			alloc.ModifyIndex = index
			alloc.AllocModifyIndex = index

			// Keep the client's task states
			alloc.TaskStates = exist.TaskStates

			// If the scheduler is marking this allocation as lost we do not
			// want to reuse the status of the existing allocation.
			if alloc.ClientStatus != structs.AllocClientStatusLost {
				alloc.ClientStatus = exist.ClientStatus
				alloc.ClientDescription = exist.ClientDescription
			}

			// The job has been denormalized so re-attach the original job
			if alloc.Job == nil {
				alloc.Job = exist.Job
			}
		}

		// COMPAT 0.7: Upgrade old objects that do not have namespaces
		if alloc.Namespace == "" {
			alloc.Namespace = structs.DefaultNamespace
		}

		// OPTIMIZATION:
		// These should be given a map of new to old allocation and the updates
		// should be one on all changes. The current implementation causes O(n)
		// lookups/copies/insertions rather than O(1)
		if err := s.updateDeploymentWithAlloc(index, alloc, exist, txn); err != nil {
			return fmt.Errorf("error updating deployment: %v", err)
		}

		if err := s.updateSummaryWithAlloc(index, alloc, exist, txn); err != nil {
			return fmt.Errorf("error updating job summary: %v", err)
		}

		if err := s.updateEntWithAlloc(index, alloc, exist, txn); err != nil {
			return err
		}

		// Create the EphemeralDisk if it's nil by adding up DiskMB from task resources.
		// COMPAT 0.4.1 -> 0.5
		if alloc.Job != nil {
			s.addEphemeralDiskToTaskGroups(alloc.Job)
		}

		if err := txn.Insert("allocs", alloc); err != nil {
			return fmt.Errorf("alloc insert failed: %v", err)
		}

		// Link the previous allocation, if it is still stored, forward to
		// this one. The stored object is copied rather than modified in place.
		if alloc.PreviousAllocation != "" {
			prevAlloc, err := txn.First("allocs", "id", alloc.PreviousAllocation)
			if err != nil {
				return fmt.Errorf("alloc lookup failed: %v", err)
			}
			existingPrevAlloc, _ := prevAlloc.(*structs.Allocation)
			if existingPrevAlloc != nil {
				prevAllocCopy := existingPrevAlloc.Copy()
				prevAllocCopy.NextAllocation = alloc.ID
				if err := txn.Insert("allocs", prevAllocCopy); err != nil {
					return fmt.Errorf("alloc insert failed: %v", err)
				}
			}
		}

		// If the allocation is not terminal, force the job to running status.
		forceStatus := ""
		if !alloc.TerminalStatus() {
			forceStatus = structs.JobStatusRunning
		}

		tuple := structs.NamespacedID{
			ID:        alloc.JobID,
			Namespace: alloc.Namespace,
		}
		jobs[tuple] = forceStatus
	}

	// Update the indexes
	if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}

	// Set the job's status
	if err := s.setJobStatuses(index, txn, jobs, false); err != nil {
		return fmt.Errorf("setting job status failed: %v", err)
	}

	return nil
}

// AllocByID is used to lookup an allocation by its ID. It returns (nil, nil)
// when no such allocation is stored.
func (s *StateStore) AllocByID(ws memdb.WatchSet, id string) (*structs.Allocation, error) {
	txn := s.db.Txn(false)

	watchCh, existing, err := txn.FirstWatch("allocs", "id", id)
	if err != nil {
		return nil, fmt.Errorf("alloc lookup failed: %v", err)
	}

	ws.Add(watchCh)

	if existing != nil {
		return existing.(*structs.Allocation), nil
	}
	return nil, nil
}

// AllocsByIDPrefix is used to lookup allocs by ID prefix. The result is
// restricted to the given namespace by a filter wrapped around the iterator.
func (s *StateStore) AllocsByIDPrefix(ws memdb.WatchSet, namespace, id string) (memdb.ResultIterator, error) {
	txn := s.db.Txn(false)

	// The prefix lookup itself is not namespaced
	iter, err := txn.Get("allocs", "id_prefix", id)
	if err != nil {
		return nil, fmt.Errorf("alloc lookup failed: %v", err)
	}

	ws.Add(iter.WatchCh())

	// Wrap the iterator in a filter. Unlike EvalsByIDPrefix, an empty
	// namespace is passed through as-is here.
	wrap := memdb.NewFilterIterator(iter, allocNamespaceFilter(namespace))
	return wrap, nil
}

// allocNamespaceFilter returns a filter function that filters all allocations
// not in the given namespace. The returned function reports true for anything
// that is not an allocation in that namespace.
func allocNamespaceFilter(namespace string) func(interface{}) bool {
	return func(raw interface{}) bool {
		alloc, ok := raw.(*structs.Allocation)
		if !ok {
			return true
		}

		return alloc.Namespace != namespace
	}
}

// AllocsByNode returns all the allocations by node
func (s *StateStore) AllocsByNode(ws memdb.WatchSet, node string) ([]*structs.Allocation, error) {
	txn := s.db.Txn(false)

	// Get an iterator over the node allocations, using only the
	// node prefix which ignores the terminal status
	iter, err := txn.Get("allocs", "node_prefix", node)
	if err != nil {
		return nil, err
	}

	ws.Add(iter.WatchCh())

	var out []*structs.Allocation
	for {
		raw := iter.Next()
		if raw == nil {
			break
		}
		out = append(out, raw.(*structs.Allocation))
	}
	return out, nil
}

// AllocsByNodeTerminal returns all the allocations by node and terminal status
func (s *StateStore) AllocsByNodeTerminal(ws memdb.WatchSet, node string, terminal bool) ([]*structs.Allocation, error) {
	txn := s.db.Txn(false)

	// Get an iterator over the node allocations
	iter, err := txn.Get("allocs", "node", node, terminal)
	if err != nil {
		return nil, err
	}

	ws.Add(iter.WatchCh())

	var out []*structs.Allocation
	for {
		raw := iter.Next()
		if raw == nil {
			break
		}
		out = append(out, raw.(*structs.Allocation))
	}
	return out, nil
}

// AllocsByJob returns all the allocations by job id. When all is false and a
// job with that ID is currently stored, allocations whose attached job has a
// different create index than the stored job are left out.
func (s *StateStore) AllocsByJob(ws memdb.WatchSet, namespace, jobID string, all bool) ([]*structs.Allocation, error) {
	txn := s.db.Txn(false)

	// COMPAT 0.7: Upgrade old objects that do not have namespaces
	if namespace == "" {
		namespace = structs.DefaultNamespace
	}

	// Get the job; job stays nil when it is not stored
	var job *structs.Job
	rawJob, err := txn.First("jobs", "id", namespace, jobID)
	if err != nil {
		return nil, err
	}
	if rawJob != nil {
		job = rawJob.(*structs.Job)
	}

	// Get an iterator over the job's allocations
	iter, err := txn.Get("allocs", "job", namespace, jobID)
	if err != nil {
		return nil, err
	}

	ws.Add(iter.WatchCh())

	var out []*structs.Allocation
	for {
		raw := iter.Next()
		if raw == nil {
			break
		}

		alloc := raw.(*structs.Allocation)
		// If the allocation belongs to a job with the same ID but a different
		// create index and we are not getting all the allocations whose Jobs
		// matches the same Job ID then we skip it
		// NOTE(review): alloc.Job is dereferenced without a nil check here —
		// presumably stored allocations always carry a job; confirm.
		if !all && job != nil && alloc.Job.CreateIndex != job.CreateIndex {
			continue
		}
		out = append(out, raw.(*structs.Allocation))
	}
	return out, nil
}

// AllocsByEval returns all the allocations by eval id
func (s *StateStore) AllocsByEval(ws memdb.WatchSet, evalID string) ([]*structs.Allocation, error) {
	txn := s.db.Txn(false)

	// Get an iterator over the eval allocations
	iter, err := txn.Get("allocs", "eval", evalID)
	if err != nil {
		return nil, err
	}

	ws.Add(iter.WatchCh())

	var out []*structs.Allocation
	for {
		raw := iter.Next()
		if raw == nil {
			break
		}
		out = append(out, raw.(*structs.Allocation))
	}
	return out, nil
}

// AllocsByDeployment returns all the allocations by deployment id
func (s *StateStore) AllocsByDeployment(ws memdb.WatchSet, deploymentID string) ([]*structs.Allocation, error) {
	txn := s.db.Txn(false)

	// Get an iterator over the deployments allocations
	iter, err := txn.Get("allocs", "deployment", deploymentID)
	if err != nil {
		return nil, err
	}

	ws.Add(iter.WatchCh())

	var out []*structs.Allocation
	for {
		raw := iter.Next()
		if raw == nil {
			break
		}
		out = append(out, raw.(*structs.Allocation))
	}
	return out, nil
}

// Allocs returns an iterator over all the allocations
func (s *StateStore) Allocs(ws memdb.WatchSet) (memdb.ResultIterator, error) {
	txn := s.db.Txn(false)

	// Walk the entire table
	iter, err := txn.Get("allocs", "id")
	if err != nil {
		return nil, err
	}

	ws.Add(iter.WatchCh())

	return iter, nil
}

// AllocsByNamespace returns an iterator over all the allocations in the
// namespace
func (s *StateStore) AllocsByNamespace(ws memdb.WatchSet, namespace string) (memdb.ResultIterator, error) {
	txn := s.db.Txn(false)
	return s.allocsByNamespaceImpl(ws, txn, namespace)
}

// allocsByNamespaceImpl returns an iterator over all the allocations in the
// namespace, using the passed transaction
func (s *StateStore) allocsByNamespaceImpl(ws memdb.WatchSet, txn *memdb.Txn, namespace string) (memdb.ResultIterator, error) {
	// Look the allocations up through the "namespace" index
	iter, err := txn.Get("allocs", "namespace", namespace)
	if err != nil {
		return nil, err
	}

	ws.Add(iter.WatchCh())

	return iter, nil
}

// UpsertVaultAccessor is used to register a set of Vault Accessors
func (s *StateStore) UpsertVaultAccessor(index uint64, accessors []*structs.VaultAccessor) error {
	txn := s.db.Txn(true)
	defer txn.Abort()

	for _, accessor := range accessors {
		// Set the create index
		accessor.CreateIndex = index

		// Insert the accessor
		if err := txn.Insert("vault_accessors", accessor); err != nil {
			return fmt.Errorf("accessor insert failed: %v", err)
		}
	}

	if err := txn.Insert("index", &IndexEntry{"vault_accessors", index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}

	txn.Commit()
	return nil
}

// DeleteVaultAccessors is used to delete a set of Vault Accessors
func (s *StateStore) DeleteVaultAccessors(index uint64, accessors []*structs.VaultAccessor) error {
	txn := s.db.Txn(true)
	defer txn.Abort()

	// The passed objects are handed to Delete directly, without a lookup
	for _, accessor := range accessors {
		// Delete the accessor
		if err := txn.Delete("vault_accessors", accessor); err != nil {
			return fmt.Errorf("accessor delete failed: %v", err)
		}
	}

	if err := txn.Insert("index", &IndexEntry{"vault_accessors", index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}

	txn.Commit()
	return nil
}

// VaultAccessor returns the given Vault accessor, or (nil, nil) when it is not
// stored
func (s *StateStore) VaultAccessor(ws memdb.WatchSet, accessor string) (*structs.VaultAccessor, error) {
	txn := s.db.Txn(false)

	watchCh, existing, err := txn.FirstWatch("vault_accessors", "id", accessor)
	if err != nil {
		return nil, fmt.Errorf("accessor lookup failed: %v", err)
	}

	ws.Add(watchCh)

	if existing != nil {
		return existing.(*structs.VaultAccessor), nil
	}

	return nil, nil
}

// VaultAccessors returns an iterator of Vault accessors.
2217 func (s *StateStore) VaultAccessors(ws memdb.WatchSet) (memdb.ResultIterator, error) { 2218 txn := s.db.Txn(false) 2219 2220 iter, err := txn.Get("vault_accessors", "id") 2221 if err != nil { 2222 return nil, err 2223 } 2224 2225 ws.Add(iter.WatchCh()) 2226 2227 return iter, nil 2228 } 2229 2230 // VaultAccessorsByAlloc returns all the Vault accessors by alloc id 2231 func (s *StateStore) VaultAccessorsByAlloc(ws memdb.WatchSet, allocID string) ([]*structs.VaultAccessor, error) { 2232 txn := s.db.Txn(false) 2233 2234 // Get an iterator over the accessors 2235 iter, err := txn.Get("vault_accessors", "alloc_id", allocID) 2236 if err != nil { 2237 return nil, err 2238 } 2239 2240 ws.Add(iter.WatchCh()) 2241 2242 var out []*structs.VaultAccessor 2243 for { 2244 raw := iter.Next() 2245 if raw == nil { 2246 break 2247 } 2248 out = append(out, raw.(*structs.VaultAccessor)) 2249 } 2250 return out, nil 2251 } 2252 2253 // VaultAccessorsByNode returns all the Vault accessors by node id 2254 func (s *StateStore) VaultAccessorsByNode(ws memdb.WatchSet, nodeID string) ([]*structs.VaultAccessor, error) { 2255 txn := s.db.Txn(false) 2256 2257 // Get an iterator over the accessors 2258 iter, err := txn.Get("vault_accessors", "node_id", nodeID) 2259 if err != nil { 2260 return nil, err 2261 } 2262 2263 ws.Add(iter.WatchCh()) 2264 2265 var out []*structs.VaultAccessor 2266 for { 2267 raw := iter.Next() 2268 if raw == nil { 2269 break 2270 } 2271 out = append(out, raw.(*structs.VaultAccessor)) 2272 } 2273 return out, nil 2274 } 2275 2276 // UpdateDeploymentStatus is used to make deployment status updates and 2277 // potentially make a evaluation 2278 func (s *StateStore) UpdateDeploymentStatus(index uint64, req *structs.DeploymentStatusUpdateRequest) error { 2279 txn := s.db.Txn(true) 2280 defer txn.Abort() 2281 2282 if err := s.updateDeploymentStatusImpl(index, req.DeploymentUpdate, txn); err != nil { 2283 return err 2284 } 2285 2286 // Upsert the job if necessary 2287 if 
req.Job != nil { 2288 if err := s.upsertJobImpl(index, req.Job, false, txn); err != nil { 2289 return err 2290 } 2291 } 2292 2293 // Upsert the optional eval 2294 if req.Eval != nil { 2295 if err := s.nestedUpsertEval(txn, index, req.Eval); err != nil { 2296 return err 2297 } 2298 } 2299 2300 txn.Commit() 2301 return nil 2302 } 2303 2304 // updateDeploymentStatusImpl is used to make deployment status updates 2305 func (s *StateStore) updateDeploymentStatusImpl(index uint64, u *structs.DeploymentStatusUpdate, txn *memdb.Txn) error { 2306 // Retrieve deployment 2307 ws := memdb.NewWatchSet() 2308 deployment, err := s.deploymentByIDImpl(ws, u.DeploymentID, txn) 2309 if err != nil { 2310 return err 2311 } else if deployment == nil { 2312 return fmt.Errorf("Deployment ID %q couldn't be updated as it does not exist", u.DeploymentID) 2313 } else if !deployment.Active() { 2314 return fmt.Errorf("Deployment %q has terminal status %q:", deployment.ID, deployment.Status) 2315 } 2316 2317 // Apply the new status 2318 copy := deployment.Copy() 2319 copy.Status = u.Status 2320 copy.StatusDescription = u.StatusDescription 2321 copy.ModifyIndex = index 2322 2323 // COMPAT 0.7: Upgrade old objects that do not have namespaces 2324 if copy.Namespace == "" { 2325 copy.Namespace = structs.DefaultNamespace 2326 } 2327 2328 // Insert the deployment 2329 if err := txn.Insert("deployment", copy); err != nil { 2330 return err 2331 } 2332 2333 // Update the index 2334 if err := txn.Insert("index", &IndexEntry{"deployment", index}); err != nil { 2335 return fmt.Errorf("index update failed: %v", err) 2336 } 2337 2338 // If the deployment is being marked as complete, set the job to stable. 
2339 if copy.Status == structs.DeploymentStatusSuccessful { 2340 if err := s.updateJobStabilityImpl(index, copy.Namespace, copy.JobID, copy.JobVersion, true, txn); err != nil { 2341 return fmt.Errorf("failed to update job stability: %v", err) 2342 } 2343 } 2344 2345 return nil 2346 } 2347 2348 // UpdateJobStability updates the stability of the given job and version to the 2349 // desired status. 2350 func (s *StateStore) UpdateJobStability(index uint64, namespace, jobID string, jobVersion uint64, stable bool) error { 2351 txn := s.db.Txn(true) 2352 defer txn.Abort() 2353 2354 // COMPAT 0.7: Upgrade old objects that do not have namespaces 2355 if namespace == "" { 2356 namespace = structs.DefaultNamespace 2357 } 2358 2359 if err := s.updateJobStabilityImpl(index, namespace, jobID, jobVersion, stable, txn); err != nil { 2360 return err 2361 } 2362 2363 txn.Commit() 2364 return nil 2365 } 2366 2367 // updateJobStabilityImpl updates the stability of the given job and version 2368 func (s *StateStore) updateJobStabilityImpl(index uint64, namespace, jobID string, jobVersion uint64, stable bool, txn *memdb.Txn) error { 2369 // COMPAT 0.7: Upgrade old objects that do not have namespaces 2370 if namespace == "" { 2371 namespace = structs.DefaultNamespace 2372 } 2373 2374 // Get the job that is referenced 2375 job, err := s.jobByIDAndVersionImpl(nil, namespace, jobID, jobVersion, txn) 2376 if err != nil { 2377 return err 2378 } 2379 2380 // Has already been cleared, nothing to do 2381 if job == nil { 2382 return nil 2383 } 2384 2385 // If the job already has the desired stability, nothing to do 2386 if job.Stable == stable { 2387 return nil 2388 } 2389 2390 copy := job.Copy() 2391 copy.Stable = stable 2392 return s.upsertJobImpl(index, copy, true, txn) 2393 } 2394 2395 // UpdateDeploymentPromotion is used to promote canaries in a deployment and 2396 // potentially make a evaluation 2397 func (s *StateStore) UpdateDeploymentPromotion(index uint64, req 
*structs.ApplyDeploymentPromoteRequest) error { 2398 txn := s.db.Txn(true) 2399 defer txn.Abort() 2400 2401 // Retrieve deployment and ensure it is not terminal and is active 2402 ws := memdb.NewWatchSet() 2403 deployment, err := s.deploymentByIDImpl(ws, req.DeploymentID, txn) 2404 if err != nil { 2405 return err 2406 } else if deployment == nil { 2407 return fmt.Errorf("Deployment ID %q couldn't be updated as it does not exist", req.DeploymentID) 2408 } else if !deployment.Active() { 2409 return fmt.Errorf("Deployment %q has terminal status %q:", deployment.ID, deployment.Status) 2410 } 2411 2412 // Retrieve effected allocations 2413 iter, err := txn.Get("allocs", "deployment", req.DeploymentID) 2414 if err != nil { 2415 return err 2416 } 2417 2418 groupIndex := make(map[string]struct{}, len(req.Groups)) 2419 for _, g := range req.Groups { 2420 groupIndex[g] = struct{}{} 2421 } 2422 2423 canaryIndex := make(map[string]struct{}, len(deployment.TaskGroups)) 2424 for _, state := range deployment.TaskGroups { 2425 for _, c := range state.PlacedCanaries { 2426 canaryIndex[c] = struct{}{} 2427 } 2428 } 2429 2430 haveCanaries := false 2431 var unhealthyErr multierror.Error 2432 for { 2433 raw := iter.Next() 2434 if raw == nil { 2435 break 2436 } 2437 2438 alloc := raw.(*structs.Allocation) 2439 2440 // Check that the alloc is a canary 2441 if _, ok := canaryIndex[alloc.ID]; !ok { 2442 continue 2443 } 2444 2445 // Check that the canary is part of a group being promoted 2446 if _, ok := groupIndex[alloc.TaskGroup]; !req.All && !ok { 2447 continue 2448 } 2449 2450 // Ensure the canaries are healthy 2451 if !alloc.DeploymentStatus.IsHealthy() { 2452 multierror.Append(&unhealthyErr, fmt.Errorf("Canary allocation %q for group %q is not healthy", alloc.ID, alloc.TaskGroup)) 2453 continue 2454 } 2455 2456 haveCanaries = true 2457 } 2458 2459 if err := unhealthyErr.ErrorOrNil(); err != nil { 2460 return err 2461 } 2462 2463 if !haveCanaries { 2464 return fmt.Errorf("no canaries 
to promote") 2465 } 2466 2467 // Update deployment 2468 copy := deployment.Copy() 2469 copy.ModifyIndex = index 2470 for tg, status := range copy.TaskGroups { 2471 _, ok := groupIndex[tg] 2472 if !req.All && !ok { 2473 continue 2474 } 2475 2476 status.Promoted = true 2477 } 2478 2479 // If the deployment no longer needs promotion, update its status 2480 if !copy.RequiresPromotion() && copy.Status == structs.DeploymentStatusRunning { 2481 copy.StatusDescription = structs.DeploymentStatusDescriptionRunning 2482 } 2483 2484 // Insert the deployment 2485 if err := s.upsertDeploymentImpl(index, copy, txn); err != nil { 2486 return err 2487 } 2488 2489 // Upsert the optional eval 2490 if req.Eval != nil { 2491 if err := s.nestedUpsertEval(txn, index, req.Eval); err != nil { 2492 return err 2493 } 2494 } 2495 2496 txn.Commit() 2497 return nil 2498 } 2499 2500 // UpdateDeploymentAllocHealth is used to update the health of allocations as 2501 // part of the deployment and potentially make a evaluation 2502 func (s *StateStore) UpdateDeploymentAllocHealth(index uint64, req *structs.ApplyDeploymentAllocHealthRequest) error { 2503 txn := s.db.Txn(true) 2504 defer txn.Abort() 2505 2506 // Retrieve deployment and ensure it is not terminal and is active 2507 ws := memdb.NewWatchSet() 2508 deployment, err := s.deploymentByIDImpl(ws, req.DeploymentID, txn) 2509 if err != nil { 2510 return err 2511 } else if deployment == nil { 2512 return fmt.Errorf("Deployment ID %q couldn't be updated as it does not exist", req.DeploymentID) 2513 } else if !deployment.Active() { 2514 return fmt.Errorf("Deployment %q has terminal status %q:", deployment.ID, deployment.Status) 2515 } 2516 2517 // Update the health status of each allocation 2518 if total := len(req.HealthyAllocationIDs) + len(req.UnhealthyAllocationIDs); total != 0 { 2519 setAllocHealth := func(id string, healthy bool) error { 2520 existing, err := txn.First("allocs", "id", id) 2521 if err != nil { 2522 return fmt.Errorf("alloc %q 
lookup failed: %v", id, err) 2523 } 2524 if existing == nil { 2525 return fmt.Errorf("unknown alloc %q", id) 2526 } 2527 2528 old := existing.(*structs.Allocation) 2529 if old.DeploymentID != req.DeploymentID { 2530 return fmt.Errorf("alloc %q is not part of deployment %q", id, req.DeploymentID) 2531 } 2532 2533 // Set the health 2534 copy := old.Copy() 2535 if copy.DeploymentStatus == nil { 2536 copy.DeploymentStatus = &structs.AllocDeploymentStatus{} 2537 } 2538 copy.DeploymentStatus.Healthy = helper.BoolToPtr(healthy) 2539 copy.DeploymentStatus.ModifyIndex = index 2540 2541 if err := s.updateDeploymentWithAlloc(index, copy, old, txn); err != nil { 2542 return fmt.Errorf("error updating deployment: %v", err) 2543 } 2544 2545 if err := txn.Insert("allocs", copy); err != nil { 2546 return fmt.Errorf("alloc insert failed: %v", err) 2547 } 2548 2549 return nil 2550 } 2551 2552 for _, id := range req.HealthyAllocationIDs { 2553 if err := setAllocHealth(id, true); err != nil { 2554 return err 2555 } 2556 } 2557 for _, id := range req.UnhealthyAllocationIDs { 2558 if err := setAllocHealth(id, false); err != nil { 2559 return err 2560 } 2561 } 2562 2563 // Update the indexes 2564 if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { 2565 return fmt.Errorf("index update failed: %v", err) 2566 } 2567 } 2568 2569 // Update the deployment status as needed. 
2570 if req.DeploymentUpdate != nil { 2571 if err := s.updateDeploymentStatusImpl(index, req.DeploymentUpdate, txn); err != nil { 2572 return err 2573 } 2574 } 2575 2576 // Upsert the job if necessary 2577 if req.Job != nil { 2578 if err := s.upsertJobImpl(index, req.Job, false, txn); err != nil { 2579 return err 2580 } 2581 } 2582 2583 // Upsert the optional eval 2584 if req.Eval != nil { 2585 if err := s.nestedUpsertEval(txn, index, req.Eval); err != nil { 2586 return err 2587 } 2588 } 2589 2590 txn.Commit() 2591 return nil 2592 } 2593 2594 // LastIndex returns the greatest index value for all indexes 2595 func (s *StateStore) LatestIndex() (uint64, error) { 2596 indexes, err := s.Indexes() 2597 if err != nil { 2598 return 0, err 2599 } 2600 2601 var max uint64 = 0 2602 for { 2603 raw := indexes.Next() 2604 if raw == nil { 2605 break 2606 } 2607 2608 // Prepare the request struct 2609 idx := raw.(*IndexEntry) 2610 2611 // Determine the max 2612 if idx.Value > max { 2613 max = idx.Value 2614 } 2615 } 2616 2617 return max, nil 2618 } 2619 2620 // Index finds the matching index value 2621 func (s *StateStore) Index(name string) (uint64, error) { 2622 txn := s.db.Txn(false) 2623 2624 // Lookup the first matching index 2625 out, err := txn.First("index", "id", name) 2626 if err != nil { 2627 return 0, err 2628 } 2629 if out == nil { 2630 return 0, nil 2631 } 2632 return out.(*IndexEntry).Value, nil 2633 } 2634 2635 // RemoveIndex is a helper method to remove an index for testing purposes 2636 func (s *StateStore) RemoveIndex(name string) error { 2637 txn := s.db.Txn(true) 2638 defer txn.Abort() 2639 2640 if _, err := txn.DeleteAll("index", "id", name); err != nil { 2641 return err 2642 } 2643 2644 txn.Commit() 2645 return nil 2646 } 2647 2648 // Indexes returns an iterator over all the indexes 2649 func (s *StateStore) Indexes() (memdb.ResultIterator, error) { 2650 txn := s.db.Txn(false) 2651 2652 // Walk the entire nodes table 2653 iter, err := txn.Get("index", "id") 
2654 if err != nil { 2655 return nil, err 2656 } 2657 return iter, nil 2658 } 2659 2660 // ReconcileJobSummaries re-creates summaries for all jobs present in the state 2661 // store 2662 func (s *StateStore) ReconcileJobSummaries(index uint64) error { 2663 txn := s.db.Txn(true) 2664 defer txn.Abort() 2665 2666 // Get all the jobs 2667 iter, err := txn.Get("jobs", "id") 2668 if err != nil { 2669 return err 2670 } 2671 for { 2672 rawJob := iter.Next() 2673 if rawJob == nil { 2674 break 2675 } 2676 job := rawJob.(*structs.Job) 2677 2678 // Create a job summary for the job 2679 summary := &structs.JobSummary{ 2680 JobID: job.ID, 2681 Namespace: job.Namespace, 2682 Summary: make(map[string]structs.TaskGroupSummary), 2683 } 2684 for _, tg := range job.TaskGroups { 2685 summary.Summary[tg.Name] = structs.TaskGroupSummary{} 2686 } 2687 2688 // COMPAT 0.7: Upgrade old objects that do not have namespaces 2689 if job.Namespace == "" { 2690 job.Namespace = structs.DefaultNamespace 2691 } 2692 2693 // Find all the allocations for the jobs 2694 iterAllocs, err := txn.Get("allocs", "job", job.Namespace, job.ID) 2695 if err != nil { 2696 return err 2697 } 2698 2699 // Calculate the summary for the job 2700 for { 2701 rawAlloc := iterAllocs.Next() 2702 if rawAlloc == nil { 2703 break 2704 } 2705 alloc := rawAlloc.(*structs.Allocation) 2706 2707 // Ignore the allocation if it doesn't belong to the currently 2708 // registered job. 
The allocation is checked because of issue #2304 2709 if alloc.Job == nil || alloc.Job.CreateIndex != job.CreateIndex { 2710 continue 2711 } 2712 2713 tg := summary.Summary[alloc.TaskGroup] 2714 switch alloc.ClientStatus { 2715 case structs.AllocClientStatusFailed: 2716 tg.Failed += 1 2717 case structs.AllocClientStatusLost: 2718 tg.Lost += 1 2719 case structs.AllocClientStatusComplete: 2720 tg.Complete += 1 2721 case structs.AllocClientStatusRunning: 2722 tg.Running += 1 2723 case structs.AllocClientStatusPending: 2724 tg.Starting += 1 2725 default: 2726 s.logger.Printf("[ERR] state_store: invalid client status: %v in allocation %q", alloc.ClientStatus, alloc.ID) 2727 } 2728 summary.Summary[alloc.TaskGroup] = tg 2729 } 2730 2731 // Set the create index of the summary same as the job's create index 2732 // and the modify index to the current index 2733 summary.CreateIndex = job.CreateIndex 2734 summary.ModifyIndex = index 2735 2736 // Insert the job summary 2737 if err := txn.Insert("job_summary", summary); err != nil { 2738 return fmt.Errorf("error inserting job summary: %v", err) 2739 } 2740 } 2741 2742 // Update the indexes table for job summary 2743 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 2744 return fmt.Errorf("index update failed: %v", err) 2745 } 2746 txn.Commit() 2747 return nil 2748 } 2749 2750 // setJobStatuses is a helper for calling setJobStatus on multiple jobs by ID. 2751 // It takes a map of job IDs to an optional forceStatus string. It returns an 2752 // error if the job doesn't exist or setJobStatus fails. 
2753 func (s *StateStore) setJobStatuses(index uint64, txn *memdb.Txn, 2754 jobs map[structs.NamespacedID]string, evalDelete bool) error { 2755 for tuple, forceStatus := range jobs { 2756 // COMPAT 0.7: Upgrade old objects that do not have namespaces 2757 if tuple.Namespace == "" { 2758 tuple.Namespace = structs.DefaultNamespace 2759 } 2760 2761 existing, err := txn.First("jobs", "id", tuple.Namespace, tuple.ID) 2762 if err != nil { 2763 return fmt.Errorf("job lookup failed: %v", err) 2764 } 2765 2766 if existing == nil { 2767 continue 2768 } 2769 2770 if err := s.setJobStatus(index, txn, existing.(*structs.Job), evalDelete, forceStatus); err != nil { 2771 return err 2772 } 2773 } 2774 2775 return nil 2776 } 2777 2778 // setJobStatus sets the status of the job by looking up associated evaluations 2779 // and allocations. evalDelete should be set to true if setJobStatus is being 2780 // called because an evaluation is being deleted (potentially because of garbage 2781 // collection). If forceStatus is non-empty, the job's status will be set to the 2782 // passed status. 2783 func (s *StateStore) setJobStatus(index uint64, txn *memdb.Txn, 2784 job *structs.Job, evalDelete bool, forceStatus string) error { 2785 2786 // Capture the current status so we can check if there is a change 2787 oldStatus := job.Status 2788 if index == job.CreateIndex { 2789 oldStatus = "" 2790 } 2791 newStatus := forceStatus 2792 2793 // If forceStatus is not set, compute the jobs status. 2794 if forceStatus == "" { 2795 var err error 2796 newStatus, err = s.getJobStatus(txn, job, evalDelete) 2797 if err != nil { 2798 return err 2799 } 2800 } 2801 2802 // Fast-path if nothing has changed. 
2803 if oldStatus == newStatus { 2804 return nil 2805 } 2806 2807 // Copy and update the existing job 2808 updated := job.Copy() 2809 updated.Status = newStatus 2810 updated.ModifyIndex = index 2811 2812 // COMPAT 0.7: Upgrade old objects that do not have namespaces 2813 if updated.Namespace == "" { 2814 updated.Namespace = structs.DefaultNamespace 2815 } 2816 2817 // Insert the job 2818 if err := txn.Insert("jobs", updated); err != nil { 2819 return fmt.Errorf("job insert failed: %v", err) 2820 } 2821 if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil { 2822 return fmt.Errorf("index update failed: %v", err) 2823 } 2824 2825 // Update the children summary 2826 if updated.ParentID != "" { 2827 // Try to update the summary of the parent job summary 2828 summaryRaw, err := txn.First("job_summary", "id", updated.Namespace, updated.ParentID) 2829 if err != nil { 2830 return fmt.Errorf("unable to retrieve summary for parent job: %v", err) 2831 } 2832 2833 // Only continue if the summary exists. 
It could not exist if the parent 2834 // job was removed 2835 if summaryRaw != nil { 2836 existing := summaryRaw.(*structs.JobSummary) 2837 pSummary := existing.Copy() 2838 if pSummary.Children == nil { 2839 pSummary.Children = new(structs.JobChildrenSummary) 2840 } 2841 2842 // COMPAT 0.7: Upgrade old objects that do not have namespaces 2843 if pSummary.Namespace == "" { 2844 pSummary.Namespace = structs.DefaultNamespace 2845 } 2846 2847 // Determine the transition and update the correct fields 2848 children := pSummary.Children 2849 2850 // Decrement old status 2851 if oldStatus != "" { 2852 switch oldStatus { 2853 case structs.JobStatusPending: 2854 children.Pending-- 2855 case structs.JobStatusRunning: 2856 children.Running-- 2857 case structs.JobStatusDead: 2858 children.Dead-- 2859 default: 2860 return fmt.Errorf("unknown old job status %q", oldStatus) 2861 } 2862 } 2863 2864 // Increment new status 2865 switch newStatus { 2866 case structs.JobStatusPending: 2867 children.Pending++ 2868 case structs.JobStatusRunning: 2869 children.Running++ 2870 case structs.JobStatusDead: 2871 children.Dead++ 2872 default: 2873 return fmt.Errorf("unknown new job status %q", newStatus) 2874 } 2875 2876 // Update the index 2877 pSummary.ModifyIndex = index 2878 2879 // Insert the summary 2880 if err := txn.Insert("job_summary", pSummary); err != nil { 2881 return fmt.Errorf("job summary insert failed: %v", err) 2882 } 2883 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 2884 return fmt.Errorf("index update failed: %v", err) 2885 } 2886 } 2887 } 2888 2889 return nil 2890 } 2891 2892 func (s *StateStore) getJobStatus(txn *memdb.Txn, job *structs.Job, evalDelete bool) (string, error) { 2893 // COMPAT 0.7: Upgrade old objects that do not have namespaces 2894 if job.Namespace == "" { 2895 job.Namespace = structs.DefaultNamespace 2896 } 2897 2898 // System, Periodic and Parameterized jobs are running until explicitly 2899 // stopped 2900 if job.Type == 
structs.JobTypeSystem || job.IsParameterized() || job.IsPeriodic() { 2901 if job.Stop { 2902 return structs.JobStatusDead, nil 2903 } 2904 2905 return structs.JobStatusRunning, nil 2906 } 2907 2908 allocs, err := txn.Get("allocs", "job", job.Namespace, job.ID) 2909 if err != nil { 2910 return "", err 2911 } 2912 2913 // If there is a non-terminal allocation, the job is running. 2914 hasAlloc := false 2915 for alloc := allocs.Next(); alloc != nil; alloc = allocs.Next() { 2916 hasAlloc = true 2917 if !alloc.(*structs.Allocation).TerminalStatus() { 2918 return structs.JobStatusRunning, nil 2919 } 2920 } 2921 2922 evals, err := txn.Get("evals", "job_prefix", job.Namespace, job.ID) 2923 if err != nil { 2924 return "", err 2925 } 2926 2927 hasEval := false 2928 for raw := evals.Next(); raw != nil; raw = evals.Next() { 2929 e := raw.(*structs.Evaluation) 2930 2931 // Filter non-exact matches 2932 if e.JobID != job.ID { 2933 continue 2934 } 2935 2936 hasEval = true 2937 if !e.TerminalStatus() { 2938 return structs.JobStatusPending, nil 2939 } 2940 } 2941 2942 // The job is dead if all the allocations and evals are terminal or if there 2943 // are no evals because of garbage collection. 
2944 if evalDelete || hasEval || hasAlloc { 2945 return structs.JobStatusDead, nil 2946 } 2947 2948 return structs.JobStatusPending, nil 2949 } 2950 2951 // updateSummaryWithJob creates or updates job summaries when new jobs are 2952 // upserted or existing ones are updated 2953 func (s *StateStore) updateSummaryWithJob(index uint64, job *structs.Job, 2954 txn *memdb.Txn) error { 2955 2956 // COMPAT 0.7: Upgrade old objects that do not have namespaces 2957 if job.Namespace == "" { 2958 job.Namespace = structs.DefaultNamespace 2959 } 2960 2961 // Update the job summary 2962 summaryRaw, err := txn.First("job_summary", "id", job.Namespace, job.ID) 2963 if err != nil { 2964 return fmt.Errorf("job summary lookup failed: %v", err) 2965 } 2966 2967 // Get the summary or create if necessary 2968 var summary *structs.JobSummary 2969 hasSummaryChanged := false 2970 if summaryRaw != nil { 2971 summary = summaryRaw.(*structs.JobSummary).Copy() 2972 } else { 2973 summary = &structs.JobSummary{ 2974 JobID: job.ID, 2975 Namespace: job.Namespace, 2976 Summary: make(map[string]structs.TaskGroupSummary), 2977 Children: new(structs.JobChildrenSummary), 2978 CreateIndex: index, 2979 } 2980 hasSummaryChanged = true 2981 } 2982 2983 for _, tg := range job.TaskGroups { 2984 if _, ok := summary.Summary[tg.Name]; !ok { 2985 newSummary := structs.TaskGroupSummary{ 2986 Complete: 0, 2987 Failed: 0, 2988 Running: 0, 2989 Starting: 0, 2990 } 2991 summary.Summary[tg.Name] = newSummary 2992 hasSummaryChanged = true 2993 } 2994 } 2995 2996 // The job summary has changed, so update the modify index. 
2997 if hasSummaryChanged { 2998 summary.ModifyIndex = index 2999 3000 // COMPAT 0.7: Upgrade old objects that do not have namespaces 3001 if summary.Namespace == "" { 3002 summary.Namespace = structs.DefaultNamespace 3003 } 3004 3005 // Update the indexes table for job summary 3006 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 3007 return fmt.Errorf("index update failed: %v", err) 3008 } 3009 if err := txn.Insert("job_summary", summary); err != nil { 3010 return err 3011 } 3012 } 3013 3014 return nil 3015 } 3016 3017 // updateDeploymentWithAlloc is used to update the deployment state associated 3018 // with the given allocation. The passed alloc may be updated if the deployment 3019 // status has changed to capture the modify index at which it has changed. 3020 func (s *StateStore) updateDeploymentWithAlloc(index uint64, alloc, existing *structs.Allocation, txn *memdb.Txn) error { 3021 // Nothing to do if the allocation is not associated with a deployment 3022 if alloc.DeploymentID == "" { 3023 return nil 3024 } 3025 3026 // Get the deployment 3027 ws := memdb.NewWatchSet() 3028 deployment, err := s.deploymentByIDImpl(ws, alloc.DeploymentID, txn) 3029 if err != nil { 3030 return err 3031 } 3032 if deployment == nil { 3033 return nil 3034 } 3035 3036 // Retrieve the deployment state object 3037 _, ok := deployment.TaskGroups[alloc.TaskGroup] 3038 if !ok { 3039 // If the task group isn't part of the deployment, the task group wasn't 3040 // part of a rolling update so nothing to do 3041 return nil 3042 } 3043 3044 // Do not modify in-place. 
Instead keep track of what must be done 3045 placed := 0 3046 healthy := 0 3047 unhealthy := 0 3048 3049 // If there was no existing allocation, this is a placement and we increment 3050 // the placement 3051 existingHealthSet := existing != nil && existing.DeploymentStatus != nil && existing.DeploymentStatus.Healthy != nil 3052 allocHealthSet := alloc.DeploymentStatus != nil && alloc.DeploymentStatus.Healthy != nil 3053 if existing == nil || existing.DeploymentID != alloc.DeploymentID { 3054 placed++ 3055 } else if !existingHealthSet && allocHealthSet { 3056 if *alloc.DeploymentStatus.Healthy { 3057 healthy++ 3058 } else { 3059 unhealthy++ 3060 } 3061 } else if existingHealthSet && allocHealthSet { 3062 // See if it has gone from healthy to unhealthy 3063 if *existing.DeploymentStatus.Healthy && !*alloc.DeploymentStatus.Healthy { 3064 healthy-- 3065 unhealthy++ 3066 } 3067 } 3068 3069 // Nothing to do 3070 if placed == 0 && healthy == 0 && unhealthy == 0 { 3071 return nil 3072 } 3073 3074 // Update the allocation's deployment status modify index 3075 if alloc.DeploymentStatus != nil && healthy+unhealthy != 0 { 3076 alloc.DeploymentStatus.ModifyIndex = index 3077 } 3078 3079 // Create a copy of the deployment object 3080 deploymentCopy := deployment.Copy() 3081 deploymentCopy.ModifyIndex = index 3082 3083 state := deploymentCopy.TaskGroups[alloc.TaskGroup] 3084 state.PlacedAllocs += placed 3085 state.HealthyAllocs += healthy 3086 state.UnhealthyAllocs += unhealthy 3087 3088 // Upsert the deployment 3089 if err := s.upsertDeploymentImpl(index, deploymentCopy, txn); err != nil { 3090 return err 3091 } 3092 3093 return nil 3094 } 3095 3096 // updateSummaryWithAlloc updates the job summary when allocations are updated 3097 // or inserted 3098 func (s *StateStore) updateSummaryWithAlloc(index uint64, alloc *structs.Allocation, 3099 existingAlloc *structs.Allocation, txn *memdb.Txn) error { 3100 3101 // We don't have to update the summary if the job is missing 3102 if 
alloc.Job == nil { 3103 return nil 3104 } 3105 // COMPAT 0.7: Upgrade old objects that do not have namespaces 3106 if alloc.Namespace == "" { 3107 alloc.Namespace = structs.DefaultNamespace 3108 } 3109 3110 summaryRaw, err := txn.First("job_summary", "id", alloc.Namespace, alloc.JobID) 3111 if err != nil { 3112 return fmt.Errorf("unable to lookup job summary for job id %q in namespace %q: %v", alloc.JobID, alloc.Namespace, err) 3113 } 3114 3115 if summaryRaw == nil { 3116 // Check if the job is de-registered 3117 rawJob, err := txn.First("jobs", "id", alloc.Namespace, alloc.JobID) 3118 if err != nil { 3119 return fmt.Errorf("unable to query job: %v", err) 3120 } 3121 3122 // If the job is de-registered then we skip updating it's summary 3123 if rawJob == nil { 3124 return nil 3125 } 3126 3127 return fmt.Errorf("job summary for job %q in namespace %q is not present", alloc.JobID, alloc.Namespace) 3128 } 3129 3130 // Get a copy of the existing summary 3131 jobSummary := summaryRaw.(*structs.JobSummary).Copy() 3132 3133 // Not updating the job summary because the allocation doesn't belong to the 3134 // currently registered job 3135 if jobSummary.CreateIndex != alloc.Job.CreateIndex { 3136 return nil 3137 } 3138 3139 tgSummary, ok := jobSummary.Summary[alloc.TaskGroup] 3140 if !ok { 3141 return fmt.Errorf("unable to find task group in the job summary: %v", alloc.TaskGroup) 3142 } 3143 3144 summaryChanged := false 3145 if existingAlloc == nil { 3146 switch alloc.DesiredStatus { 3147 case structs.AllocDesiredStatusStop, structs.AllocDesiredStatusEvict: 3148 s.logger.Printf("[ERR] state_store: new allocation inserted into state store with id: %v and state: %v", 3149 alloc.ID, alloc.DesiredStatus) 3150 } 3151 switch alloc.ClientStatus { 3152 case structs.AllocClientStatusPending: 3153 tgSummary.Starting += 1 3154 if tgSummary.Queued > 0 { 3155 tgSummary.Queued -= 1 3156 } 3157 summaryChanged = true 3158 case structs.AllocClientStatusRunning, 
structs.AllocClientStatusFailed, 3159 structs.AllocClientStatusComplete: 3160 s.logger.Printf("[ERR] state_store: new allocation inserted into state store with id: %v and state: %v", 3161 alloc.ID, alloc.ClientStatus) 3162 } 3163 } else if existingAlloc.ClientStatus != alloc.ClientStatus { 3164 // Incrementing the client of the bin of the current state 3165 switch alloc.ClientStatus { 3166 case structs.AllocClientStatusRunning: 3167 tgSummary.Running += 1 3168 case structs.AllocClientStatusFailed: 3169 tgSummary.Failed += 1 3170 case structs.AllocClientStatusPending: 3171 tgSummary.Starting += 1 3172 case structs.AllocClientStatusComplete: 3173 tgSummary.Complete += 1 3174 case structs.AllocClientStatusLost: 3175 tgSummary.Lost += 1 3176 } 3177 3178 // Decrementing the count of the bin of the last state 3179 switch existingAlloc.ClientStatus { 3180 case structs.AllocClientStatusRunning: 3181 tgSummary.Running -= 1 3182 case structs.AllocClientStatusPending: 3183 tgSummary.Starting -= 1 3184 case structs.AllocClientStatusLost: 3185 tgSummary.Lost -= 1 3186 case structs.AllocClientStatusFailed, structs.AllocClientStatusComplete: 3187 default: 3188 s.logger.Printf("[ERR] state_store: invalid old state of allocation with id: %v, and state: %v", 3189 existingAlloc.ID, existingAlloc.ClientStatus) 3190 } 3191 summaryChanged = true 3192 } 3193 jobSummary.Summary[alloc.TaskGroup] = tgSummary 3194 3195 if summaryChanged { 3196 jobSummary.ModifyIndex = index 3197 3198 // COMPAT 0.7: Upgrade old objects that do not have namespaces 3199 if jobSummary.Namespace == "" { 3200 jobSummary.Namespace = structs.DefaultNamespace 3201 } 3202 3203 // Update the indexes table for job summary 3204 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 3205 return fmt.Errorf("index update failed: %v", err) 3206 } 3207 3208 if err := txn.Insert("job_summary", jobSummary); err != nil { 3209 return fmt.Errorf("updating job summary failed: %v", err) 3210 } 3211 } 3212 
3213 return nil 3214 } 3215 3216 // addEphemeralDiskToTaskGroups adds missing EphemeralDisk objects to TaskGroups 3217 func (s *StateStore) addEphemeralDiskToTaskGroups(job *structs.Job) { 3218 for _, tg := range job.TaskGroups { 3219 var diskMB int 3220 for _, task := range tg.Tasks { 3221 if task.Resources != nil { 3222 diskMB += task.Resources.DiskMB 3223 task.Resources.DiskMB = 0 3224 } 3225 } 3226 if tg.EphemeralDisk != nil { 3227 continue 3228 } 3229 tg.EphemeralDisk = &structs.EphemeralDisk{ 3230 SizeMB: diskMB, 3231 } 3232 } 3233 } 3234 3235 // UpsertACLPolicies is used to create or update a set of ACL policies 3236 func (s *StateStore) UpsertACLPolicies(index uint64, policies []*structs.ACLPolicy) error { 3237 txn := s.db.Txn(true) 3238 defer txn.Abort() 3239 3240 for _, policy := range policies { 3241 // Ensure the policy hash is non-nil. This should be done outside the state store 3242 // for performance reasons, but we check here for defense in depth. 3243 if len(policy.Hash) == 0 { 3244 policy.SetHash() 3245 } 3246 3247 // Check if the policy already exists 3248 existing, err := txn.First("acl_policy", "id", policy.Name) 3249 if err != nil { 3250 return fmt.Errorf("policy lookup failed: %v", err) 3251 } 3252 3253 // Update all the indexes 3254 if existing != nil { 3255 policy.CreateIndex = existing.(*structs.ACLPolicy).CreateIndex 3256 policy.ModifyIndex = index 3257 } else { 3258 policy.CreateIndex = index 3259 policy.ModifyIndex = index 3260 } 3261 3262 // Update the policy 3263 if err := txn.Insert("acl_policy", policy); err != nil { 3264 return fmt.Errorf("upserting policy failed: %v", err) 3265 } 3266 } 3267 3268 // Update the indexes tabl 3269 if err := txn.Insert("index", &IndexEntry{"acl_policy", index}); err != nil { 3270 return fmt.Errorf("index update failed: %v", err) 3271 } 3272 3273 txn.Commit() 3274 return nil 3275 } 3276 3277 // DeleteACLPolicies deletes the policies with the given names 3278 func (s *StateStore) DeleteACLPolicies(index 
uint64, names []string) error { 3279 txn := s.db.Txn(true) 3280 defer txn.Abort() 3281 3282 // Delete the policy 3283 for _, name := range names { 3284 if _, err := txn.DeleteAll("acl_policy", "id", name); err != nil { 3285 return fmt.Errorf("deleting acl policy failed: %v", err) 3286 } 3287 } 3288 if err := txn.Insert("index", &IndexEntry{"acl_policy", index}); err != nil { 3289 return fmt.Errorf("index update failed: %v", err) 3290 } 3291 txn.Commit() 3292 return nil 3293 } 3294 3295 // ACLPolicyByName is used to lookup a policy by name 3296 func (s *StateStore) ACLPolicyByName(ws memdb.WatchSet, name string) (*structs.ACLPolicy, error) { 3297 txn := s.db.Txn(false) 3298 3299 watchCh, existing, err := txn.FirstWatch("acl_policy", "id", name) 3300 if err != nil { 3301 return nil, fmt.Errorf("acl policy lookup failed: %v", err) 3302 } 3303 ws.Add(watchCh) 3304 3305 if existing != nil { 3306 return existing.(*structs.ACLPolicy), nil 3307 } 3308 return nil, nil 3309 } 3310 3311 // ACLPolicyByNamePrefix is used to lookup policies by prefix 3312 func (s *StateStore) ACLPolicyByNamePrefix(ws memdb.WatchSet, prefix string) (memdb.ResultIterator, error) { 3313 txn := s.db.Txn(false) 3314 3315 iter, err := txn.Get("acl_policy", "id_prefix", prefix) 3316 if err != nil { 3317 return nil, fmt.Errorf("acl policy lookup failed: %v", err) 3318 } 3319 ws.Add(iter.WatchCh()) 3320 3321 return iter, nil 3322 } 3323 3324 // ACLPolicies returns an iterator over all the acl policies 3325 func (s *StateStore) ACLPolicies(ws memdb.WatchSet) (memdb.ResultIterator, error) { 3326 txn := s.db.Txn(false) 3327 3328 // Walk the entire table 3329 iter, err := txn.Get("acl_policy", "id") 3330 if err != nil { 3331 return nil, err 3332 } 3333 ws.Add(iter.WatchCh()) 3334 return iter, nil 3335 } 3336 3337 // UpsertACLTokens is used to create or update a set of ACL tokens 3338 func (s *StateStore) UpsertACLTokens(index uint64, tokens []*structs.ACLToken) error { 3339 txn := s.db.Txn(true) 3340 defer 
txn.Abort() 3341 3342 for _, token := range tokens { 3343 // Ensure the policy hash is non-nil. This should be done outside the state store 3344 // for performance reasons, but we check here for defense in depth. 3345 if len(token.Hash) == 0 { 3346 token.SetHash() 3347 } 3348 3349 // Check if the token already exists 3350 existing, err := txn.First("acl_token", "id", token.AccessorID) 3351 if err != nil { 3352 return fmt.Errorf("token lookup failed: %v", err) 3353 } 3354 3355 // Update all the indexes 3356 if existing != nil { 3357 existTK := existing.(*structs.ACLToken) 3358 token.CreateIndex = existTK.CreateIndex 3359 token.ModifyIndex = index 3360 3361 // Do not allow SecretID or create time to change 3362 token.SecretID = existTK.SecretID 3363 token.CreateTime = existTK.CreateTime 3364 3365 } else { 3366 token.CreateIndex = index 3367 token.ModifyIndex = index 3368 } 3369 3370 // Update the token 3371 if err := txn.Insert("acl_token", token); err != nil { 3372 return fmt.Errorf("upserting token failed: %v", err) 3373 } 3374 } 3375 3376 // Update the indexes table 3377 if err := txn.Insert("index", &IndexEntry{"acl_token", index}); err != nil { 3378 return fmt.Errorf("index update failed: %v", err) 3379 } 3380 txn.Commit() 3381 return nil 3382 } 3383 3384 // DeleteACLTokens deletes the tokens with the given accessor ids 3385 func (s *StateStore) DeleteACLTokens(index uint64, ids []string) error { 3386 txn := s.db.Txn(true) 3387 defer txn.Abort() 3388 3389 // Delete the tokens 3390 for _, id := range ids { 3391 if _, err := txn.DeleteAll("acl_token", "id", id); err != nil { 3392 return fmt.Errorf("deleting acl token failed: %v", err) 3393 } 3394 } 3395 if err := txn.Insert("index", &IndexEntry{"acl_token", index}); err != nil { 3396 return fmt.Errorf("index update failed: %v", err) 3397 } 3398 txn.Commit() 3399 return nil 3400 } 3401 3402 // ACLTokenByAccessorID is used to lookup a token by accessor ID 3403 func (s *StateStore) ACLTokenByAccessorID(ws 
memdb.WatchSet, id string) (*structs.ACLToken, error) { 3404 txn := s.db.Txn(false) 3405 3406 watchCh, existing, err := txn.FirstWatch("acl_token", "id", id) 3407 if err != nil { 3408 return nil, fmt.Errorf("acl token lookup failed: %v", err) 3409 } 3410 ws.Add(watchCh) 3411 3412 if existing != nil { 3413 return existing.(*structs.ACLToken), nil 3414 } 3415 return nil, nil 3416 } 3417 3418 // ACLTokenBySecretID is used to lookup a token by secret ID 3419 func (s *StateStore) ACLTokenBySecretID(ws memdb.WatchSet, secretID string) (*structs.ACLToken, error) { 3420 txn := s.db.Txn(false) 3421 3422 watchCh, existing, err := txn.FirstWatch("acl_token", "secret", secretID) 3423 if err != nil { 3424 return nil, fmt.Errorf("acl token lookup failed: %v", err) 3425 } 3426 ws.Add(watchCh) 3427 3428 if existing != nil { 3429 return existing.(*structs.ACLToken), nil 3430 } 3431 return nil, nil 3432 } 3433 3434 // ACLTokenByAccessorIDPrefix is used to lookup tokens by prefix 3435 func (s *StateStore) ACLTokenByAccessorIDPrefix(ws memdb.WatchSet, prefix string) (memdb.ResultIterator, error) { 3436 txn := s.db.Txn(false) 3437 3438 iter, err := txn.Get("acl_token", "id_prefix", prefix) 3439 if err != nil { 3440 return nil, fmt.Errorf("acl token lookup failed: %v", err) 3441 } 3442 ws.Add(iter.WatchCh()) 3443 return iter, nil 3444 } 3445 3446 // ACLTokens returns an iterator over all the tokens 3447 func (s *StateStore) ACLTokens(ws memdb.WatchSet) (memdb.ResultIterator, error) { 3448 txn := s.db.Txn(false) 3449 3450 // Walk the entire table 3451 iter, err := txn.Get("acl_token", "id") 3452 if err != nil { 3453 return nil, err 3454 } 3455 ws.Add(iter.WatchCh()) 3456 return iter, nil 3457 } 3458 3459 // ACLTokensByGlobal returns an iterator over all the tokens filtered by global value 3460 func (s *StateStore) ACLTokensByGlobal(ws memdb.WatchSet, globalVal bool) (memdb.ResultIterator, error) { 3461 txn := s.db.Txn(false) 3462 3463 // Walk the entire table 3464 iter, err := 
txn.Get("acl_token", "global", globalVal) 3465 if err != nil { 3466 return nil, err 3467 } 3468 ws.Add(iter.WatchCh()) 3469 return iter, nil 3470 } 3471 3472 // CanBootstrapACLToken checks if bootstrapping is possible and returns the reset index 3473 func (s *StateStore) CanBootstrapACLToken() (bool, uint64, error) { 3474 txn := s.db.Txn(false) 3475 3476 // Lookup the bootstrap sentinel 3477 out, err := txn.First("index", "id", "acl_token_bootstrap") 3478 if err != nil { 3479 return false, 0, err 3480 } 3481 3482 // No entry, we haven't bootstrapped yet 3483 if out == nil { 3484 return true, 0, nil 3485 } 3486 3487 // Return the reset index if we've already bootstrapped 3488 return false, out.(*IndexEntry).Value, nil 3489 } 3490 3491 // BootstrapACLToken is used to create an initial ACL token 3492 func (s *StateStore) BootstrapACLTokens(index, resetIndex uint64, token *structs.ACLToken) error { 3493 txn := s.db.Txn(true) 3494 defer txn.Abort() 3495 3496 // Check if we have already done a bootstrap 3497 existing, err := txn.First("index", "id", "acl_token_bootstrap") 3498 if err != nil { 3499 return fmt.Errorf("bootstrap check failed: %v", err) 3500 } 3501 if existing != nil { 3502 if resetIndex == 0 { 3503 return fmt.Errorf("ACL bootstrap already done") 3504 } else if resetIndex != existing.(*IndexEntry).Value { 3505 return fmt.Errorf("Invalid reset index for ACL bootstrap") 3506 } 3507 } 3508 3509 // Update the Create/Modify time 3510 token.CreateIndex = index 3511 token.ModifyIndex = index 3512 3513 // Insert the token 3514 if err := txn.Insert("acl_token", token); err != nil { 3515 return fmt.Errorf("upserting token failed: %v", err) 3516 } 3517 3518 // Update the indexes table, prevents future bootstrap until reset 3519 if err := txn.Insert("index", &IndexEntry{"acl_token", index}); err != nil { 3520 return fmt.Errorf("index update failed: %v", err) 3521 } 3522 if err := txn.Insert("index", &IndexEntry{"acl_token_bootstrap", index}); err != nil { 3523 return 
fmt.Errorf("index update failed: %v", err) 3524 } 3525 txn.Commit() 3526 return nil 3527 } 3528 3529 // StateSnapshot is used to provide a point-in-time snapshot 3530 type StateSnapshot struct { 3531 StateStore 3532 } 3533 3534 // StateRestore is used to optimize the performance when 3535 // restoring state by only using a single large transaction 3536 // instead of thousands of sub transactions 3537 type StateRestore struct { 3538 txn *memdb.Txn 3539 } 3540 3541 // Abort is used to abort the restore operation 3542 func (s *StateRestore) Abort() { 3543 s.txn.Abort() 3544 } 3545 3546 // Commit is used to commit the restore operation 3547 func (s *StateRestore) Commit() { 3548 s.txn.Commit() 3549 } 3550 3551 // NodeRestore is used to restore a node 3552 func (r *StateRestore) NodeRestore(node *structs.Node) error { 3553 if err := r.txn.Insert("nodes", node); err != nil { 3554 return fmt.Errorf("node insert failed: %v", err) 3555 } 3556 return nil 3557 } 3558 3559 // JobRestore is used to restore a job 3560 func (r *StateRestore) JobRestore(job *structs.Job) error { 3561 // Create the EphemeralDisk if it's nil by adding up DiskMB from task resources. 
3562 // COMPAT 0.4.1 -> 0.5 3563 r.addEphemeralDiskToTaskGroups(job) 3564 3565 if err := r.txn.Insert("jobs", job); err != nil { 3566 return fmt.Errorf("job insert failed: %v", err) 3567 } 3568 return nil 3569 } 3570 3571 // EvalRestore is used to restore an evaluation 3572 func (r *StateRestore) EvalRestore(eval *structs.Evaluation) error { 3573 if err := r.txn.Insert("evals", eval); err != nil { 3574 return fmt.Errorf("eval insert failed: %v", err) 3575 } 3576 return nil 3577 } 3578 3579 // AllocRestore is used to restore an allocation 3580 func (r *StateRestore) AllocRestore(alloc *structs.Allocation) error { 3581 // Set the shared resources if it's not present 3582 // COMPAT 0.4.1 -> 0.5 3583 if alloc.SharedResources == nil { 3584 alloc.SharedResources = &structs.Resources{ 3585 DiskMB: alloc.Resources.DiskMB, 3586 } 3587 } 3588 3589 // Create the EphemeralDisk if it's nil by adding up DiskMB from task resources. 3590 if alloc.Job != nil { 3591 r.addEphemeralDiskToTaskGroups(alloc.Job) 3592 } 3593 3594 if err := r.txn.Insert("allocs", alloc); err != nil { 3595 return fmt.Errorf("alloc insert failed: %v", err) 3596 } 3597 return nil 3598 } 3599 3600 // IndexRestore is used to restore an index 3601 func (r *StateRestore) IndexRestore(idx *IndexEntry) error { 3602 if err := r.txn.Insert("index", idx); err != nil { 3603 return fmt.Errorf("index insert failed: %v", err) 3604 } 3605 return nil 3606 } 3607 3608 // PeriodicLaunchRestore is used to restore a periodic launch. 
3609 func (r *StateRestore) PeriodicLaunchRestore(launch *structs.PeriodicLaunch) error { 3610 if err := r.txn.Insert("periodic_launch", launch); err != nil { 3611 return fmt.Errorf("periodic launch insert failed: %v", err) 3612 } 3613 return nil 3614 } 3615 3616 // JobSummaryRestore is used to restore a job summary 3617 func (r *StateRestore) JobSummaryRestore(jobSummary *structs.JobSummary) error { 3618 if err := r.txn.Insert("job_summary", jobSummary); err != nil { 3619 return fmt.Errorf("job summary insert failed: %v", err) 3620 } 3621 return nil 3622 } 3623 3624 // JobVersionRestore is used to restore a job version 3625 func (r *StateRestore) JobVersionRestore(version *structs.Job) error { 3626 if err := r.txn.Insert("job_version", version); err != nil { 3627 return fmt.Errorf("job version insert failed: %v", err) 3628 } 3629 return nil 3630 } 3631 3632 // DeploymentRestore is used to restore a deployment 3633 func (r *StateRestore) DeploymentRestore(deployment *structs.Deployment) error { 3634 if err := r.txn.Insert("deployment", deployment); err != nil { 3635 return fmt.Errorf("deployment insert failed: %v", err) 3636 } 3637 return nil 3638 } 3639 3640 // VaultAccessorRestore is used to restore a vault accessor 3641 func (r *StateRestore) VaultAccessorRestore(accessor *structs.VaultAccessor) error { 3642 if err := r.txn.Insert("vault_accessors", accessor); err != nil { 3643 return fmt.Errorf("vault accessor insert failed: %v", err) 3644 } 3645 return nil 3646 } 3647 3648 // ACLPolicyRestore is used to restore an ACL policy 3649 func (r *StateRestore) ACLPolicyRestore(policy *structs.ACLPolicy) error { 3650 if err := r.txn.Insert("acl_policy", policy); err != nil { 3651 return fmt.Errorf("inserting acl policy failed: %v", err) 3652 } 3653 return nil 3654 } 3655 3656 // ACLTokenRestore is used to restore an ACL token 3657 func (r *StateRestore) ACLTokenRestore(token *structs.ACLToken) error { 3658 if err := r.txn.Insert("acl_token", token); err != nil { 3659 
return fmt.Errorf("inserting acl token failed: %v", err) 3660 } 3661 return nil 3662 } 3663 3664 // addEphemeralDiskToTaskGroups adds missing EphemeralDisk objects to TaskGroups 3665 func (r *StateRestore) addEphemeralDiskToTaskGroups(job *structs.Job) { 3666 for _, tg := range job.TaskGroups { 3667 if tg.EphemeralDisk != nil { 3668 continue 3669 } 3670 var sizeMB int 3671 for _, task := range tg.Tasks { 3672 if task.Resources != nil { 3673 sizeMB += task.Resources.DiskMB 3674 task.Resources.DiskMB = 0 3675 } 3676 } 3677 tg.EphemeralDisk = &structs.EphemeralDisk{ 3678 SizeMB: sizeMB, 3679 } 3680 } 3681 }