github.com/ThomasObenaus/nomad@v0.11.1/nomad/state/state_store.go

package state

import (
    "context"
    "fmt"
    "reflect"
    "sort"
    "time"

    log "github.com/hashicorp/go-hclog"
    memdb "github.com/hashicorp/go-memdb"
    multierror "github.com/hashicorp/go-multierror"
    "github.com/pkg/errors"

    "github.com/hashicorp/nomad/helper"
    "github.com/hashicorp/nomad/nomad/structs"
)

// Txn is a transaction against a state store.
// This can be a read or write transaction.
type Txn = *memdb.Txn

const (
    // NodeRegisterEventRegistered is the message used when the node is
    // registered for the first time.
    NodeRegisterEventRegistered = "Node registered"

    // NodeRegisterEventReregistered is the message used when the node becomes
    // reregistered.
    NodeRegisterEventReregistered = "Node re-registered"
)

// IndexEntry is used with the "index" table
// for managing the latest Raft index affecting a table.
type IndexEntry struct {
    Key   string
    Value uint64
}

// StateStoreConfig is used to configure a new state store
type StateStoreConfig struct {
    // Logger is used to output the state store's logs
    Logger log.Logger

    // Region is the region of the server embedding the state store.
    Region string
}

// The StateStore is responsible for maintaining all the Nomad
// state. It is manipulated by the FSM which maintains consistency
// through the use of Raft. The goals of the StateStore are to provide
// high concurrency for read operations without blocking writes, and
// to provide write availability in the face of reads. EVERY object
// returned as a result of a read against the state store should be
// considered a constant and NEVER modified in place.
type StateStore struct {
    logger log.Logger
    db     *memdb.MemDB

    // config is the passed in configuration
    config *StateStoreConfig

    // abandonCh is used to signal watchers that this state store has been
    // abandoned (usually during a restore). This is only ever closed.
    abandonCh chan struct{}
}

// NewStateStore is used to create a new state store
func NewStateStore(config *StateStoreConfig) (*StateStore, error) {
    // Create the MemDB
    db, err := memdb.NewMemDB(stateStoreSchema())
    if err != nil {
        return nil, fmt.Errorf("state store setup failed: %v", err)
    }

    // Create the state store
    s := &StateStore{
        logger:    config.Logger.Named("state_store"),
        db:        db,
        config:    config,
        abandonCh: make(chan struct{}),
    }
    return s, nil
}

// Config returns the state store configuration.
func (s *StateStore) Config() *StateStoreConfig {
    return s.config
}

// Snapshot is used to create a point in time snapshot. Because
// we use MemDB, we just need to snapshot the state of the underlying
// database.
func (s *StateStore) Snapshot() (*StateSnapshot, error) {
    snap := &StateSnapshot{
        StateStore: StateStore{
            logger: s.logger,
            config: s.config,
            db:     s.db.Snapshot(),
        },
    }
    return snap, nil
}
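// Example (illustrative sketch, not part of the upstream file): constructing a
// store and taking a point-in-time snapshot. The logger and region values are
// assumptions made up for this example.
func exampleNewStoreAndSnapshot() (*StateSnapshot, error) {
    store, err := NewStateStore(&StateStoreConfig{
        Logger: log.Default(), // hclog's default logger; an example choice
        Region: "global",
    })
    if err != nil {
        return nil, err
    }
    // Reads against the snapshot are isolated from any writes that land in
    // the store after this point.
    return store.Snapshot()
}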
// SnapshotMinIndex is used to create a state snapshot where the index is
// guaranteed to be greater than or equal to the index parameter.
//
// Some server operations (such as scheduling) exchange objects via RPC
// concurrent with Raft log application, so they must ensure the state store
// snapshot they are operating on is at or after the index the objects
// retrieved via RPC were applied to the Raft log at.
//
// Callers should maintain their own timer metric as the time this method
// blocks indicates Raft log application latency relative to scheduling.
func (s *StateStore) SnapshotMinIndex(ctx context.Context, index uint64) (*StateSnapshot, error) {
    // Ported from work.go:waitForIndex prior to 0.9

    const backoffBase = 20 * time.Millisecond
    const backoffLimit = 1 * time.Second
    var retries uint
    var retryTimer *time.Timer

    // XXX: Potential optimization is to set up a watch on the state
    // store's index table and only unblock via a trigger rather than
    // polling.
    for {
        // Get the state's current index
        snapshotIndex, err := s.LatestIndex()
        if err != nil {
            return nil, fmt.Errorf("failed to determine state store's index: %v", err)
        }

        // We only need the FSM state to be as recent as the given index
        if snapshotIndex >= index {
            return s.Snapshot()
        }

        // Exponential back off
        retries++
        if retryTimer == nil {
            // First retry, start at baseline
            retryTimer = time.NewTimer(backoffBase)
        } else {
            // Subsequent retry, reset timer
            deadline := 1 << (2 * retries) * backoffBase
            if deadline > backoffLimit {
                deadline = backoffLimit
            }
            retryTimer.Reset(deadline)
        }

        select {
        case <-ctx.Done():
            return nil, ctx.Err()
        case <-retryTimer.C:
        }
    }
}

// Restore is used to optimize the efficiency of rebuilding
// state by minimizing the number of transactions and checking
// overhead.
func (s *StateStore) Restore() (*StateRestore, error) {
    txn := s.db.Txn(true)
    r := &StateRestore{
        txn: txn,
    }
    return r, nil
}

// AbandonCh returns a channel you can wait on to know if the state store was
// abandoned.
func (s *StateStore) AbandonCh() <-chan struct{} {
    return s.abandonCh
}

// Abandon is used to signal that the given state store has been abandoned.
// Calling this more than one time will panic.
func (s *StateStore) Abandon() {
    close(s.abandonCh)
}

// QueryFn is the definition of a function that can be used to implement a basic
// blocking query against the state store.
type QueryFn func(memdb.WatchSet, *StateStore) (resp interface{}, index uint64, err error)

// BlockingQuery takes a query function and runs the function until the minimum
// query index is met or until the passed context is cancelled.
func (s *StateStore) BlockingQuery(query QueryFn, minIndex uint64, ctx context.Context) (
    resp interface{}, index uint64, err error) {

RUN_QUERY:
    // We capture the state store and its abandon channel but pass a snapshot to
    // the blocking query function. We operate on the snapshot to allow separate
    // calls to the state store not all wrapped within the same transaction.
    abandonCh := s.AbandonCh()
    snap, _ := s.Snapshot()
    stateSnap := &snap.StateStore

    // We can skip all watch tracking if this isn't a blocking query.
    var ws memdb.WatchSet
    if minIndex > 0 {
        ws = memdb.NewWatchSet()

        // This channel will be closed if a snapshot is restored and the
        // whole state store is abandoned.
        ws.Add(abandonCh)
    }

    resp, index, err = query(ws, stateSnap)
    if err != nil {
        return nil, index, err
    }

    // We haven't reached the min-index yet.
    if minIndex > 0 && index <= minIndex {
        if err := ws.WatchCtx(ctx); err != nil {
            return nil, index, err
        }

        goto RUN_QUERY
    }

    return resp, index, nil
}
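// Example (illustrative sketch, not part of the upstream file): how an RPC
// handler might drive BlockingQuery to watch a single job. The helper name is
// hypothetical, and LatestIndex is used here as a coarse stand-in for the
// per-table index lookup real handlers use.
func exampleBlockingJobQuery(ctx context.Context, s *StateStore, namespace, id string, minIndex uint64) (*structs.Job, uint64, error) {
    query := func(ws memdb.WatchSet, state *StateStore) (interface{}, uint64, error) {
        job, err := state.JobByID(ws, namespace, id)
        if err != nil {
            return nil, 0, err
        }
        idx, err := state.LatestIndex()
        if err != nil {
            return nil, 0, err
        }
        return job, idx, nil
    }

    // Blocks until the store passes minIndex, the context is cancelled, or
    // the watched job changes.
    resp, idx, err := s.BlockingQuery(query, minIndex, ctx)
    if err != nil {
        return nil, idx, err
    }
    job, _ := resp.(*structs.Job)
    return job, idx, nil
}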
// UpsertPlanResults is used to upsert the results of a plan.
func (s *StateStore) UpsertPlanResults(index uint64, results *structs.ApplyPlanResultsRequest) error {
    snapshot, err := s.Snapshot()
    if err != nil {
        return err
    }

    allocsStopped, err := snapshot.DenormalizeAllocationDiffSlice(results.AllocsStopped)
    if err != nil {
        return err
    }

    allocsPreempted, err := snapshot.DenormalizeAllocationDiffSlice(results.AllocsPreempted)
    if err != nil {
        return err
    }

    // COMPAT 0.11: Remove this denormalization when NodePreemptions is removed
    results.NodePreemptions, err = snapshot.DenormalizeAllocationSlice(results.NodePreemptions)
    if err != nil {
        return err
    }

    txn := s.db.Txn(true)
    defer txn.Abort()

    // Upsert the newly created or updated deployment
    if results.Deployment != nil {
        if err := s.upsertDeploymentImpl(index, results.Deployment, txn); err != nil {
            return err
        }
    }

    // Update the status of deployments affected by the plan.
    if len(results.DeploymentUpdates) != 0 {
        if err := s.upsertDeploymentUpdates(index, results.DeploymentUpdates, txn); err != nil {
            return err
        }
    }

    if results.EvalID != "" {
        // Update the modify index of the eval id
        if err := s.updateEvalModifyIndex(txn, index, results.EvalID); err != nil {
            return err
        }
    }

    numAllocs := 0
    if len(results.Alloc) > 0 || len(results.NodePreemptions) > 0 {
        // COMPAT 0.11: This branch will be removed, when Alloc is removed
        // Attach the job to all the allocations. It is pulled out in the payload to
        // avoid the redundancy of encoding, but should be denormalized prior to
        // being inserted into MemDB.
        addComputedAllocAttrs(results.Alloc, results.Job)
        numAllocs = len(results.Alloc) + len(results.NodePreemptions)
    } else {
        // Attach the job to all the allocations. It is pulled out in the payload to
        // avoid the redundancy of encoding, but should be denormalized prior to
        // being inserted into MemDB.
        addComputedAllocAttrs(results.AllocsUpdated, results.Job)
        numAllocs = len(allocsStopped) + len(results.AllocsUpdated) + len(allocsPreempted)
    }

    allocsToUpsert := make([]*structs.Allocation, 0, numAllocs)

    // COMPAT 0.11: Both these appends should be removed when Alloc and NodePreemptions are removed
    allocsToUpsert = append(allocsToUpsert, results.Alloc...)
    allocsToUpsert = append(allocsToUpsert, results.NodePreemptions...)

    allocsToUpsert = append(allocsToUpsert, allocsStopped...)
    allocsToUpsert = append(allocsToUpsert, results.AllocsUpdated...)
    allocsToUpsert = append(allocsToUpsert, allocsPreempted...)

    // handle upgrade path
    for _, alloc := range allocsToUpsert {
        alloc.Canonicalize()
    }

    if err := s.upsertAllocsImpl(index, allocsToUpsert, txn); err != nil {
        return err
    }

    // Upsert followup evals for allocs that were preempted
    for _, eval := range results.PreemptionEvals {
        if err := s.nestedUpsertEval(txn, index, eval); err != nil {
            return err
        }
    }

    txn.Commit()
    return nil
}
// addComputedAllocAttrs adds the computed/derived attributes to the allocation.
// This method is used when an allocation is being denormalized.
func addComputedAllocAttrs(allocs []*structs.Allocation, job *structs.Job) {
    structs.DenormalizeAllocationJobs(job, allocs)

    // COMPAT(0.11): Remove in 0.11
    // Calculate the total resources of allocations. It is pulled out in the
    // payload to avoid encoding something that can be computed, but should be
    // denormalized prior to being inserted into MemDB.
    for _, alloc := range allocs {
        if alloc.Resources != nil {
            continue
        }

        alloc.Resources = new(structs.Resources)
        for _, task := range alloc.TaskResources {
            alloc.Resources.Add(task)
        }

        // Add the shared resources
        alloc.Resources.Add(alloc.SharedResources)
    }
}

// upsertDeploymentUpdates updates the deployments given the passed status
// updates.
func (s *StateStore) upsertDeploymentUpdates(index uint64, updates []*structs.DeploymentStatusUpdate, txn *memdb.Txn) error {
    for _, u := range updates {
        if err := s.updateDeploymentStatusImpl(index, u, txn); err != nil {
            return err
        }
    }

    return nil
}

// UpsertJobSummary upserts a job summary into the state store.
func (s *StateStore) UpsertJobSummary(index uint64, jobSummary *structs.JobSummary) error {
    txn := s.db.Txn(true)
    defer txn.Abort()

    // Check if the job summary already exists
    existing, err := txn.First("job_summary", "id", jobSummary.Namespace, jobSummary.JobID)
    if err != nil {
        return fmt.Errorf("job summary lookup failed: %v", err)
    }

    // Setup the indexes correctly
    if existing != nil {
        jobSummary.CreateIndex = existing.(*structs.JobSummary).CreateIndex
        jobSummary.ModifyIndex = index
    } else {
        jobSummary.CreateIndex = index
        jobSummary.ModifyIndex = index
    }

    // Update the index
    if err := txn.Insert("job_summary", jobSummary); err != nil {
        return err
    }

    // Update the indexes table for job summary
    if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
        return fmt.Errorf("index update failed: %v", err)
    }

    txn.Commit()
    return nil
}

// DeleteJobSummary deletes the job summary with the given ID. This is for
// testing purposes only.
func (s *StateStore) DeleteJobSummary(index uint64, namespace, id string) error {
    txn := s.db.Txn(true)
    defer txn.Abort()

    // Delete the job summary
    if _, err := txn.DeleteAll("job_summary", "id", namespace, id); err != nil {
        return fmt.Errorf("deleting job summary failed: %v", err)
    }
    if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
        return fmt.Errorf("index update failed: %v", err)
    }
    txn.Commit()
    return nil
}
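// Example (illustrative sketch, not part of the upstream file): seeding a job
// summary the way tests commonly do. The index and identifiers are example
// values.
func exampleUpsertJobSummary(s *StateStore, index uint64) error {
    return s.UpsertJobSummary(index, &structs.JobSummary{
        Namespace: structs.DefaultNamespace,
        JobID:     "example", // hypothetical job ID
    })
}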
// UpsertDeployment is used to insert or update a deployment.
func (s *StateStore) UpsertDeployment(index uint64, deployment *structs.Deployment) error {
    txn := s.db.Txn(true)
    defer txn.Abort()
    if err := s.upsertDeploymentImpl(index, deployment, txn); err != nil {
        return err
    }
    txn.Commit()
    return nil
}

func (s *StateStore) upsertDeploymentImpl(index uint64, deployment *structs.Deployment, txn *memdb.Txn) error {
    // Check if the deployment already exists
    existing, err := txn.First("deployment", "id", deployment.ID)
    if err != nil {
        return fmt.Errorf("deployment lookup failed: %v", err)
    }

    // Setup the indexes correctly
    if existing != nil {
        deployment.CreateIndex = existing.(*structs.Deployment).CreateIndex
        deployment.ModifyIndex = index
    } else {
        deployment.CreateIndex = index
        deployment.ModifyIndex = index
    }

    // Insert the deployment
    if err := txn.Insert("deployment", deployment); err != nil {
        return err
    }

    // Update the indexes table for deployment
    if err := txn.Insert("index", &IndexEntry{"deployment", index}); err != nil {
        return fmt.Errorf("index update failed: %v", err)
    }

    // If the deployment is being marked as complete, set the job to stable.
    if deployment.Status == structs.DeploymentStatusSuccessful {
        if err := s.updateJobStabilityImpl(index, deployment.Namespace, deployment.JobID, deployment.JobVersion, true, txn); err != nil {
            return fmt.Errorf("failed to update job stability: %v", err)
        }
    }

    return nil
}

func (s *StateStore) Deployments(ws memdb.WatchSet) (memdb.ResultIterator, error) {
    txn := s.db.Txn(false)

    // Walk the entire deployments table
    iter, err := txn.Get("deployment", "id")
    if err != nil {
        return nil, err
    }

    ws.Add(iter.WatchCh())
    return iter, nil
}

func (s *StateStore) DeploymentsByNamespace(ws memdb.WatchSet, namespace string) (memdb.ResultIterator, error) {
    txn := s.db.Txn(false)

    // Walk the entire deployments table
    iter, err := txn.Get("deployment", "namespace", namespace)
    if err != nil {
        return nil, err
    }

    ws.Add(iter.WatchCh())
    return iter, nil
}

func (s *StateStore) DeploymentsByIDPrefix(ws memdb.WatchSet, namespace, deploymentID string) (memdb.ResultIterator, error) {
    txn := s.db.Txn(false)

    // Walk the entire deployments table
    iter, err := txn.Get("deployment", "id_prefix", deploymentID)
    if err != nil {
        return nil, err
    }

    ws.Add(iter.WatchCh())

    // Wrap the iterator in a filter
    wrap := memdb.NewFilterIterator(iter, deploymentNamespaceFilter(namespace))
    return wrap, nil
}
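// Example (illustrative sketch, not part of the upstream file): draining a
// deployments iterator into a slice, the usual consumption pattern for the
// iterator-returning accessors above.
func exampleListDeployments(s *StateStore, namespace string) ([]*structs.Deployment, error) {
    ws := memdb.NewWatchSet()
    iter, err := s.DeploymentsByNamespace(ws, namespace)
    if err != nil {
        return nil, err
    }

    var out []*structs.Deployment
    for raw := iter.Next(); raw != nil; raw = iter.Next() {
        out = append(out, raw.(*structs.Deployment))
    }
    return out, nil
}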
// deploymentNamespaceFilter returns a filter function that drops all
// deployments that are not in the given namespace.
func deploymentNamespaceFilter(namespace string) func(interface{}) bool {
    return func(raw interface{}) bool {
        d, ok := raw.(*structs.Deployment)
        if !ok {
            return true
        }

        return d.Namespace != namespace
    }
}

func (s *StateStore) DeploymentByID(ws memdb.WatchSet, deploymentID string) (*structs.Deployment, error) {
    txn := s.db.Txn(false)
    return s.deploymentByIDImpl(ws, deploymentID, txn)
}

func (s *StateStore) deploymentByIDImpl(ws memdb.WatchSet, deploymentID string, txn *memdb.Txn) (*structs.Deployment, error) {
    watchCh, existing, err := txn.FirstWatch("deployment", "id", deploymentID)
    if err != nil {
        return nil, fmt.Errorf("deployment lookup failed: %v", err)
    }
    ws.Add(watchCh)

    if existing != nil {
        return existing.(*structs.Deployment), nil
    }

    return nil, nil
}

func (s *StateStore) DeploymentsByJobID(ws memdb.WatchSet, namespace, jobID string, all bool) ([]*structs.Deployment, error) {
    txn := s.db.Txn(false)

    var job *structs.Job
    // Read job from state store
    _, existing, err := txn.FirstWatch("jobs", "id", namespace, jobID)
    if err != nil {
        return nil, fmt.Errorf("job lookup failed: %v", err)
    }
    if existing != nil {
        job = existing.(*structs.Job)
    }

    // Get an iterator over the deployments
    iter, err := txn.Get("deployment", "job", namespace, jobID)
    if err != nil {
        return nil, err
    }

    ws.Add(iter.WatchCh())

    var out []*structs.Deployment
    for {
        raw := iter.Next()
        if raw == nil {
            break
        }
        d := raw.(*structs.Deployment)

        // If the deployment belongs to a job with the same ID but a different
        // create index, and we are not fetching all deployments that match
        // the job ID, then skip it
        if !all && job != nil && d.JobCreateIndex != job.CreateIndex {
            continue
        }
        out = append(out, d)
    }

    return out, nil
}
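// Example (illustrative sketch, not part of the upstream file): a point lookup
// that also returns the watch set a caller would block on to learn about
// changes to the deployment.
func exampleWatchDeployment(s *StateStore, id string) (*structs.Deployment, memdb.WatchSet, error) {
    ws := memdb.NewWatchSet()
    d, err := s.DeploymentByID(ws, id)
    return d, ws, err
}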
// LatestDeploymentByJobID returns the latest deployment for the given job. The
// latest is determined strictly by CreateIndex.
func (s *StateStore) LatestDeploymentByJobID(ws memdb.WatchSet, namespace, jobID string) (*structs.Deployment, error) {
    txn := s.db.Txn(false)

    // Get an iterator over the deployments
    iter, err := txn.Get("deployment", "job", namespace, jobID)
    if err != nil {
        return nil, err
    }

    ws.Add(iter.WatchCh())

    var out *structs.Deployment
    for {
        raw := iter.Next()
        if raw == nil {
            break
        }

        d := raw.(*structs.Deployment)
        if out == nil || out.CreateIndex < d.CreateIndex {
            out = d
        }
    }

    return out, nil
}

// DeleteDeployment is used to delete a set of deployments by ID
func (s *StateStore) DeleteDeployment(index uint64, deploymentIDs []string) error {
    txn := s.db.Txn(true)
    defer txn.Abort()

    if len(deploymentIDs) == 0 {
        return nil
    }

    for _, deploymentID := range deploymentIDs {
        // Lookup the deployment
        existing, err := txn.First("deployment", "id", deploymentID)
        if err != nil {
            return fmt.Errorf("deployment lookup failed: %v", err)
        }
        if existing == nil {
            return fmt.Errorf("deployment not found")
        }

        // Delete the deployment
        if err := txn.Delete("deployment", existing); err != nil {
            return fmt.Errorf("deployment delete failed: %v", err)
        }
    }

    if err := txn.Insert("index", &IndexEntry{"deployment", index}); err != nil {
        return fmt.Errorf("index update failed: %v", err)
    }

    txn.Commit()
    return nil
}

// UpsertScalingEvent is used to insert a new scaling event.
// Only the most recent JobTrackedScalingEvents will be kept.
func (s *StateStore) UpsertScalingEvent(index uint64, req *structs.ScalingEventRequest) error {
    txn := s.db.Txn(true)
    defer txn.Abort()

    // Get the existing events
    existing, err := txn.First("scaling_event", "id", req.Namespace, req.JobID)
    if err != nil {
        return fmt.Errorf("scaling event lookup failed: %v", err)
    }

    var jobEvents *structs.JobScalingEvents
    if existing != nil {
        jobEvents = existing.(*structs.JobScalingEvents)
    } else {
        jobEvents = &structs.JobScalingEvents{
            Namespace:     req.Namespace,
            JobID:         req.JobID,
            ScalingEvents: make(map[string][]*structs.ScalingEvent),
        }
    }

    jobEvents.ModifyIndex = index
    req.ScalingEvent.CreateIndex = index

    events := jobEvents.ScalingEvents[req.TaskGroup]
    // Prepend this latest event
    events = append(
        []*structs.ScalingEvent{req.ScalingEvent},
        events...,
    )
    // Truncate older events
    if len(events) > structs.JobTrackedScalingEvents {
        events = events[0:structs.JobTrackedScalingEvents]
    }
    jobEvents.ScalingEvents[req.TaskGroup] = events

    // Insert the new event
    if err := txn.Insert("scaling_event", jobEvents); err != nil {
        return fmt.Errorf("scaling event insert failed: %v", err)
    }

    // Update the indexes table for scaling_event
    if err := txn.Insert("index", &IndexEntry{"scaling_event", index}); err != nil {
        return fmt.Errorf("index update failed: %v", err)
    }

    txn.Commit()
    return nil
}
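// Example (illustrative sketch, not part of the upstream file): recording a
// scaling event for a task group. The field values are example data, and the
// Message field is assumed from the ScalingEvent struct.
func exampleScalingEvent(s *StateStore, index uint64) error {
    return s.UpsertScalingEvent(index, &structs.ScalingEventRequest{
        Namespace: structs.DefaultNamespace,
        JobID:     "example", // hypothetical job ID
        TaskGroup: "web",     // hypothetical task group
        ScalingEvent: &structs.ScalingEvent{
            Message: "scaled up manually", // assumed field; example text
        },
    })
}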
// ScalingEvents returns an iterator over all the job scaling events
func (s *StateStore) ScalingEvents(ws memdb.WatchSet) (memdb.ResultIterator, error) {
    txn := s.db.Txn(false)

    // Walk the entire scaling_event table
    iter, err := txn.Get("scaling_event", "id")
    if err != nil {
        return nil, err
    }

    ws.Add(iter.WatchCh())

    return iter, nil
}

func (s *StateStore) ScalingEventsByJob(ws memdb.WatchSet, namespace, jobID string) (map[string][]*structs.ScalingEvent, uint64, error) {
    txn := s.db.Txn(false)

    watchCh, existing, err := txn.FirstWatch("scaling_event", "id", namespace, jobID)
    if err != nil {
        return nil, 0, fmt.Errorf("job scaling events lookup failed: %v", err)
    }
    ws.Add(watchCh)

    if existing != nil {
        events := existing.(*structs.JobScalingEvents)
        return events.ScalingEvents, events.ModifyIndex, nil
    }
    return nil, 0, nil
}
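// Example (illustrative sketch, not part of the upstream file): reading the
// tracked scaling events for a job back out of the store, keyed by task group.
func exampleReadScalingEvents(s *StateStore, namespace, jobID string) (map[string][]*structs.ScalingEvent, error) {
    events, _, err := s.ScalingEventsByJob(memdb.NewWatchSet(), namespace, jobID)
    return events, err
}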
// UpsertNode is used to register a node or update a node definition
// This is assumed to be triggered by the client, so we retain the value
// of drain/eligibility which is set by the scheduler.
func (s *StateStore) UpsertNode(index uint64, node *structs.Node) error {
    txn := s.db.Txn(true)
    defer txn.Abort()

    // Check if the node already exists
    existing, err := txn.First("nodes", "id", node.ID)
    if err != nil {
        return fmt.Errorf("node lookup failed: %v", err)
    }

    // Setup the indexes correctly
    if existing != nil {
        exist := existing.(*structs.Node)
        node.CreateIndex = exist.CreateIndex
        node.ModifyIndex = index

        // Retain node events that have already been set on the node
        node.Events = exist.Events

        // If we are transitioning from down, record the re-registration
        if exist.Status == structs.NodeStatusDown && node.Status != structs.NodeStatusDown {
            appendNodeEvents(index, node, []*structs.NodeEvent{
                structs.NewNodeEvent().SetSubsystem(structs.NodeEventSubsystemCluster).
                    SetMessage(NodeRegisterEventReregistered).
                    SetTimestamp(time.Unix(node.StatusUpdatedAt, 0))})
        }

        node.Drain = exist.Drain                                 // Retain the drain mode
        node.SchedulingEligibility = exist.SchedulingEligibility // Retain the eligibility
        node.DrainStrategy = exist.DrainStrategy                 // Retain the drain strategy
    } else {
        // Because this is the first time the node is being registered, we should
        // also create a node registration event
        nodeEvent := structs.NewNodeEvent().SetSubsystem(structs.NodeEventSubsystemCluster).
            SetMessage(NodeRegisterEventRegistered).
            SetTimestamp(time.Unix(node.StatusUpdatedAt, 0))
        node.Events = []*structs.NodeEvent{nodeEvent}
        node.CreateIndex = index
        node.ModifyIndex = index
    }

    // Insert the node
    if err := txn.Insert("nodes", node); err != nil {
        return fmt.Errorf("node insert failed: %v", err)
    }
    if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil {
        return fmt.Errorf("index update failed: %v", err)
    }
    if err := upsertNodeCSIPlugins(txn, node, index); err != nil {
        return fmt.Errorf("csi plugin update failed: %v", err)
    }

    txn.Commit()
    return nil
}

// DeleteNode deregisters a batch of nodes
func (s *StateStore) DeleteNode(index uint64, nodes []string) error {
    if len(nodes) == 0 {
        return fmt.Errorf("node ids missing")
    }

    txn := s.db.Txn(true)
    defer txn.Abort()

    for _, nodeID := range nodes {
        existing, err := txn.First("nodes", "id", nodeID)
        if err != nil {
            return fmt.Errorf("node lookup failed: %s: %v", nodeID, err)
        }
        if existing == nil {
            return fmt.Errorf("node not found: %s", nodeID)
        }

        // Delete the node
        if err := txn.Delete("nodes", existing); err != nil {
            return fmt.Errorf("node delete failed: %s: %v", nodeID, err)
        }

        node := existing.(*structs.Node)
        if err := deleteNodeCSIPlugins(txn, node, index); err != nil {
            return fmt.Errorf("csi plugin delete failed: %v", err)
        }
    }

    if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil {
        return fmt.Errorf("index update failed: %v", err)
    }

    txn.Commit()
    return nil
}

// UpdateNodeStatus is used to update the status of a node
func (s *StateStore) UpdateNodeStatus(index uint64, nodeID, status string, updatedAt int64, event *structs.NodeEvent) error {
    txn := s.db.Txn(true)
    defer txn.Abort()

    // Lookup the node
    existing, err := txn.First("nodes", "id", nodeID)
    if err != nil {
        return fmt.Errorf("node lookup failed: %v", err)
    }
    if existing == nil {
        return fmt.Errorf("node not found")
    }

    // Copy the existing node
    existingNode := existing.(*structs.Node)
    copyNode := existingNode.Copy()
    copyNode.StatusUpdatedAt = updatedAt

    // Add the event if given
    if event != nil {
        appendNodeEvents(index, copyNode, []*structs.NodeEvent{event})
    }

    // Update the status in the copy
    copyNode.Status = status
    copyNode.ModifyIndex = index

    // Insert the node
    if err := txn.Insert("nodes", copyNode); err != nil {
        return fmt.Errorf("node update failed: %v", err)
    }
    if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil {
        return fmt.Errorf("index update failed: %v", err)
    }

    txn.Commit()
    return nil
}

// BatchUpdateNodeDrain is used to update the drain of a set of nodes
func (s *StateStore) BatchUpdateNodeDrain(index uint64, updatedAt int64, updates map[string]*structs.DrainUpdate, events map[string]*structs.NodeEvent) error {
    txn := s.db.Txn(true)
    defer txn.Abort()
    for node, update := range updates {
        if err := s.updateNodeDrainImpl(txn, index, node, update.DrainStrategy, update.MarkEligible, updatedAt, events[node]); err != nil {
            return err
        }
    }
    txn.Commit()
    return nil
}
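// Example (illustrative sketch, not part of the upstream file): marking a node
// down with an accompanying node event. The event message is example text.
func exampleMarkNodeDown(s *StateStore, index uint64, nodeID string) error {
    event := structs.NewNodeEvent().
        SetSubsystem(structs.NodeEventSubsystemCluster).
        SetMessage("Node heartbeat missed") // example message
    return s.UpdateNodeStatus(index, nodeID, structs.NodeStatusDown, time.Now().Unix(), event)
}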
// UpdateNodeDrain is used to update the drain of a node
func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string,
    drain *structs.DrainStrategy, markEligible bool, updatedAt int64, event *structs.NodeEvent) error {

    txn := s.db.Txn(true)
    defer txn.Abort()
    if err := s.updateNodeDrainImpl(txn, index, nodeID, drain, markEligible, updatedAt, event); err != nil {
        return err
    }
    txn.Commit()
    return nil
}

func (s *StateStore) updateNodeDrainImpl(txn *memdb.Txn, index uint64, nodeID string,
    drain *structs.DrainStrategy, markEligible bool, updatedAt int64, event *structs.NodeEvent) error {

    // Lookup the node
    existing, err := txn.First("nodes", "id", nodeID)
    if err != nil {
        return fmt.Errorf("node lookup failed: %v", err)
    }
    if existing == nil {
        return fmt.Errorf("node not found")
    }

    // Copy the existing node
    existingNode := existing.(*structs.Node)
    copyNode := existingNode.Copy()
    copyNode.StatusUpdatedAt = updatedAt

    // Add the event if given
    if event != nil {
        appendNodeEvents(index, copyNode, []*structs.NodeEvent{event})
    }

    // Update the drain in the copy
    copyNode.Drain = drain != nil // COMPAT: Remove in Nomad 0.10
    copyNode.DrainStrategy = drain
    if drain != nil {
        copyNode.SchedulingEligibility = structs.NodeSchedulingIneligible
    } else if markEligible {
        copyNode.SchedulingEligibility = structs.NodeSchedulingEligible
    }

    copyNode.ModifyIndex = index

    // Insert the node
    if err := txn.Insert("nodes", copyNode); err != nil {
        return fmt.Errorf("node update failed: %v", err)
    }
    if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil {
        return fmt.Errorf("index update failed: %v", err)
    }

    return nil
}

// UpdateNodeEligibility is used to update the scheduling eligibility of a node
func (s *StateStore) UpdateNodeEligibility(index uint64, nodeID string, eligibility string, updatedAt int64, event *structs.NodeEvent) error {

    txn := s.db.Txn(true)
    defer txn.Abort()

    // Lookup the node
    existing, err := txn.First("nodes", "id", nodeID)
    if err != nil {
        return fmt.Errorf("node lookup failed: %v", err)
    }
    if existing == nil {
        return fmt.Errorf("node not found")
    }

    // Copy the existing node
    existingNode := existing.(*structs.Node)
    copyNode := existingNode.Copy()
    copyNode.StatusUpdatedAt = updatedAt

    // Add the event if given
    if event != nil {
        appendNodeEvents(index, copyNode, []*structs.NodeEvent{event})
    }

    // Check if this is a valid action
    if copyNode.DrainStrategy != nil && eligibility == structs.NodeSchedulingEligible {
        return fmt.Errorf("can not set node's scheduling eligibility to eligible while it is draining")
    }

    // Update the eligibility in the copy
    copyNode.SchedulingEligibility = eligibility
    copyNode.ModifyIndex = index

    // Insert the node
    if err := txn.Insert("nodes", copyNode); err != nil {
        return fmt.Errorf("node update failed: %v", err)
    }
    if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil {
        return fmt.Errorf("index update failed: %v", err)
    }

    txn.Commit()
    return nil
}
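// Example (illustrative sketch, not part of the upstream file): placing a node
// into draining mode. The DrainSpec embedding and the one-hour deadline are
// assumptions made for the example.
func exampleDrainNode(s *StateStore, index uint64, nodeID string) error {
    strategy := &structs.DrainStrategy{
        DrainSpec: structs.DrainSpec{Deadline: time.Hour}, // example deadline
    }
    // markEligible is false: the node stays ineligible if the drain is later
    // removed by this code path.
    return s.UpdateNodeDrain(index, nodeID, strategy, false, time.Now().Unix(), nil)
}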
// UpsertNodeEvents adds the node events to the nodes, rotating events as
// necessary.
func (s *StateStore) UpsertNodeEvents(index uint64, nodeEvents map[string][]*structs.NodeEvent) error {
    txn := s.db.Txn(true)
    defer txn.Abort()

    for nodeID, events := range nodeEvents {
        if err := s.upsertNodeEvents(index, nodeID, events, txn); err != nil {
            return err
        }
    }

    txn.Commit()
    return nil
}

// upsertNodeEvents upserts events for a given node. It also ensures that only
// a fixed number of node events are stored at once, deleting older events
// once this bound has been reached.
func (s *StateStore) upsertNodeEvents(index uint64, nodeID string, events []*structs.NodeEvent, txn *memdb.Txn) error {
    // Lookup the node
    existing, err := txn.First("nodes", "id", nodeID)
    if err != nil {
        return fmt.Errorf("node lookup failed: %v", err)
    }
    if existing == nil {
        return fmt.Errorf("node not found")
    }

    // Copy the existing node
    existingNode := existing.(*structs.Node)
    copyNode := existingNode.Copy()
    appendNodeEvents(index, copyNode, events)

    // Insert the node
    if err := txn.Insert("nodes", copyNode); err != nil {
        return fmt.Errorf("node update failed: %v", err)
    }
    if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil {
        return fmt.Errorf("index update failed: %v", err)
    }

    return nil
}

// appendNodeEvents is a helper that takes a node and new events and appends
// them, pruning older events as needed.
func appendNodeEvents(index uint64, node *structs.Node, events []*structs.NodeEvent) {
    // Add the events, updating the indexes
    for _, e := range events {
        e.CreateIndex = index
        node.Events = append(node.Events, e)
    }

    // Keep node events pruned to not exceed the max allowed
    if l := len(node.Events); l > structs.MaxRetainedNodeEvents {
        delta := l - structs.MaxRetainedNodeEvents
        node.Events = node.Events[delta:]
    }
}
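// Example (illustrative sketch, not part of the upstream file): attaching a
// batch of events to a node; events beyond structs.MaxRetainedNodeEvents are
// pruned. The driver subsystem constant and message are assumptions.
func exampleNodeEvents(s *StateStore, index uint64, nodeID string) error {
    event := structs.NewNodeEvent().
        SetSubsystem(structs.NodeEventSubsystemDriver). // assumed subsystem constant
        SetMessage("Driver docker detected")            // example message
    return s.UpsertNodeEvents(index, map[string][]*structs.NodeEvent{
        nodeID: {event},
    })
}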
// upsertNodeCSIPlugins indexes csi plugins for volume retrieval, with health.
// It's called in UpsertNode, so that event driven health changes are updated.
func upsertNodeCSIPlugins(txn *memdb.Txn, node *structs.Node, index uint64) error {

    loop := func(info *structs.CSIInfo) error {
        raw, err := txn.First("csi_plugins", "id", info.PluginID)
        if err != nil {
            return fmt.Errorf("csi_plugin lookup error: %s %v", info.PluginID, err)
        }

        var plug *structs.CSIPlugin
        if raw != nil {
            plug = raw.(*structs.CSIPlugin).Copy()
        } else {
            plug = structs.NewCSIPlugin(info.PluginID, index)
            plug.Provider = info.Provider
            plug.Version = info.ProviderVersion
        }

        err = plug.AddPlugin(node.ID, info)
        if err != nil {
            return err
        }

        plug.ModifyIndex = index

        err = txn.Insert("csi_plugins", plug)
        if err != nil {
            return fmt.Errorf("csi_plugins insert error: %v", err)
        }

        return nil
    }

    inUse := map[string]struct{}{}
    for _, info := range node.CSIControllerPlugins {
        err := loop(info)
        if err != nil {
            return err
        }
        inUse[info.PluginID] = struct{}{}
    }

    for _, info := range node.CSINodePlugins {
        err := loop(info)
        if err != nil {
            return err
        }
        inUse[info.PluginID] = struct{}{}
    }

    // remove the client node from any plugin that's not
    // running on it.
    iter, err := txn.Get("csi_plugins", "id")
    if err != nil {
        return fmt.Errorf("csi_plugins lookup failed: %v", err)
    }
    for {
        raw := iter.Next()
        if raw == nil {
            break
        }
        plug := raw.(*structs.CSIPlugin)
        _, ok := inUse[plug.ID]
        if !ok {
            _, asController := plug.Controllers[node.ID]
            _, asNode := plug.Nodes[node.ID]
            if asController || asNode {
                err = deleteNodeFromPlugin(txn, plug.Copy(), node, index)
                if err != nil {
                    return err
                }
            }
        }
    }

    if err := txn.Insert("index", &IndexEntry{"csi_plugins", index}); err != nil {
        return fmt.Errorf("index update failed: %v", err)
    }

    return nil
}

// deleteNodeCSIPlugins cleans up CSIInfo node health status, called in DeleteNode
func deleteNodeCSIPlugins(txn *memdb.Txn, node *structs.Node, index uint64) error {
    if len(node.CSIControllerPlugins) == 0 && len(node.CSINodePlugins) == 0 {
        return nil
    }

    names := map[string]struct{}{}
    for _, info := range node.CSIControllerPlugins {
        names[info.PluginID] = struct{}{}
    }
    for _, info := range node.CSINodePlugins {
        names[info.PluginID] = struct{}{}
    }

    for id := range names {
        raw, err := txn.First("csi_plugins", "id", id)
        if err != nil {
            return fmt.Errorf("csi_plugins lookup error %s: %v", id, err)
        }
        if raw == nil {
            return fmt.Errorf("csi_plugins missing plugin %s", id)
        }

        plug := raw.(*structs.CSIPlugin).Copy()
        err = deleteNodeFromPlugin(txn, plug, node, index)
        if err != nil {
            return err
        }
    }

    if err := txn.Insert("index", &IndexEntry{"csi_plugins", index}); err != nil {
        return fmt.Errorf("index update failed: %v", err)
    }

    return nil
}

func deleteNodeFromPlugin(txn *memdb.Txn, plug *structs.CSIPlugin, node *structs.Node, index uint64) error {
    err := plug.DeleteNode(node.ID)
    if err != nil {
        return err
    }
    return updateOrGCPlugin(index, txn, plug)
}

// updateOrGCPlugin updates a plugin but will delete it if the plugin is empty
func updateOrGCPlugin(index uint64, txn *memdb.Txn, plug *structs.CSIPlugin) error {
    plug.ModifyIndex = index

    if plug.IsEmpty() {
        err := txn.Delete("csi_plugins", plug)
        if err != nil {
            return fmt.Errorf("csi_plugins delete error: %v", err)
        }
    } else {
        err := txn.Insert("csi_plugins", plug)
        if err != nil {
            return fmt.Errorf("csi_plugins update error %s: %v", plug.ID, err)
        }
    }
    return nil
}
// deleteJobFromPlugin removes the allocations of this job from any plugins the
// job is running, possibly deleting the plugin if it's no longer in use. It's
// called in DeleteJobTxn.
func (s *StateStore) deleteJobFromPlugin(index uint64, txn *memdb.Txn, job *structs.Job) error {
    ws := memdb.NewWatchSet()
    allocs, err := s.AllocsByJob(ws, job.Namespace, job.ID, false)
    if err != nil {
        return fmt.Errorf("error getting allocations: %v", err)
    }

    type pair struct {
        pluginID string
        alloc    *structs.Allocation
    }

    plugAllocs := []*pair{}
    plugins := map[string]*structs.CSIPlugin{}

    for _, a := range allocs {
        // if it's nil, we can just panic
        tg := a.Job.LookupTaskGroup(a.TaskGroup)
        for _, t := range tg.Tasks {
            if t.CSIPluginConfig != nil {
                plugAllocs = append(plugAllocs, &pair{
                    pluginID: t.CSIPluginConfig.ID,
                    alloc:    a,
                })
            }
        }
    }

    for _, x := range plugAllocs {
        plug, ok := plugins[x.pluginID]

        if !ok {
            plug, err = s.CSIPluginByID(ws, x.pluginID)
            if err != nil {
                return fmt.Errorf("error getting plugin: %s, %v", x.pluginID, err)
            }
            if plug == nil {
                return fmt.Errorf("plugin missing: %s %v", x.pluginID, err)
            }
            // only copy once, so we update the same plugin on each alloc
            plugins[x.pluginID] = plug.Copy()
            plug = plugins[x.pluginID]
        }

        err := plug.DeleteAlloc(x.alloc.ID, x.alloc.NodeID)
        if err != nil {
            return err
        }
    }

    for _, plug := range plugins {
        err = updateOrGCPlugin(index, txn, plug)
        if err != nil {
            return err
        }
    }

    if err = txn.Insert("index", &IndexEntry{"csi_plugins", index}); err != nil {
        return fmt.Errorf("index update failed: %v", err)
    }

    return nil
}

// NodeByID is used to lookup a node by ID
func (s *StateStore) NodeByID(ws memdb.WatchSet, nodeID string) (*structs.Node, error) {
    txn := s.db.Txn(false)

    watchCh, existing, err := txn.FirstWatch("nodes", "id", nodeID)
    if err != nil {
        return nil, fmt.Errorf("node lookup failed: %v", err)
    }
    ws.Add(watchCh)

    if existing != nil {
        return existing.(*structs.Node), nil
    }
    return nil, nil
}

// NodesByIDPrefix is used to lookup nodes by prefix
func (s *StateStore) NodesByIDPrefix(ws memdb.WatchSet, nodeID string) (memdb.ResultIterator, error) {
    txn := s.db.Txn(false)

    iter, err := txn.Get("nodes", "id_prefix", nodeID)
    if err != nil {
        return nil, fmt.Errorf("node lookup failed: %v", err)
    }
    ws.Add(iter.WatchCh())

    return iter, nil
}

// NodeBySecretID is used to lookup a node by SecretID
func (s *StateStore) NodeBySecretID(ws memdb.WatchSet, secretID string) (*structs.Node, error) {
    txn := s.db.Txn(false)

    watchCh, existing, err := txn.FirstWatch("nodes", "secret_id", secretID)
    if err != nil {
        return nil, fmt.Errorf("node lookup by SecretID failed: %v", err)
    }
    ws.Add(watchCh)

    if existing != nil {
        return existing.(*structs.Node), nil
    }
    return nil, nil
}

// Nodes returns an iterator over all the nodes
func (s *StateStore) Nodes(ws memdb.WatchSet) (memdb.ResultIterator, error) {
    txn := s.db.Txn(false)

    // Walk the entire nodes table
    iter, err := txn.Get("nodes", "id")
    if err != nil {
        return nil, err
    }
    ws.Add(iter.WatchCh())
    return iter, nil
}
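// Example (illustrative sketch, not part of the upstream file): resolving a
// short node identifier to at most one node, mirroring how CLI prefix matching
// behaves. A nil watch set is passed since no blocking is needed.
func exampleNodeByPrefix(s *StateStore, prefix string) (*structs.Node, error) {
    iter, err := s.NodesByIDPrefix(nil, prefix)
    if err != nil {
        return nil, err
    }

    raw := iter.Next()
    if raw == nil {
        return nil, nil
    }
    node := raw.(*structs.Node)

    // A second match means the prefix is ambiguous.
    if iter.Next() != nil {
        return nil, fmt.Errorf("ambiguous node prefix %q", prefix)
    }
    return node, nil
}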
// UpsertJob is used to register a job or update a job definition
func (s *StateStore) UpsertJob(index uint64, job *structs.Job) error {
    txn := s.db.Txn(true)
    defer txn.Abort()
    if err := s.upsertJobImpl(index, job, false, txn); err != nil {
        return err
    }
    txn.Commit()
    return nil
}

// UpsertJobTxn is used to register a job or update a job definition, like UpsertJob,
// but in a transaction. Useful when making multiple modifications atomically.
func (s *StateStore) UpsertJobTxn(index uint64, job *structs.Job, txn Txn) error {
    return s.upsertJobImpl(index, job, false, txn)
}

// upsertJobImpl is the implementation for registering a job or updating a job definition
func (s *StateStore) upsertJobImpl(index uint64, job *structs.Job, keepVersion bool, txn *memdb.Txn) error {
    // Assert the namespace exists
    if exists, err := s.namespaceExists(txn, job.Namespace); err != nil {
        return err
    } else if !exists {
        return fmt.Errorf("job %q is in nonexistent namespace %q", job.ID, job.Namespace)
    }

    // Check if the job already exists
    existing, err := txn.First("jobs", "id", job.Namespace, job.ID)
    if err != nil {
        return fmt.Errorf("job lookup failed: %v", err)
    }

    // Setup the indexes correctly
    if existing != nil {
        job.CreateIndex = existing.(*structs.Job).CreateIndex
        job.ModifyIndex = index

        // Bump the version unless asked to keep it. This should only be done
        // when changing an internal field such as Stable. A spec change should
        // always come with a version bump
        if !keepVersion {
            job.JobModifyIndex = index
            job.Version = existing.(*structs.Job).Version + 1
        }

        // Compute the job status
        var err error
        job.Status, err = s.getJobStatus(txn, job, false)
        if err != nil {
            return fmt.Errorf("setting job status for %q failed: %v", job.ID, err)
        }
    } else {
        job.CreateIndex = index
        job.ModifyIndex = index
        job.JobModifyIndex = index
        job.Version = 0

        if err := s.setJobStatus(index, txn, job, false, ""); err != nil {
            return fmt.Errorf("setting job status for %q failed: %v", job.ID, err)
        }

        // Have to get the job again since it could have been updated
        updated, err := txn.First("jobs", "id", job.Namespace, job.ID)
        if err != nil {
            return fmt.Errorf("job lookup failed: %v", err)
        }
        if updated != nil {
            job = updated.(*structs.Job)
        }
    }

    if err := s.updateSummaryWithJob(index, job, txn); err != nil {
        return fmt.Errorf("unable to create job summary: %v", err)
    }

    if err := s.upsertJobVersion(index, job, txn); err != nil {
        return fmt.Errorf("unable to upsert job into job_version table: %v", err)
    }

    if err := s.updateJobScalingPolicies(index, job, txn); err != nil {
        return fmt.Errorf("unable to update job scaling policies: %v", err)
    }

    // Insert the job
    if err := txn.Insert("jobs", job); err != nil {
        return fmt.Errorf("job insert failed: %v", err)
    }
    if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil {
        return fmt.Errorf("index update failed: %v", err)
    }

    return nil
}
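// Example (illustrative sketch, not part of the upstream file): registering a
// job and reading it back. A nil watch set is passed since no blocking is
// needed for the read.
func exampleRegisterJob(s *StateStore, index uint64, job *structs.Job) (*structs.Job, error) {
    if err := s.UpsertJob(index, job); err != nil {
        return nil, err
    }
    return s.JobByID(nil, job.Namespace, job.ID)
}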
// DeleteJob is used to deregister a job
func (s *StateStore) DeleteJob(index uint64, namespace, jobID string) error {
    txn := s.db.Txn(true)
    defer txn.Abort()

    err := s.DeleteJobTxn(index, namespace, jobID, txn)
    if err == nil {
        txn.Commit()
    }
    return err
}
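// Example (illustrative sketch, not part of the upstream file): deleting
// several jobs atomically using the Txn variant, mirroring how DeleteJob
// wraps DeleteJobTxn in a single write transaction.
func exampleDeleteJobsAtomically(s *StateStore, index uint64, namespace string, jobIDs []string) error {
    txn := s.db.Txn(true)
    defer txn.Abort()

    for _, id := range jobIDs {
        if err := s.DeleteJobTxn(index, namespace, id, txn); err != nil {
            // Abort via the deferred call; no partial deletes are committed.
            return err
        }
    }

    txn.Commit()
    return nil
}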
// DeleteJobTxn is used to deregister a job, like DeleteJob,
// but in a transaction. Useful when making multiple modifications atomically.
func (s *StateStore) DeleteJobTxn(index uint64, namespace, jobID string, txn Txn) error {
    // Lookup the job
    existing, err := txn.First("jobs", "id", namespace, jobID)
    if err != nil {
        return fmt.Errorf("job lookup failed: %v", err)
    }
    if existing == nil {
        return fmt.Errorf("job not found")
    }

    // Check if we should update a parent job summary
    job := existing.(*structs.Job)
    if job.ParentID != "" {
        summaryRaw, err := txn.First("job_summary", "id", namespace, job.ParentID)
        if err != nil {
            return fmt.Errorf("unable to retrieve summary for parent job: %v", err)
        }

        // Only continue if the summary exists. It could not exist if the parent
        // job was removed
        if summaryRaw != nil {
            existing := summaryRaw.(*structs.JobSummary)
            pSummary := existing.Copy()
            if pSummary.Children != nil {

                modified := false
                switch job.Status {
                case structs.JobStatusPending:
                    pSummary.Children.Pending--
                    pSummary.Children.Dead++
                    modified = true
                case structs.JobStatusRunning:
                    pSummary.Children.Running--
                    pSummary.Children.Dead++
                    modified = true
                case structs.JobStatusDead:
                default:
                    return fmt.Errorf("unknown old job status %q", job.Status)
                }

                if modified {
                    // Update the modify index
                    pSummary.ModifyIndex = index

                    // Insert the summary
                    if err := txn.Insert("job_summary", pSummary); err != nil {
                        return fmt.Errorf("job summary insert failed: %v", err)
                    }
                    if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
                        return fmt.Errorf("index update failed: %v", err)
                    }
                }
            }
        }
    }

    // Delete the job
    if err := txn.Delete("jobs", existing); err != nil {
        return fmt.Errorf("job delete failed: %v", err)
    }
    if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil {
        return fmt.Errorf("index update failed: %v", err)
    }

    // Delete the job versions
    if err := s.deleteJobVersions(index, job, txn); err != nil {
        return err
    }

    // Delete the job summary
    if _, err = txn.DeleteAll("job_summary", "id", namespace, jobID); err != nil {
        return fmt.Errorf("deleting job summary failed: %v", err)
    }
    if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
        return fmt.Errorf("index update failed: %v", err)
    }

    // Delete any job scaling policies
    numDeletedScalingPolicies, err := txn.DeleteAll("scaling_policy", "target_prefix", namespace, jobID)
    if err != nil {
        return fmt.Errorf("deleting job scaling policies failed: %v", err)
    }
    if numDeletedScalingPolicies > 0 {
        if err := txn.Insert("index", &IndexEntry{"scaling_policy", index}); err != nil {
            return fmt.Errorf("index update failed: %v", err)
        }
    }

    // Delete the scaling events
    if _, err = txn.DeleteAll("scaling_event", "id", namespace, jobID); err != nil {
        return fmt.Errorf("deleting job scaling events failed: %v", err)
    }
    if err := txn.Insert("index", &IndexEntry{"scaling_event", index}); err != nil {
        return fmt.Errorf("index update failed: %v", err)
    }

    // Cleanup plugins registered by this job
    err = s.deleteJobFromPlugin(index, txn, job)
    if err != nil {
        return fmt.Errorf("deleting job from plugin: %v", err)
    }

    return nil
}

// deleteJobVersions deletes all versions of the given job.
func (s *StateStore) deleteJobVersions(index uint64, job *structs.Job, txn *memdb.Txn) error {
    iter, err := txn.Get("job_version", "id_prefix", job.Namespace, job.ID)
    if err != nil {
        return err
    }

    // Put them into a slice so there are no safety concerns while actually
    // performing the deletes
    jobs := []*structs.Job{}
    for {
        raw := iter.Next()
        if raw == nil {
            break
        }

        // Ensure the ID is an exact match
        j := raw.(*structs.Job)
        if j.ID != job.ID {
            continue
        }

        jobs = append(jobs, j)
    }

    // Do the deletes
    for _, j := range jobs {
        if err := txn.Delete("job_version", j); err != nil {
            return fmt.Errorf("deleting job versions failed: %v", err)
        }
    }

    if err := txn.Insert("index", &IndexEntry{"job_version", index}); err != nil {
        return fmt.Errorf("index update failed: %v", err)
    }

    return nil
}

// upsertJobVersion inserts a job into its historic version table and limits the
// number of job versions that are tracked.
func (s *StateStore) upsertJobVersion(index uint64, job *structs.Job, txn *memdb.Txn) error {
    // Insert the job
    if err := txn.Insert("job_version", job); err != nil {
        return fmt.Errorf("failed to insert job into job_version table: %v", err)
    }

    if err := txn.Insert("index", &IndexEntry{"job_version", index}); err != nil {
        return fmt.Errorf("index update failed: %v", err)
    }

    // Get all the historic jobs for this ID
    all, err := s.jobVersionByID(txn, nil, job.Namespace, job.ID)
    if err != nil {
        return fmt.Errorf("failed to look up job versions for %q: %v", job.ID, err)
    }

    // If we are below the limit there is no GCing to be done
    if len(all) <= structs.JobTrackedVersions {
        return nil
    }

    // We have to delete a historic job to make room.
    // Find index of the highest versioned stable job
    stableIdx := -1
    for i, j := range all {
        if j.Stable {
            stableIdx = i
            break
        }
    }

    // If the stable job is the oldest version, do a swap to bring it into the
    // keep set.
    max := structs.JobTrackedVersions
    if stableIdx == max {
        all[max-1], all[max] = all[max], all[max-1]
    }

    // Delete the single job version that falls outside the set being kept.
    d := all[max]
    if err := txn.Delete("job_version", d); err != nil {
        return fmt.Errorf("failed to delete job %v (%d) from job_version", d.ID, d.Version)
    }

    return nil
}

// JobByID is used to lookup a job by its ID. JobByID returns the current/latest job
// version.
func (s *StateStore) JobByID(ws memdb.WatchSet, namespace, id string) (*structs.Job, error) {
    txn := s.db.Txn(false)
    return s.JobByIDTxn(ws, namespace, id, txn)
}
// JobByIDTxn is used to lookup a job by its ID, like JobByID, but returns the
// job version visible within the passed transaction.
func (s *StateStore) JobByIDTxn(ws memdb.WatchSet, namespace, id string, txn Txn) (*structs.Job, error) {
    watchCh, existing, err := txn.FirstWatch("jobs", "id", namespace, id)
    if err != nil {
        return nil, fmt.Errorf("job lookup failed: %v", err)
    }
    ws.Add(watchCh)

    if existing != nil {
        return existing.(*structs.Job), nil
    }
    return nil, nil
}

// JobsByIDPrefix is used to lookup a job by prefix
func (s *StateStore) JobsByIDPrefix(ws memdb.WatchSet, namespace, id string) (memdb.ResultIterator, error) {
    txn := s.db.Txn(false)

    iter, err := txn.Get("jobs", "id_prefix", namespace, id)
    if err != nil {
        return nil, fmt.Errorf("job lookup failed: %v", err)
    }

    ws.Add(iter.WatchCh())

    return iter, nil
}

// JobVersionsByID returns all the tracked versions of a job.
func (s *StateStore) JobVersionsByID(ws memdb.WatchSet, namespace, id string) ([]*structs.Job, error) {
    txn := s.db.Txn(false)

    return s.jobVersionByID(txn, &ws, namespace, id)
}

// jobVersionByID is the underlying implementation for retrieving all tracked
// versions of a job and is called under an existing transaction. A watch set
// can optionally be passed in to add the job histories to the watch set.
func (s *StateStore) jobVersionByID(txn *memdb.Txn, ws *memdb.WatchSet, namespace, id string) ([]*structs.Job, error) {
    // Get all the historic jobs for this ID
    iter, err := txn.Get("job_version", "id_prefix", namespace, id)
    if err != nil {
        return nil, err
    }

    if ws != nil {
        ws.Add(iter.WatchCh())
    }

    var all []*structs.Job
    for {
        raw := iter.Next()
        if raw == nil {
            break
        }

        // Ensure the ID is an exact match
        j := raw.(*structs.Job)
        if j.ID != id {
            continue
        }

        all = append(all, j)
    }

    // Sort in reverse order so that the highest version is first
    sort.Slice(all, func(i, j int) bool {
        return all[i].Version > all[j].Version
    })

    return all, nil
}

// JobByIDAndVersion returns the job identified by its ID and Version. The
// passed watchset may be nil.
func (s *StateStore) JobByIDAndVersion(ws memdb.WatchSet, namespace, id string, version uint64) (*structs.Job, error) {
    txn := s.db.Txn(false)
    return s.jobByIDAndVersionImpl(ws, namespace, id, version, txn)
}
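// Example (illustrative sketch, not part of the upstream file): fetching the
// version immediately preceding the current one, the kind of lookup a job
// revert needs. A nil watch set is valid here per the doc comment above.
func examplePreviousJobVersion(s *StateStore, namespace, id string, current uint64) (*structs.Job, error) {
    if current == 0 {
        // Version 0 has no predecessor.
        return nil, nil
    }
    return s.JobByIDAndVersion(nil, namespace, id, current-1)
}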
// jobByIDAndVersionImpl returns the job identified by its ID and Version. The
// passed watchset may be nil.
func (s *StateStore) jobByIDAndVersionImpl(ws memdb.WatchSet, namespace, id string,
    version uint64, txn *memdb.Txn) (*structs.Job, error) {

    watchCh, existing, err := txn.FirstWatch("job_version", "id", namespace, id, version)
    if err != nil {
        return nil, err
    }

    if ws != nil {
        ws.Add(watchCh)
    }

    if existing != nil {
        job := existing.(*structs.Job)
        return job, nil
    }

    return nil, nil
}

func (s *StateStore) JobVersions(ws memdb.WatchSet) (memdb.ResultIterator, error) {
    txn := s.db.Txn(false)

    // Walk the entire job_version table
    iter, err := txn.Get("job_version", "id")
    if err != nil {
        return nil, err
    }

    ws.Add(iter.WatchCh())
    return iter, nil
}

// Jobs returns an iterator over all the jobs
func (s *StateStore) Jobs(ws memdb.WatchSet) (memdb.ResultIterator, error) {
    txn := s.db.Txn(false)

    // Walk the entire jobs table
    iter, err := txn.Get("jobs", "id")
    if err != nil {
        return nil, err
    }

    ws.Add(iter.WatchCh())

    return iter, nil
}

// JobsByNamespace returns an iterator over all the jobs for the given namespace
func (s *StateStore) JobsByNamespace(ws memdb.WatchSet, namespace string) (memdb.ResultIterator, error) {
    txn := s.db.Txn(false)
    return s.jobsByNamespaceImpl(ws, namespace, txn)
}

// jobsByNamespaceImpl returns an iterator over all the jobs for the given namespace
func (s *StateStore) jobsByNamespaceImpl(ws memdb.WatchSet, namespace string, txn *memdb.Txn) (memdb.ResultIterator, error) {
    // Walk the entire jobs table
    iter, err := txn.Get("jobs", "id_prefix", namespace, "")
    if err != nil {
        return nil, err
    }

    ws.Add(iter.WatchCh())

    return iter, nil
}

// JobsByPeriodic returns an iterator over all the periodic or non-periodic jobs.
func (s *StateStore) JobsByPeriodic(ws memdb.WatchSet, periodic bool) (memdb.ResultIterator, error) {
    txn := s.db.Txn(false)

    iter, err := txn.Get("jobs", "periodic", periodic)
    if err != nil {
        return nil, err
    }

    ws.Add(iter.WatchCh())

    return iter, nil
}

// JobsByScheduler returns an iterator over all the jobs with the specific
// scheduler type.
func (s *StateStore) JobsByScheduler(ws memdb.WatchSet, schedulerType string) (memdb.ResultIterator, error) {
    txn := s.db.Txn(false)

    // Return an iterator for jobs with the specific type.
    iter, err := txn.Get("jobs", "type", schedulerType)
    if err != nil {
        return nil, err
    }

    ws.Add(iter.WatchCh())

    return iter, nil
}

// JobsByGC returns an iterator over all jobs eligible or ineligible for garbage
// collection.
func (s *StateStore) JobsByGC(ws memdb.WatchSet, gc bool) (memdb.ResultIterator, error) {
    txn := s.db.Txn(false)

    iter, err := txn.Get("jobs", "gc", gc)
    if err != nil {
        return nil, err
    }

    ws.Add(iter.WatchCh())

    return iter, nil
}
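// Example (illustrative sketch, not part of the upstream file): collecting all
// garbage-collectable jobs into a slice, as a GC pass might do before acting
// on them. A nil watch set is passed since no blocking is needed.
func exampleGCEligibleJobs(s *StateStore) ([]*structs.Job, error) {
    iter, err := s.JobsByGC(nil, true)
    if err != nil {
        return nil, err
    }

    var out []*structs.Job
    for raw := iter.Next(); raw != nil; raw = iter.Next() {
        out = append(out, raw.(*structs.Job))
    }
    return out, nil
}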
1800 func (s *StateStore) JobSummaryByID(ws memdb.WatchSet, namespace, jobID string) (*structs.JobSummary, error) { 1801 txn := s.db.Txn(false) 1802 1803 watchCh, existing, err := txn.FirstWatch("job_summary", "id", namespace, jobID) 1804 if err != nil { 1805 return nil, err 1806 } 1807 1808 ws.Add(watchCh) 1809 1810 if existing != nil { 1811 summary := existing.(*structs.JobSummary) 1812 return summary, nil 1813 } 1814 1815 return nil, nil 1816 } 1817 1818 // JobSummaries walks the entire job summary table and returns all the job 1819 // summary objects 1820 func (s *StateStore) JobSummaries(ws memdb.WatchSet) (memdb.ResultIterator, error) { 1821 txn := s.db.Txn(false) 1822 1823 iter, err := txn.Get("job_summary", "id") 1824 if err != nil { 1825 return nil, err 1826 } 1827 1828 ws.Add(iter.WatchCh()) 1829 1830 return iter, nil 1831 } 1832 1833 // JobSummaryByPrefix is used to look up Job Summary by id prefix 1834 func (s *StateStore) JobSummaryByPrefix(ws memdb.WatchSet, namespace, id string) (memdb.ResultIterator, error) { 1835 txn := s.db.Txn(false) 1836 1837 iter, err := txn.Get("job_summary", "id_prefix", namespace, id) 1838 if err != nil { 1839 return nil, fmt.Errorf("job_summary lookup failed: %v", err) 1840 } 1841 1842 ws.Add(iter.WatchCh()) 1843 1844 return iter, nil 1845 } 1846 1847 // CSIVolumeRegister adds a volume to the server store, failing if it already exists 1848 func (s *StateStore) CSIVolumeRegister(index uint64, volumes []*structs.CSIVolume) error { 1849 txn := s.db.Txn(true) 1850 defer txn.Abort() 1851 1852 for _, v := range volumes { 1853 if exists, err := s.namespaceExists(txn, v.Namespace); err != nil { 1854 return err 1855 } else if !exists { 1856 return fmt.Errorf("volume %s is in nonexistent namespace %s", v.ID, v.Namespace) 1857 } 1858 1859 // Check for volume existence 1860 obj, err := txn.First("csi_volumes", "id", v.Namespace, v.ID) 1861 if err != nil { 1862 return fmt.Errorf("volume existence check error: %v", err) 1863 } 1864 if obj != nil { 1865 // Allow some properties of a volume to be updated in place, but 1866 // prevent accidentally overwriting important properties, or 1867 // overwriting a volume in use 1868 old, ok := obj.(*structs.CSIVolume) 1869 if ok && 1870 old.InUse() || 1871 old.ExternalID != v.ExternalID || 1872 old.PluginID != v.PluginID || 1873 old.Provider != v.Provider { 1874 return fmt.Errorf("volume exists: %s", v.ID) 1875 } 1876 } 1877 1878 if v.CreateIndex == 0 { 1879 v.CreateIndex = index 1880 v.ModifyIndex = index 1881 } 1882 1883 err = txn.Insert("csi_volumes", v) 1884 if err != nil { 1885 return fmt.Errorf("volume insert: %v", err) 1886 } 1887 } 1888 1889 if err := txn.Insert("index", &IndexEntry{"csi_volumes", index}); err != nil { 1890 return fmt.Errorf("index update failed: %v", err) 1891 } 1892 1893 txn.Commit() 1894 return nil 1895 } 1896 1897 // CSIVolumes returns the unfiltered list of all volumes 1898 func (s *StateStore) CSIVolumes(ws memdb.WatchSet) (memdb.ResultIterator, error) { 1899 txn := s.db.Txn(false) 1900 defer txn.Abort() 1901 1902 iter, err := txn.Get("csi_volumes", "id") 1903 if err != nil { 1904 return nil, fmt.Errorf("csi_volumes lookup failed: %v", err) 1905 } 1906 1907 ws.Add(iter.WatchCh()) 1908 1909 return iter, nil 1910 } 1911 1912 // CSIVolumeByID is used to lookup a single volume. Returns a copy of the volume 1913 // because its plugins are denormalized to provide accurate Health. 
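//
// Illustrative usage (added, not in the original source): reading a volume and
// checking whether it is currently schedulable. Assumes store is a *StateStore
// with the volume already registered.
//
//	ws := memdb.NewWatchSet()
//	vol, err := store.CSIVolumeByID(ws, structs.DefaultNamespace, "vol-1")
//	if err == nil && vol != nil {
//		fmt.Println(vol.ID, vol.Schedulable, vol.ControllersHealthy, vol.NodesHealthy)
//	}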
func (s *StateStore) CSIVolumeByID(ws memdb.WatchSet, namespace, id string) (*structs.CSIVolume, error) {
	txn := s.db.Txn(false)

	watchCh, obj, err := txn.FirstWatch("csi_volumes", "id_prefix", namespace, id)
	if err != nil {
		return nil, fmt.Errorf("volume lookup failed: %s %v", id, err)
	}
	ws.Add(watchCh)

	if obj == nil {
		return nil, nil
	}

	vol := obj.(*structs.CSIVolume)
	return s.CSIVolumeDenormalizePlugins(ws, vol.Copy())
}

// CSIVolumesByPluginID looks up csi_volumes by pluginID
func (s *StateStore) CSIVolumesByPluginID(ws memdb.WatchSet, namespace, pluginID string) (memdb.ResultIterator, error) {
	txn := s.db.Txn(false)

	iter, err := txn.Get("csi_volumes", "plugin_id", pluginID)
	if err != nil {
		return nil, fmt.Errorf("volume lookup failed: %v", err)
	}

	// Filter the iterator by namespace
	f := func(raw interface{}) bool {
		v, ok := raw.(*structs.CSIVolume)
		if !ok {
			return false
		}
		return v.Namespace != namespace
	}

	wrap := memdb.NewFilterIterator(iter, f)
	return wrap, nil
}

// CSIVolumesByIDPrefix supports search
func (s *StateStore) CSIVolumesByIDPrefix(ws memdb.WatchSet, namespace, volumeID string) (memdb.ResultIterator, error) {
	txn := s.db.Txn(false)

	iter, err := txn.Get("csi_volumes", "id_prefix", namespace, volumeID)
	if err != nil {
		return nil, err
	}

	ws.Add(iter.WatchCh())
	return iter, nil
}

// CSIVolumesByNodeID looks up CSIVolumes in use on a node
func (s *StateStore) CSIVolumesByNodeID(ws memdb.WatchSet, nodeID string) (memdb.ResultIterator, error) {
	allocs, err := s.AllocsByNode(ws, nodeID)
	if err != nil {
		return nil, fmt.Errorf("alloc lookup failed: %v", err)
	}

	// Find volume ids for CSI volumes in running allocs, or allocs that we desire to run
	ids := map[string]string{} // Map volumeID to Namespace
	for _, a := range allocs {
		tg := a.Job.LookupTaskGroup(a.TaskGroup)

		if !(a.DesiredStatus == structs.AllocDesiredStatusRun ||
			a.ClientStatus == structs.AllocClientStatusRunning) ||
			len(tg.Volumes) == 0 {
			continue
		}

		for _, v := range tg.Volumes {
			if v.Type != structs.VolumeTypeCSI {
				continue
			}
			ids[v.Source] = a.Namespace
		}
	}

	// Lookup the raw CSIVolumes to match the other list interfaces
	iter := NewSliceIterator()
	txn := s.db.Txn(false)
	for id, namespace := range ids {
		raw, err := txn.First("csi_volumes", "id", namespace, id)
		if err != nil {
			return nil, fmt.Errorf("volume lookup failed: %s %v", id, err)
		}
		iter.Add(raw)
	}

	return iter, nil
}

// CSIVolumesByNamespace returns all the volumes in the given namespace
func (s *StateStore) CSIVolumesByNamespace(ws memdb.WatchSet, namespace string) (memdb.ResultIterator, error) {
	txn := s.db.Txn(false)

	iter, err := txn.Get("csi_volumes", "id_prefix", namespace, "")
	if err != nil {
		return nil, fmt.Errorf("volume lookup failed: %v", err)
	}
	ws.Add(iter.WatchCh())

	return iter, nil
}

// CSIVolumeClaim updates the volume's claim count and allocation list
func (s *StateStore) CSIVolumeClaim(index uint64, namespace, id string, alloc *structs.Allocation, claim structs.CSIVolumeClaimMode) error {
	txn := s.db.Txn(true)
	defer txn.Abort()

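	// (Comment added for clarity) The claim flow below has four steps: look
	// up the stored volume, denormalize a copy (objects returned from memdb
	// must never be mutated in place), apply the claim to the copy, and
	// insert the copy plus the bumped table index in the same transaction.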
row, err := txn.First("csi_volumes", "id", namespace, id) 2025 if err != nil { 2026 return fmt.Errorf("volume lookup failed: %s: %v", id, err) 2027 } 2028 if row == nil { 2029 return fmt.Errorf("volume not found: %s", id) 2030 } 2031 2032 orig, ok := row.(*structs.CSIVolume) 2033 if !ok { 2034 return fmt.Errorf("volume row conversion error") 2035 } 2036 2037 ws := memdb.NewWatchSet() 2038 volume, err := s.CSIVolumeDenormalizePlugins(ws, orig.Copy()) 2039 if err != nil { 2040 return err 2041 } 2042 2043 volume, err = s.CSIVolumeDenormalize(ws, volume) 2044 if err != nil { 2045 return err 2046 } 2047 2048 err = volume.Claim(claim, alloc) 2049 if err != nil { 2050 return err 2051 } 2052 2053 volume.ModifyIndex = index 2054 2055 if err = txn.Insert("csi_volumes", volume); err != nil { 2056 return fmt.Errorf("volume update failed: %s: %v", id, err) 2057 } 2058 2059 if err = txn.Insert("index", &IndexEntry{"csi_volumes", index}); err != nil { 2060 return fmt.Errorf("index update failed: %v", err) 2061 } 2062 2063 txn.Commit() 2064 return nil 2065 } 2066 2067 // CSIVolumeDeregister removes the volume from the server 2068 func (s *StateStore) CSIVolumeDeregister(index uint64, namespace string, ids []string) error { 2069 txn := s.db.Txn(true) 2070 defer txn.Abort() 2071 2072 for _, id := range ids { 2073 existing, err := txn.First("csi_volumes", "id_prefix", namespace, id) 2074 if err != nil { 2075 return fmt.Errorf("volume lookup failed: %s: %v", id, err) 2076 } 2077 2078 if existing == nil { 2079 return fmt.Errorf("volume not found: %s", id) 2080 } 2081 2082 vol, ok := existing.(*structs.CSIVolume) 2083 if !ok { 2084 return fmt.Errorf("volume row conversion error: %s", id) 2085 } 2086 2087 if vol.InUse() { 2088 return fmt.Errorf("volume in use: %s", id) 2089 } 2090 2091 if err = txn.Delete("csi_volumes", existing); err != nil { 2092 return fmt.Errorf("volume delete failed: %s: %v", id, err) 2093 } 2094 } 2095 2096 if err := txn.Insert("index", &IndexEntry{"csi_volumes", index}); err != nil { 2097 return fmt.Errorf("index update failed: %v", err) 2098 } 2099 2100 txn.Commit() 2101 return nil 2102 } 2103 2104 // CSIVolumeDenormalizePlugins returns a CSIVolume with current health and plugins, but 2105 // without allocations 2106 // Use this for current volume metadata, handling lists of volumes 2107 // Use CSIVolumeDenormalize for volumes containing both health and current allocations 2108 func (s *StateStore) CSIVolumeDenormalizePlugins(ws memdb.WatchSet, vol *structs.CSIVolume) (*structs.CSIVolume, error) { 2109 if vol == nil { 2110 return nil, nil 2111 } 2112 2113 // Lookup CSIPlugin, the health records, and calculate volume health 2114 txn := s.db.Txn(false) 2115 defer txn.Abort() 2116 2117 plug, err := s.CSIPluginByID(ws, vol.PluginID) 2118 if err != nil { 2119 return nil, fmt.Errorf("plugin lookup error: %s %v", vol.PluginID, err) 2120 } 2121 if plug == nil { 2122 vol.ControllersHealthy = 0 2123 vol.NodesHealthy = 0 2124 vol.Schedulable = false 2125 return vol, nil 2126 } 2127 2128 vol.Provider = plug.Provider 2129 vol.ProviderVersion = plug.Version 2130 vol.ControllerRequired = plug.ControllerRequired 2131 vol.ControllersHealthy = plug.ControllersHealthy 2132 vol.NodesHealthy = plug.NodesHealthy 2133 // This number is incorrect! 
The expected number of node plugins is actually this + 2134 // the number of blocked evaluations for the jobs controlling these plugins 2135 vol.ControllersExpected = len(plug.Controllers) 2136 vol.NodesExpected = len(plug.Nodes) 2137 2138 vol.Schedulable = vol.NodesHealthy > 0 2139 if vol.ControllerRequired { 2140 vol.Schedulable = vol.ControllersHealthy > 0 && vol.Schedulable 2141 } 2142 2143 return vol, nil 2144 } 2145 2146 // CSIVolumeDenormalize returns a CSIVolume with allocations 2147 func (s *StateStore) CSIVolumeDenormalize(ws memdb.WatchSet, vol *structs.CSIVolume) (*structs.CSIVolume, error) { 2148 for id := range vol.ReadAllocs { 2149 a, err := s.AllocByID(ws, id) 2150 if err != nil { 2151 return nil, err 2152 } 2153 if a != nil { 2154 vol.ReadAllocs[id] = a 2155 } 2156 } 2157 2158 for id := range vol.WriteAllocs { 2159 a, err := s.AllocByID(ws, id) 2160 if err != nil { 2161 return nil, err 2162 } 2163 if a != nil { 2164 vol.WriteAllocs[id] = a 2165 } 2166 } 2167 2168 return vol, nil 2169 } 2170 2171 // CSIPlugins returns the unfiltered list of all plugin health status 2172 func (s *StateStore) CSIPlugins(ws memdb.WatchSet) (memdb.ResultIterator, error) { 2173 txn := s.db.Txn(false) 2174 defer txn.Abort() 2175 2176 iter, err := txn.Get("csi_plugins", "id") 2177 if err != nil { 2178 return nil, fmt.Errorf("csi_plugins lookup failed: %v", err) 2179 } 2180 2181 ws.Add(iter.WatchCh()) 2182 2183 return iter, nil 2184 } 2185 2186 // CSIPluginsByIDPrefix supports search 2187 func (s *StateStore) CSIPluginsByIDPrefix(ws memdb.WatchSet, pluginID string) (memdb.ResultIterator, error) { 2188 txn := s.db.Txn(false) 2189 2190 iter, err := txn.Get("csi_plugins", "id_prefix", pluginID) 2191 if err != nil { 2192 return nil, err 2193 } 2194 2195 ws.Add(iter.WatchCh()) 2196 2197 return iter, nil 2198 } 2199 2200 // CSIPluginByID returns the one named CSIPlugin 2201 func (s *StateStore) CSIPluginByID(ws memdb.WatchSet, id string) (*structs.CSIPlugin, error) { 2202 txn := s.db.Txn(false) 2203 defer txn.Abort() 2204 2205 raw, err := txn.First("csi_plugins", "id_prefix", id) 2206 if err != nil { 2207 return nil, fmt.Errorf("csi_plugin lookup failed: %s %v", id, err) 2208 } 2209 2210 if raw == nil { 2211 return nil, nil 2212 } 2213 2214 plug := raw.(*structs.CSIPlugin) 2215 2216 return plug, nil 2217 } 2218 2219 // CSIPluginDenormalize returns a CSIPlugin with allocation details 2220 func (s *StateStore) CSIPluginDenormalize(ws memdb.WatchSet, plug *structs.CSIPlugin) (*structs.CSIPlugin, error) { 2221 if plug == nil { 2222 return nil, nil 2223 } 2224 2225 // Get the unique list of allocation ids 2226 ids := map[string]struct{}{} 2227 for _, info := range plug.Controllers { 2228 ids[info.AllocID] = struct{}{} 2229 } 2230 for _, info := range plug.Nodes { 2231 ids[info.AllocID] = struct{}{} 2232 } 2233 2234 for id := range ids { 2235 alloc, err := s.AllocByID(ws, id) 2236 if err != nil { 2237 return nil, err 2238 } 2239 if alloc == nil { 2240 continue 2241 } 2242 plug.Allocations = append(plug.Allocations, alloc.Stub()) 2243 } 2244 2245 return plug, nil 2246 } 2247 2248 // UpsertPeriodicLaunch is used to register a launch or update it. 
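// A minimal usage sketch (added, not in the original source), assuming store
// is a *StateStore and raftIndex is the Raft index being applied:
//
//	launch := &structs.PeriodicLaunch{
//		ID:        "example-periodic",
//		Namespace: structs.DefaultNamespace,
//		Launch:    time.Now(),
//	}
//	if err := store.UpsertPeriodicLaunch(raftIndex, launch); err != nil {
//		// handle error
//	}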
2249 func (s *StateStore) UpsertPeriodicLaunch(index uint64, launch *structs.PeriodicLaunch) error { 2250 txn := s.db.Txn(true) 2251 defer txn.Abort() 2252 2253 // Check if the job already exists 2254 existing, err := txn.First("periodic_launch", "id", launch.Namespace, launch.ID) 2255 if err != nil { 2256 return fmt.Errorf("periodic launch lookup failed: %v", err) 2257 } 2258 2259 // Setup the indexes correctly 2260 if existing != nil { 2261 launch.CreateIndex = existing.(*structs.PeriodicLaunch).CreateIndex 2262 launch.ModifyIndex = index 2263 } else { 2264 launch.CreateIndex = index 2265 launch.ModifyIndex = index 2266 } 2267 2268 // Insert the job 2269 if err := txn.Insert("periodic_launch", launch); err != nil { 2270 return fmt.Errorf("launch insert failed: %v", err) 2271 } 2272 if err := txn.Insert("index", &IndexEntry{"periodic_launch", index}); err != nil { 2273 return fmt.Errorf("index update failed: %v", err) 2274 } 2275 2276 txn.Commit() 2277 return nil 2278 } 2279 2280 // DeletePeriodicLaunch is used to delete the periodic launch 2281 func (s *StateStore) DeletePeriodicLaunch(index uint64, namespace, jobID string) error { 2282 txn := s.db.Txn(true) 2283 defer txn.Abort() 2284 2285 err := s.DeletePeriodicLaunchTxn(index, namespace, jobID, txn) 2286 if err == nil { 2287 txn.Commit() 2288 } 2289 return err 2290 } 2291 2292 // DeletePeriodicLaunchTxn is used to delete the periodic launch, like DeletePeriodicLaunch 2293 // but in a transaction. Useful for when making multiple modifications atomically 2294 func (s *StateStore) DeletePeriodicLaunchTxn(index uint64, namespace, jobID string, txn Txn) error { 2295 // Lookup the launch 2296 existing, err := txn.First("periodic_launch", "id", namespace, jobID) 2297 if err != nil { 2298 return fmt.Errorf("launch lookup failed: %v", err) 2299 } 2300 if existing == nil { 2301 return fmt.Errorf("launch not found") 2302 } 2303 2304 // Delete the launch 2305 if err := txn.Delete("periodic_launch", existing); err != nil { 2306 return fmt.Errorf("launch delete failed: %v", err) 2307 } 2308 if err := txn.Insert("index", &IndexEntry{"periodic_launch", index}); err != nil { 2309 return fmt.Errorf("index update failed: %v", err) 2310 } 2311 2312 return nil 2313 } 2314 2315 // PeriodicLaunchByID is used to lookup a periodic launch by the periodic job 2316 // ID. 
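// Illustrative sketch (added, not in the original source): checking when a
// periodic job last launched; assumes store is a *StateStore.
//
//	ws := memdb.NewWatchSet()
//	launch, err := store.PeriodicLaunchByID(ws, structs.DefaultNamespace, "example-periodic")
//	if err == nil && launch != nil {
//		fmt.Println("last launch at", launch.Launch)
//	}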
func (s *StateStore) PeriodicLaunchByID(ws memdb.WatchSet, namespace, id string) (*structs.PeriodicLaunch, error) {
	txn := s.db.Txn(false)

	watchCh, existing, err := txn.FirstWatch("periodic_launch", "id", namespace, id)
	if err != nil {
		return nil, fmt.Errorf("periodic launch lookup failed: %v", err)
	}

	ws.Add(watchCh)

	if existing != nil {
		return existing.(*structs.PeriodicLaunch), nil
	}
	return nil, nil
}

// PeriodicLaunches returns an iterator over all the periodic launches
func (s *StateStore) PeriodicLaunches(ws memdb.WatchSet) (memdb.ResultIterator, error) {
	txn := s.db.Txn(false)

	// Walk the entire table
	iter, err := txn.Get("periodic_launch", "id")
	if err != nil {
		return nil, err
	}

	ws.Add(iter.WatchCh())

	return iter, nil
}

// UpsertEvals is used to upsert a set of evaluations
func (s *StateStore) UpsertEvals(index uint64, evals []*structs.Evaluation) error {
	txn := s.db.Txn(true)
	defer txn.Abort()

	err := s.UpsertEvalsTxn(index, evals, txn)
	if err == nil {
		txn.Commit()
	}
	return err
}

// UpsertEvalsTxn is used to upsert a set of evaluations, like UpsertEvals
// but in a transaction. Useful for when making multiple modifications atomically.
func (s *StateStore) UpsertEvalsTxn(index uint64, evals []*structs.Evaluation, txn Txn) error {
	// Do a nested upsert
	jobs := make(map[structs.NamespacedID]string, len(evals))
	for _, eval := range evals {
		if err := s.nestedUpsertEval(txn, index, eval); err != nil {
			return err
		}

		tuple := structs.NamespacedID{
			ID:        eval.JobID,
			Namespace: eval.Namespace,
		}
		jobs[tuple] = ""
	}

	// Set the job's status
	if err := s.setJobStatuses(index, txn, jobs, false); err != nil {
		return fmt.Errorf("setting job status failed: %v", err)
	}

	return nil
}

// nestedUpsertEval is used to nest an evaluation upsert within a transaction
func (s *StateStore) nestedUpsertEval(txn *memdb.Txn, index uint64, eval *structs.Evaluation) error {
	// Lookup the evaluation
	existing, err := txn.First("evals", "id", eval.ID)
	if err != nil {
		return fmt.Errorf("eval lookup failed: %v", err)
	}

	// Update the indexes
	if existing != nil {
		eval.CreateIndex = existing.(*structs.Evaluation).CreateIndex
		eval.ModifyIndex = index
	} else {
		eval.CreateIndex = index
		eval.ModifyIndex = index
	}

	// Update the job summary
	summaryRaw, err := txn.First("job_summary", "id", eval.Namespace, eval.JobID)
	if err != nil {
		return fmt.Errorf("job summary lookup failed: %v", err)
	}
	if summaryRaw != nil {
		js := summaryRaw.(*structs.JobSummary).Copy()
		hasSummaryChanged := false
		for tg, num := range eval.QueuedAllocations {
			if summary, ok := js.Summary[tg]; ok {
				if summary.Queued != num {
					summary.Queued = num
					js.Summary[tg] = summary
					hasSummaryChanged = true
				}
			} else {
				s.logger.Error("unable to update queued for job and task group", "job_id", eval.JobID, "task_group", tg, "namespace", eval.Namespace)
			}
		}

		// Insert the job summary
		if hasSummaryChanged {
			js.ModifyIndex = index
			if err := txn.Insert("job_summary", js); err != nil {
				return fmt.Errorf("job summary insert failed: %v", err)
			}
			if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
				return fmt.Errorf("index update failed: %v", err)
			}
		}
	}

	// Check if the job has any blocked evaluations and cancel them
	if eval.Status == structs.EvalStatusComplete && len(eval.FailedTGAllocs) == 0 {
		// Get the blocked evaluations for the job, if any
		iter, err := txn.Get("evals", "job", eval.Namespace, eval.JobID, structs.EvalStatusBlocked)
		if err != nil {
			return fmt.Errorf("failed to get blocked evals for job %q in namespace %q: %v", eval.JobID, eval.Namespace, err)
		}

		var blocked []*structs.Evaluation
		for {
			raw := iter.Next()
			if raw == nil {
				break
			}
			blocked = append(blocked, raw.(*structs.Evaluation))
		}

		// Go through and update the evals
		for _, eval := range blocked {
			newEval := eval.Copy()
			newEval.Status = structs.EvalStatusCancelled
			newEval.StatusDescription = fmt.Sprintf("evaluation %q successful", newEval.ID)
			newEval.ModifyIndex = index

			if err := txn.Insert("evals", newEval); err != nil {
				return fmt.Errorf("eval insert failed: %v", err)
			}
		}
	}

	// Insert the eval
	if err := txn.Insert("evals", eval); err != nil {
		return fmt.Errorf("eval insert failed: %v", err)
	}
	if err := txn.Insert("index", &IndexEntry{"evals", index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}
	return nil
}

// updateEvalModifyIndex is used to update the modify index of an evaluation that has been
// through a scheduler pass. This is done as part of plan apply. It ensures that when
// subsequent scheduler workers process a re-queued evaluation they see any partial
// updates from the plan apply.
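//
// (Added note) Callers outside plan apply use UpsertEvals above; completed
// evaluations with no failed task groups also cancel the job's blocked
// evaluations as a side effect. A minimal sketch, assuming store is a
// *StateStore and eval is a *structs.Evaluation:
//
//	if err := store.UpsertEvals(raftIndex, []*structs.Evaluation{eval}); err != nil {
//		// handle error
//	}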
2477 func (s *StateStore) updateEvalModifyIndex(txn *memdb.Txn, index uint64, evalID string) error { 2478 // Lookup the evaluation 2479 existing, err := txn.First("evals", "id", evalID) 2480 if err != nil { 2481 return fmt.Errorf("eval lookup failed: %v", err) 2482 } 2483 if existing == nil { 2484 s.logger.Error("unable to find eval", "eval_id", evalID) 2485 return fmt.Errorf("unable to find eval id %q", evalID) 2486 } 2487 eval := existing.(*structs.Evaluation).Copy() 2488 // Update the indexes 2489 eval.ModifyIndex = index 2490 2491 // Insert the eval 2492 if err := txn.Insert("evals", eval); err != nil { 2493 return fmt.Errorf("eval insert failed: %v", err) 2494 } 2495 if err := txn.Insert("index", &IndexEntry{"evals", index}); err != nil { 2496 return fmt.Errorf("index update failed: %v", err) 2497 } 2498 return nil 2499 } 2500 2501 // DeleteEval is used to delete an evaluation 2502 func (s *StateStore) DeleteEval(index uint64, evals []string, allocs []string) error { 2503 txn := s.db.Txn(true) 2504 defer txn.Abort() 2505 2506 jobs := make(map[structs.NamespacedID]string, len(evals)) 2507 for _, eval := range evals { 2508 existing, err := txn.First("evals", "id", eval) 2509 if err != nil { 2510 return fmt.Errorf("eval lookup failed: %v", err) 2511 } 2512 if existing == nil { 2513 continue 2514 } 2515 if err := txn.Delete("evals", existing); err != nil { 2516 return fmt.Errorf("eval delete failed: %v", err) 2517 } 2518 eval := existing.(*structs.Evaluation) 2519 2520 tuple := structs.NamespacedID{ 2521 ID: eval.JobID, 2522 Namespace: eval.Namespace, 2523 } 2524 jobs[tuple] = "" 2525 } 2526 2527 for _, alloc := range allocs { 2528 raw, err := txn.First("allocs", "id", alloc) 2529 if err != nil { 2530 return fmt.Errorf("alloc lookup failed: %v", err) 2531 } 2532 if raw == nil { 2533 continue 2534 } 2535 if err := txn.Delete("allocs", raw); err != nil { 2536 return fmt.Errorf("alloc delete failed: %v", err) 2537 } 2538 } 2539 2540 // Update the indexes 2541 if err := txn.Insert("index", &IndexEntry{"evals", index}); err != nil { 2542 return fmt.Errorf("index update failed: %v", err) 2543 } 2544 if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { 2545 return fmt.Errorf("index update failed: %v", err) 2546 } 2547 2548 // Set the job's status 2549 if err := s.setJobStatuses(index, txn, jobs, true); err != nil { 2550 return fmt.Errorf("setting job status failed: %v", err) 2551 } 2552 2553 txn.Commit() 2554 return nil 2555 } 2556 2557 // EvalByID is used to lookup an eval by its ID 2558 func (s *StateStore) EvalByID(ws memdb.WatchSet, id string) (*structs.Evaluation, error) { 2559 txn := s.db.Txn(false) 2560 2561 watchCh, existing, err := txn.FirstWatch("evals", "id", id) 2562 if err != nil { 2563 return nil, fmt.Errorf("eval lookup failed: %v", err) 2564 } 2565 2566 ws.Add(watchCh) 2567 2568 if existing != nil { 2569 return existing.(*structs.Evaluation), nil 2570 } 2571 return nil, nil 2572 } 2573 2574 // EvalsByIDPrefix is used to lookup evaluations by prefix in a particular 2575 // namespace 2576 func (s *StateStore) EvalsByIDPrefix(ws memdb.WatchSet, namespace, id string) (memdb.ResultIterator, error) { 2577 txn := s.db.Txn(false) 2578 2579 // Get an iterator over all evals by the id prefix 2580 iter, err := txn.Get("evals", "id_prefix", id) 2581 if err != nil { 2582 return nil, fmt.Errorf("eval lookup failed: %v", err) 2583 } 2584 2585 ws.Add(iter.WatchCh()) 2586 2587 // Wrap the iterator in a filter 2588 wrap := memdb.NewFilterIterator(iter, 
evalNamespaceFilter(namespace)) 2589 return wrap, nil 2590 } 2591 2592 // evalNamespaceFilter returns a filter function that filters all evaluations 2593 // not in the given namespace. 2594 func evalNamespaceFilter(namespace string) func(interface{}) bool { 2595 return func(raw interface{}) bool { 2596 eval, ok := raw.(*structs.Evaluation) 2597 if !ok { 2598 return true 2599 } 2600 2601 return eval.Namespace != namespace 2602 } 2603 } 2604 2605 // EvalsByJob returns all the evaluations by job id 2606 func (s *StateStore) EvalsByJob(ws memdb.WatchSet, namespace, jobID string) ([]*structs.Evaluation, error) { 2607 txn := s.db.Txn(false) 2608 2609 // Get an iterator over the node allocations 2610 iter, err := txn.Get("evals", "job_prefix", namespace, jobID) 2611 if err != nil { 2612 return nil, err 2613 } 2614 2615 ws.Add(iter.WatchCh()) 2616 2617 var out []*structs.Evaluation 2618 for { 2619 raw := iter.Next() 2620 if raw == nil { 2621 break 2622 } 2623 2624 e := raw.(*structs.Evaluation) 2625 2626 // Filter non-exact matches 2627 if e.JobID != jobID { 2628 continue 2629 } 2630 2631 out = append(out, e) 2632 } 2633 return out, nil 2634 } 2635 2636 // Evals returns an iterator over all the evaluations 2637 func (s *StateStore) Evals(ws memdb.WatchSet) (memdb.ResultIterator, error) { 2638 txn := s.db.Txn(false) 2639 2640 // Walk the entire table 2641 iter, err := txn.Get("evals", "id") 2642 if err != nil { 2643 return nil, err 2644 } 2645 2646 ws.Add(iter.WatchCh()) 2647 2648 return iter, nil 2649 } 2650 2651 // EvalsByNamespace returns an iterator over all the evaluations in the given 2652 // namespace 2653 func (s *StateStore) EvalsByNamespace(ws memdb.WatchSet, namespace string) (memdb.ResultIterator, error) { 2654 txn := s.db.Txn(false) 2655 2656 // Walk the entire table 2657 iter, err := txn.Get("evals", "namespace", namespace) 2658 if err != nil { 2659 return nil, err 2660 } 2661 2662 ws.Add(iter.WatchCh()) 2663 2664 return iter, nil 2665 } 2666 2667 // UpdateAllocsFromClient is used to update an allocation based on input 2668 // from a client. While the schedulers are the authority on the allocation for 2669 // most things, some updates are authoritative from the client. Specifically, 2670 // the desired state comes from the schedulers, while the actual state comes 2671 // from clients. 
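//
// Sketch of a client-authoritative update (added, not in the original
// source): only the client-owned fields on the passed allocation are
// honored, with allocID and taskStates supplied by the caller.
//
//	update := &structs.Allocation{
//		ID:           allocID,
//		ClientStatus: structs.AllocClientStatusRunning,
//		TaskStates:   taskStates,
//	}
//	err := store.UpdateAllocsFromClient(raftIndex, []*structs.Allocation{update})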
2672 func (s *StateStore) UpdateAllocsFromClient(index uint64, allocs []*structs.Allocation) error { 2673 txn := s.db.Txn(true) 2674 defer txn.Abort() 2675 2676 // Handle each of the updated allocations 2677 for _, alloc := range allocs { 2678 if err := s.nestedUpdateAllocFromClient(txn, index, alloc); err != nil { 2679 return err 2680 } 2681 } 2682 2683 // Update the indexes 2684 if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { 2685 return fmt.Errorf("index update failed: %v", err) 2686 } 2687 2688 txn.Commit() 2689 return nil 2690 } 2691 2692 // nestedUpdateAllocFromClient is used to nest an update of an allocation with client status 2693 func (s *StateStore) nestedUpdateAllocFromClient(txn *memdb.Txn, index uint64, alloc *structs.Allocation) error { 2694 // Look for existing alloc 2695 existing, err := txn.First("allocs", "id", alloc.ID) 2696 if err != nil { 2697 return fmt.Errorf("alloc lookup failed: %v", err) 2698 } 2699 2700 // Nothing to do if this does not exist 2701 if existing == nil { 2702 return nil 2703 } 2704 exist := existing.(*structs.Allocation) 2705 2706 // Copy everything from the existing allocation 2707 copyAlloc := exist.Copy() 2708 2709 // Pull in anything the client is the authority on 2710 copyAlloc.ClientStatus = alloc.ClientStatus 2711 copyAlloc.ClientDescription = alloc.ClientDescription 2712 copyAlloc.TaskStates = alloc.TaskStates 2713 2714 // The client can only set its deployment health and timestamp, so just take 2715 // those 2716 if copyAlloc.DeploymentStatus != nil && alloc.DeploymentStatus != nil { 2717 oldHasHealthy := copyAlloc.DeploymentStatus.HasHealth() 2718 newHasHealthy := alloc.DeploymentStatus.HasHealth() 2719 2720 // We got new health information from the client 2721 if newHasHealthy && (!oldHasHealthy || *copyAlloc.DeploymentStatus.Healthy != *alloc.DeploymentStatus.Healthy) { 2722 // Updated deployment health and timestamp 2723 copyAlloc.DeploymentStatus.Healthy = helper.BoolToPtr(*alloc.DeploymentStatus.Healthy) 2724 copyAlloc.DeploymentStatus.Timestamp = alloc.DeploymentStatus.Timestamp 2725 copyAlloc.DeploymentStatus.ModifyIndex = index 2726 } 2727 } else if alloc.DeploymentStatus != nil { 2728 // First time getting a deployment status so copy everything and just 2729 // set the index 2730 copyAlloc.DeploymentStatus = alloc.DeploymentStatus.Copy() 2731 copyAlloc.DeploymentStatus.ModifyIndex = index 2732 } 2733 2734 // Update the modify index 2735 copyAlloc.ModifyIndex = index 2736 2737 // Update the modify time 2738 copyAlloc.ModifyTime = alloc.ModifyTime 2739 2740 if err := s.updateDeploymentWithAlloc(index, copyAlloc, exist, txn); err != nil { 2741 return fmt.Errorf("error updating deployment: %v", err) 2742 } 2743 2744 if err := s.updateSummaryWithAlloc(index, copyAlloc, exist, txn); err != nil { 2745 return fmt.Errorf("error updating job summary: %v", err) 2746 } 2747 2748 if err := s.updateEntWithAlloc(index, copyAlloc, exist, txn); err != nil { 2749 return err 2750 } 2751 2752 // Update the allocation 2753 if err := txn.Insert("allocs", copyAlloc); err != nil { 2754 return fmt.Errorf("alloc insert failed: %v", err) 2755 } 2756 2757 // Set the job's status 2758 forceStatus := "" 2759 if !copyAlloc.TerminalStatus() { 2760 forceStatus = structs.JobStatusRunning 2761 } 2762 2763 tuple := structs.NamespacedID{ 2764 ID: exist.JobID, 2765 Namespace: exist.Namespace, 2766 } 2767 jobs := map[structs.NamespacedID]string{tuple: forceStatus} 2768 2769 if err := s.setJobStatuses(index, txn, jobs, false); err != nil { 
		return fmt.Errorf("setting job status failed: %v", err)
	}
	return nil
}

// UpsertAllocs is used to evict a set of allocations and allocate new ones at
// the same time.
func (s *StateStore) UpsertAllocs(index uint64, allocs []*structs.Allocation) error {
	txn := s.db.Txn(true)
	defer txn.Abort()
	if err := s.upsertAllocsImpl(index, allocs, txn); err != nil {
		return err
	}
	txn.Commit()
	return nil
}

// upsertAllocsImpl is the actual implementation of UpsertAllocs so that it may be
// used with an existing transaction.
func (s *StateStore) upsertAllocsImpl(index uint64, allocs []*structs.Allocation, txn *memdb.Txn) error {
	// Handle the allocations
	jobs := make(map[structs.NamespacedID]string, 1)
	for _, alloc := range allocs {
		existing, err := txn.First("allocs", "id", alloc.ID)
		if err != nil {
			return fmt.Errorf("alloc lookup failed: %v", err)
		}
		exist, _ := existing.(*structs.Allocation)

		if exist == nil {
			alloc.CreateIndex = index
			alloc.ModifyIndex = index
			alloc.AllocModifyIndex = index
			if alloc.DeploymentStatus != nil {
				alloc.DeploymentStatus.ModifyIndex = index
			}

			// Issue https://github.com/hashicorp/nomad/issues/2583 uncovered
			// a race between a forced garbage collection and the scheduler
			// marking an allocation as terminal. The issue is that the
			// allocation from the scheduler has its job normalized and the FSM
			// will only denormalize if the allocation is not terminal. However,
			// if the allocation is garbage collected, that will result in an
			// allocation being upserted for the first time without a job
			// attached. By returning an error here, it will cause the FSM to
			// error, causing the plan_apply to error and thus causing the
			// evaluation to be failed. This will force an index refresh that
			// should solve this issue.
			if alloc.Job == nil {
				return fmt.Errorf("attempting to upsert allocation %q without a job", alloc.ID)
			}
		} else {
			alloc.CreateIndex = exist.CreateIndex
			alloc.ModifyIndex = index
			alloc.AllocModifyIndex = index

			// Keep the client's task states
			alloc.TaskStates = exist.TaskStates

			// If the scheduler is marking this allocation as lost we do not
			// want to reuse the status of the existing allocation.
			if alloc.ClientStatus != structs.AllocClientStatusLost {
				alloc.ClientStatus = exist.ClientStatus
				alloc.ClientDescription = exist.ClientDescription
			}

			// The job has been denormalized so re-attach the original job
			if alloc.Job == nil {
				alloc.Job = exist.Job
			}
		}

		// OPTIMIZATION:
		// These should be given a map of new to old allocation and the updates
		// should be done in one pass over all changes. The current implementation
		// causes O(n) lookups/copies/insertions rather than O(1)
		if err := s.updateDeploymentWithAlloc(index, alloc, exist, txn); err != nil {
			return fmt.Errorf("error updating deployment: %v", err)
		}

		if err := s.updateSummaryWithAlloc(index, alloc, exist, txn); err != nil {
			return fmt.Errorf("error updating job summary: %v", err)
		}

		if err := s.updateEntWithAlloc(index, alloc, exist, txn); err != nil {
			return err
		}

		if err := txn.Insert("allocs", alloc); err != nil {
			return fmt.Errorf("alloc insert failed: %v", err)
		}

		if alloc.PreviousAllocation != "" {
			prevAlloc, err := txn.First("allocs", "id", alloc.PreviousAllocation)
			if err != nil {
				return fmt.Errorf("alloc lookup failed: %v", err)
			}
			existingPrevAlloc, _ := prevAlloc.(*structs.Allocation)
			if existingPrevAlloc != nil {
				prevAllocCopy := existingPrevAlloc.Copy()
				prevAllocCopy.NextAllocation = alloc.ID
				prevAllocCopy.ModifyIndex = index
				if err := txn.Insert("allocs", prevAllocCopy); err != nil {
					return fmt.Errorf("alloc insert failed: %v", err)
				}
			}
		}

		// If the allocation is running, force the job to running status.
		forceStatus := ""
		if !alloc.TerminalStatus() {
			forceStatus = structs.JobStatusRunning
		}

		tuple := structs.NamespacedID{
			ID:        alloc.JobID,
			Namespace: alloc.Namespace,
		}
		jobs[tuple] = forceStatus
	}

	// Update the indexes
	if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}

	// Set the job's status
	if err := s.setJobStatuses(index, txn, jobs, false); err != nil {
		return fmt.Errorf("setting job status failed: %v", err)
	}

	return nil
}

// UpdateAllocsDesiredTransitions is used to update a set of allocations'
// desired transitions.
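//
// Illustrative sketch (added, not in the original source): marking an
// allocation for migration during a drain and recording the drain eval in
// the same Raft application, with allocID and drainEval caller-supplied:
//
//	transitions := map[string]*structs.DesiredTransition{
//		allocID: {Migrate: helper.BoolToPtr(true)},
//	}
//	err := store.UpdateAllocsDesiredTransitions(raftIndex, transitions, []*structs.Evaluation{drainEval})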
2906 func (s *StateStore) UpdateAllocsDesiredTransitions(index uint64, allocs map[string]*structs.DesiredTransition, 2907 evals []*structs.Evaluation) error { 2908 2909 txn := s.db.Txn(true) 2910 defer txn.Abort() 2911 2912 // Handle each of the updated allocations 2913 for id, transition := range allocs { 2914 if err := s.nestedUpdateAllocDesiredTransition(txn, index, id, transition); err != nil { 2915 return err 2916 } 2917 } 2918 2919 for _, eval := range evals { 2920 if err := s.nestedUpsertEval(txn, index, eval); err != nil { 2921 return err 2922 } 2923 } 2924 2925 // Update the indexes 2926 if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { 2927 return fmt.Errorf("index update failed: %v", err) 2928 } 2929 2930 txn.Commit() 2931 return nil 2932 } 2933 2934 // nestedUpdateAllocDesiredTransition is used to nest an update of an 2935 // allocations desired transition 2936 func (s *StateStore) nestedUpdateAllocDesiredTransition( 2937 txn *memdb.Txn, index uint64, allocID string, 2938 transition *structs.DesiredTransition) error { 2939 2940 // Look for existing alloc 2941 existing, err := txn.First("allocs", "id", allocID) 2942 if err != nil { 2943 return fmt.Errorf("alloc lookup failed: %v", err) 2944 } 2945 2946 // Nothing to do if this does not exist 2947 if existing == nil { 2948 return nil 2949 } 2950 exist := existing.(*structs.Allocation) 2951 2952 // Copy everything from the existing allocation 2953 copyAlloc := exist.Copy() 2954 2955 // Merge the desired transitions 2956 copyAlloc.DesiredTransition.Merge(transition) 2957 2958 // Update the modify index 2959 copyAlloc.ModifyIndex = index 2960 2961 // Update the allocation 2962 if err := txn.Insert("allocs", copyAlloc); err != nil { 2963 return fmt.Errorf("alloc insert failed: %v", err) 2964 } 2965 2966 return nil 2967 } 2968 2969 // AllocByID is used to lookup an allocation by its ID 2970 func (s *StateStore) AllocByID(ws memdb.WatchSet, id string) (*structs.Allocation, error) { 2971 txn := s.db.Txn(false) 2972 2973 watchCh, existing, err := txn.FirstWatch("allocs", "id", id) 2974 if err != nil { 2975 return nil, fmt.Errorf("alloc lookup failed: %v", err) 2976 } 2977 2978 ws.Add(watchCh) 2979 2980 if existing != nil { 2981 return existing.(*structs.Allocation), nil 2982 } 2983 return nil, nil 2984 } 2985 2986 // AllocsByIDPrefix is used to lookup allocs by prefix 2987 func (s *StateStore) AllocsByIDPrefix(ws memdb.WatchSet, namespace, id string) (memdb.ResultIterator, error) { 2988 txn := s.db.Txn(false) 2989 2990 iter, err := txn.Get("allocs", "id_prefix", id) 2991 if err != nil { 2992 return nil, fmt.Errorf("alloc lookup failed: %v", err) 2993 } 2994 2995 ws.Add(iter.WatchCh()) 2996 2997 // Wrap the iterator in a filter 2998 wrap := memdb.NewFilterIterator(iter, allocNamespaceFilter(namespace)) 2999 return wrap, nil 3000 } 3001 3002 // allocNamespaceFilter returns a filter function that filters all allocations 3003 // not in the given namespace. 
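// (Added note) memdb.NewFilterIterator drops objects for which the filter
// function returns true, so returning alloc.Namespace != namespace keeps
// exactly the allocations that are in the requested namespace.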
func allocNamespaceFilter(namespace string) func(interface{}) bool {
	return func(raw interface{}) bool {
		alloc, ok := raw.(*structs.Allocation)
		if !ok {
			return true
		}

		return alloc.Namespace != namespace
	}
}

// AllocsByNode returns all the allocations by node
func (s *StateStore) AllocsByNode(ws memdb.WatchSet, node string) ([]*structs.Allocation, error) {
	txn := s.db.Txn(false)

	// Get an iterator over the node allocations, using only the
	// node prefix which ignores the terminal status
	iter, err := txn.Get("allocs", "node_prefix", node)
	if err != nil {
		return nil, err
	}

	ws.Add(iter.WatchCh())

	var out []*structs.Allocation
	for {
		raw := iter.Next()
		if raw == nil {
			break
		}
		out = append(out, raw.(*structs.Allocation))
	}
	return out, nil
}

// AllocsByNodeTerminal returns all the allocations by node and terminal status
func (s *StateStore) AllocsByNodeTerminal(ws memdb.WatchSet, node string, terminal bool) ([]*structs.Allocation, error) {
	txn := s.db.Txn(false)

	// Get an iterator over the node allocations
	iter, err := txn.Get("allocs", "node", node, terminal)
	if err != nil {
		return nil, err
	}

	ws.Add(iter.WatchCh())

	var out []*structs.Allocation
	for {
		raw := iter.Next()
		if raw == nil {
			break
		}
		out = append(out, raw.(*structs.Allocation))
	}
	return out, nil
}

// AllocsByJob returns allocations by job id
func (s *StateStore) AllocsByJob(ws memdb.WatchSet, namespace, jobID string, anyCreateIndex bool) ([]*structs.Allocation, error) {
	txn := s.db.Txn(false)

	// Get the job
	var job *structs.Job
	rawJob, err := txn.First("jobs", "id", namespace, jobID)
	if err != nil {
		return nil, err
	}
	if rawJob != nil {
		job = rawJob.(*structs.Job)
	}

	// Get an iterator over the job's allocations
	iter, err := txn.Get("allocs", "job", namespace, jobID)
	if err != nil {
		return nil, err
	}

	ws.Add(iter.WatchCh())

	var out []*structs.Allocation
	for {
		raw := iter.Next()
		if raw == nil {
			break
		}

		alloc := raw.(*structs.Allocation)
		// If the allocation belongs to a job with the same ID but a different
		// create index and we are not getting all the allocations whose job
		// matches the same job ID then we skip it
		if !anyCreateIndex && job != nil && alloc.Job.CreateIndex != job.CreateIndex {
			continue
		}
		out = append(out, raw.(*structs.Allocation))
	}
	return out, nil
}

// AllocsByEval returns all the allocations by eval id
func (s *StateStore) AllocsByEval(ws memdb.WatchSet, evalID string) ([]*structs.Allocation, error) {
	txn := s.db.Txn(false)

	// Get an iterator over the eval allocations
	iter, err := txn.Get("allocs", "eval", evalID)
	if err != nil {
		return nil, err
	}

	ws.Add(iter.WatchCh())

	var out []*structs.Allocation
	for {
		raw := iter.Next()
		if raw == nil {
			break
		}
		out = append(out, raw.(*structs.Allocation))
	}
	return out, nil
}

// AllocsByDeployment returns all the allocations by deployment id
func (s *StateStore) AllocsByDeployment(ws memdb.WatchSet, deploymentID string) ([]*structs.Allocation, error) {
	txn := s.db.Txn(false)

	// Get an iterator over the deployment's allocations
	iter, err := txn.Get("allocs", "deployment", deploymentID)
	if err != nil {
		return nil, err
	}

	ws.Add(iter.WatchCh())

	var out []*structs.Allocation
	for {
		raw := iter.Next()
		if raw == nil {
			break
		}
		out = append(out, raw.(*structs.Allocation))
	}
	return out, nil
}

// Allocs returns an iterator over all the allocations
func (s *StateStore) Allocs(ws memdb.WatchSet) (memdb.ResultIterator, error) {
	txn := s.db.Txn(false)

	// Walk the entire table
	iter, err := txn.Get("allocs", "id")
	if err != nil {
		return nil, err
	}

	ws.Add(iter.WatchCh())

	return iter, nil
}

// AllocsByNamespace returns an iterator over all the allocations in the
// namespace
func (s *StateStore) AllocsByNamespace(ws memdb.WatchSet, namespace string) (memdb.ResultIterator, error) {
	txn := s.db.Txn(false)
	return s.allocsByNamespaceImpl(ws, txn, namespace)
}

// allocsByNamespaceImpl returns an iterator over all the allocations in the
// namespace
func (s *StateStore) allocsByNamespaceImpl(ws memdb.WatchSet, txn *memdb.Txn, namespace string) (memdb.ResultIterator, error) {
	// Walk the entire table
	iter, err := txn.Get("allocs", "namespace", namespace)
	if err != nil {
		return nil, err
	}

	ws.Add(iter.WatchCh())

	return iter, nil
}

// UpsertVaultAccessor is used to register a set of Vault accessors
func (s *StateStore) UpsertVaultAccessor(index uint64, accessors []*structs.VaultAccessor) error {
	txn := s.db.Txn(true)
	defer txn.Abort()

	for _, accessor := range accessors {
		// Set the create index
		accessor.CreateIndex = index

		// Insert the accessor
		if err := txn.Insert("vault_accessors", accessor); err != nil {
			return fmt.Errorf("accessor insert failed: %v", err)
		}
	}

	if err := txn.Insert("index", &IndexEntry{"vault_accessors", index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}

	txn.Commit()
	return nil
}

// DeleteVaultAccessors is used to delete a set of Vault Accessors
func (s *StateStore) DeleteVaultAccessors(index uint64, accessors []*structs.VaultAccessor) error {
	txn := s.db.Txn(true)
	defer txn.Abort()

	// Lookup each accessor
	for _, accessor := range accessors {
		// Delete the accessor
		if err := txn.Delete("vault_accessors", accessor); err != nil {
			return fmt.Errorf("accessor delete failed: %v", err)
		}
	}

	if err := txn.Insert("index", &IndexEntry{"vault_accessors", index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}

	txn.Commit()
	return nil
}

// VaultAccessor returns the given Vault accessor
func (s *StateStore) VaultAccessor(ws memdb.WatchSet, accessor string) (*structs.VaultAccessor, error) {
	txn := s.db.Txn(false)

	watchCh, existing, err := txn.FirstWatch("vault_accessors", "id", accessor)
	if err != nil {
		return nil, fmt.Errorf("accessor lookup failed: %v", err)
	}

	ws.Add(watchCh)

	if existing != nil {
		return existing.(*structs.VaultAccessor), nil
	}

	return nil, nil
}

// VaultAccessors returns an iterator of Vault accessors.
3248 func (s *StateStore) VaultAccessors(ws memdb.WatchSet) (memdb.ResultIterator, error) { 3249 txn := s.db.Txn(false) 3250 3251 iter, err := txn.Get("vault_accessors", "id") 3252 if err != nil { 3253 return nil, err 3254 } 3255 3256 ws.Add(iter.WatchCh()) 3257 3258 return iter, nil 3259 } 3260 3261 // VaultAccessorsByAlloc returns all the Vault accessors by alloc id 3262 func (s *StateStore) VaultAccessorsByAlloc(ws memdb.WatchSet, allocID string) ([]*structs.VaultAccessor, error) { 3263 txn := s.db.Txn(false) 3264 3265 // Get an iterator over the accessors 3266 iter, err := txn.Get("vault_accessors", "alloc_id", allocID) 3267 if err != nil { 3268 return nil, err 3269 } 3270 3271 ws.Add(iter.WatchCh()) 3272 3273 var out []*structs.VaultAccessor 3274 for { 3275 raw := iter.Next() 3276 if raw == nil { 3277 break 3278 } 3279 out = append(out, raw.(*structs.VaultAccessor)) 3280 } 3281 return out, nil 3282 } 3283 3284 // VaultAccessorsByNode returns all the Vault accessors by node id 3285 func (s *StateStore) VaultAccessorsByNode(ws memdb.WatchSet, nodeID string) ([]*structs.VaultAccessor, error) { 3286 txn := s.db.Txn(false) 3287 3288 // Get an iterator over the accessors 3289 iter, err := txn.Get("vault_accessors", "node_id", nodeID) 3290 if err != nil { 3291 return nil, err 3292 } 3293 3294 ws.Add(iter.WatchCh()) 3295 3296 var out []*structs.VaultAccessor 3297 for { 3298 raw := iter.Next() 3299 if raw == nil { 3300 break 3301 } 3302 out = append(out, raw.(*structs.VaultAccessor)) 3303 } 3304 return out, nil 3305 } 3306 3307 func indexEntry(table string, index uint64) *IndexEntry { 3308 return &IndexEntry{ 3309 Key: table, 3310 Value: index, 3311 } 3312 } 3313 3314 const siTokenAccessorTable = "si_token_accessors" 3315 3316 // UpsertSITokenAccessors is used to register a set of Service Identity token accessors. 3317 func (s *StateStore) UpsertSITokenAccessors(index uint64, accessors []*structs.SITokenAccessor) error { 3318 txn := s.db.Txn(true) 3319 defer txn.Abort() 3320 3321 for _, accessor := range accessors { 3322 // set the create index 3323 accessor.CreateIndex = index 3324 3325 // insert the accessor 3326 if err := txn.Insert(siTokenAccessorTable, accessor); err != nil { 3327 return errors.Wrap(err, "accessor insert failed") 3328 } 3329 } 3330 3331 // update the index for this table 3332 if err := txn.Insert("index", indexEntry(siTokenAccessorTable, index)); err != nil { 3333 return errors.Wrap(err, "index update failed") 3334 } 3335 3336 txn.Commit() 3337 return nil 3338 } 3339 3340 // DeleteSITokenAccessors is used to delete a set of Service Identity token accessors. 3341 func (s *StateStore) DeleteSITokenAccessors(index uint64, accessors []*structs.SITokenAccessor) error { 3342 txn := s.db.Txn(true) 3343 defer txn.Abort() 3344 3345 // Lookup each accessor 3346 for _, accessor := range accessors { 3347 // Delete the accessor 3348 if err := txn.Delete(siTokenAccessorTable, accessor); err != nil { 3349 return errors.Wrap(err, "accessor delete failed") 3350 } 3351 } 3352 3353 // update the index for this table 3354 if err := txn.Insert("index", indexEntry(siTokenAccessorTable, index)); err != nil { 3355 return errors.Wrap(err, "index update failed") 3356 } 3357 3358 txn.Commit() 3359 return nil 3360 } 3361 3362 // SITokenAccessor returns the given Service Identity token accessor. 
3363 func (s *StateStore) SITokenAccessor(ws memdb.WatchSet, accessorID string) (*structs.SITokenAccessor, error) { 3364 txn := s.db.Txn(false) 3365 defer txn.Abort() 3366 3367 watchCh, existing, err := txn.FirstWatch(siTokenAccessorTable, "id", accessorID) 3368 if err != nil { 3369 return nil, errors.Wrap(err, "accessor lookup failed") 3370 } 3371 3372 ws.Add(watchCh) 3373 3374 if existing != nil { 3375 return existing.(*structs.SITokenAccessor), nil 3376 } 3377 3378 return nil, nil 3379 } 3380 3381 // SITokenAccessors returns an iterator of Service Identity token accessors. 3382 func (s *StateStore) SITokenAccessors(ws memdb.WatchSet) (memdb.ResultIterator, error) { 3383 txn := s.db.Txn(false) 3384 defer txn.Abort() 3385 3386 iter, err := txn.Get(siTokenAccessorTable, "id") 3387 if err != nil { 3388 return nil, err 3389 } 3390 3391 ws.Add(iter.WatchCh()) 3392 3393 return iter, nil 3394 } 3395 3396 // SITokenAccessorsByAlloc returns all the Service Identity token accessors by alloc ID. 3397 func (s *StateStore) SITokenAccessorsByAlloc(ws memdb.WatchSet, allocID string) ([]*structs.SITokenAccessor, error) { 3398 txn := s.db.Txn(false) 3399 defer txn.Abort() 3400 3401 // Get an iterator over the accessors 3402 iter, err := txn.Get(siTokenAccessorTable, "alloc_id", allocID) 3403 if err != nil { 3404 return nil, err 3405 } 3406 3407 ws.Add(iter.WatchCh()) 3408 3409 var result []*structs.SITokenAccessor 3410 for raw := iter.Next(); raw != nil; raw = iter.Next() { 3411 result = append(result, raw.(*structs.SITokenAccessor)) 3412 } 3413 3414 return result, nil 3415 } 3416 3417 // SITokenAccessorsByNode returns all the Service Identity token accessors by node ID. 3418 func (s *StateStore) SITokenAccessorsByNode(ws memdb.WatchSet, nodeID string) ([]*structs.SITokenAccessor, error) { 3419 txn := s.db.Txn(false) 3420 defer txn.Abort() 3421 3422 // Get an iterator over the accessors 3423 iter, err := txn.Get(siTokenAccessorTable, "node_id", nodeID) 3424 if err != nil { 3425 return nil, err 3426 } 3427 3428 ws.Add(iter.WatchCh()) 3429 3430 var result []*structs.SITokenAccessor 3431 for raw := iter.Next(); raw != nil; raw = iter.Next() { 3432 result = append(result, raw.(*structs.SITokenAccessor)) 3433 } 3434 3435 return result, nil 3436 } 3437 3438 // UpdateDeploymentStatus is used to make deployment status updates and 3439 // potentially make a evaluation 3440 func (s *StateStore) UpdateDeploymentStatus(index uint64, req *structs.DeploymentStatusUpdateRequest) error { 3441 txn := s.db.Txn(true) 3442 defer txn.Abort() 3443 3444 if err := s.updateDeploymentStatusImpl(index, req.DeploymentUpdate, txn); err != nil { 3445 return err 3446 } 3447 3448 // Upsert the job if necessary 3449 if req.Job != nil { 3450 if err := s.upsertJobImpl(index, req.Job, false, txn); err != nil { 3451 return err 3452 } 3453 } 3454 3455 // Upsert the optional eval 3456 if req.Eval != nil { 3457 if err := s.nestedUpsertEval(txn, index, req.Eval); err != nil { 3458 return err 3459 } 3460 } 3461 3462 txn.Commit() 3463 return nil 3464 } 3465 3466 // updateDeploymentStatusImpl is used to make deployment status updates 3467 func (s *StateStore) updateDeploymentStatusImpl(index uint64, u *structs.DeploymentStatusUpdate, txn *memdb.Txn) error { 3468 // Retrieve deployment 3469 ws := memdb.NewWatchSet() 3470 deployment, err := s.deploymentByIDImpl(ws, u.DeploymentID, txn) 3471 if err != nil { 3472 return err 3473 } else if deployment == nil { 3474 return fmt.Errorf("Deployment ID %q couldn't be updated as it does not exist", 
u.DeploymentID) 3475 } else if !deployment.Active() { 3476 return fmt.Errorf("Deployment %q has terminal status %q:", deployment.ID, deployment.Status) 3477 } 3478 3479 // Apply the new status 3480 copy := deployment.Copy() 3481 copy.Status = u.Status 3482 copy.StatusDescription = u.StatusDescription 3483 copy.ModifyIndex = index 3484 3485 // Insert the deployment 3486 if err := txn.Insert("deployment", copy); err != nil { 3487 return err 3488 } 3489 3490 // Update the index 3491 if err := txn.Insert("index", &IndexEntry{"deployment", index}); err != nil { 3492 return fmt.Errorf("index update failed: %v", err) 3493 } 3494 3495 // If the deployment is being marked as complete, set the job to stable. 3496 if copy.Status == structs.DeploymentStatusSuccessful { 3497 if err := s.updateJobStabilityImpl(index, copy.Namespace, copy.JobID, copy.JobVersion, true, txn); err != nil { 3498 return fmt.Errorf("failed to update job stability: %v", err) 3499 } 3500 } 3501 3502 return nil 3503 } 3504 3505 // UpdateJobStability updates the stability of the given job and version to the 3506 // desired status. 3507 func (s *StateStore) UpdateJobStability(index uint64, namespace, jobID string, jobVersion uint64, stable bool) error { 3508 txn := s.db.Txn(true) 3509 defer txn.Abort() 3510 3511 if err := s.updateJobStabilityImpl(index, namespace, jobID, jobVersion, stable, txn); err != nil { 3512 return err 3513 } 3514 3515 txn.Commit() 3516 return nil 3517 } 3518 3519 // updateJobStabilityImpl updates the stability of the given job and version 3520 func (s *StateStore) updateJobStabilityImpl(index uint64, namespace, jobID string, jobVersion uint64, stable bool, txn *memdb.Txn) error { 3521 // Get the job that is referenced 3522 job, err := s.jobByIDAndVersionImpl(nil, namespace, jobID, jobVersion, txn) 3523 if err != nil { 3524 return err 3525 } 3526 3527 // Has already been cleared, nothing to do 3528 if job == nil { 3529 return nil 3530 } 3531 3532 // If the job already has the desired stability, nothing to do 3533 if job.Stable == stable { 3534 return nil 3535 } 3536 3537 copy := job.Copy() 3538 copy.Stable = stable 3539 return s.upsertJobImpl(index, copy, true, txn) 3540 } 3541 3542 // UpdateDeploymentPromotion is used to promote canaries in a deployment and 3543 // potentially make a evaluation 3544 func (s *StateStore) UpdateDeploymentPromotion(index uint64, req *structs.ApplyDeploymentPromoteRequest) error { 3545 txn := s.db.Txn(true) 3546 defer txn.Abort() 3547 3548 // Retrieve deployment and ensure it is not terminal and is active 3549 ws := memdb.NewWatchSet() 3550 deployment, err := s.deploymentByIDImpl(ws, req.DeploymentID, txn) 3551 if err != nil { 3552 return err 3553 } else if deployment == nil { 3554 return fmt.Errorf("Deployment ID %q couldn't be updated as it does not exist", req.DeploymentID) 3555 } else if !deployment.Active() { 3556 return fmt.Errorf("Deployment %q has terminal status %q:", deployment.ID, deployment.Status) 3557 } 3558 3559 // Retrieve effected allocations 3560 iter, err := txn.Get("allocs", "deployment", req.DeploymentID) 3561 if err != nil { 3562 return err 3563 } 3564 3565 // groupIndex is a map of groups being promoted 3566 groupIndex := make(map[string]struct{}, len(req.Groups)) 3567 for _, g := range req.Groups { 3568 groupIndex[g] = struct{}{} 3569 } 3570 3571 // canaryIndex is the set of placed canaries in the deployment 3572 canaryIndex := make(map[string]struct{}, len(deployment.TaskGroups)) 3573 for _, state := range deployment.TaskGroups { 3574 for _, c := range 
state.PlacedCanaries { 3575 canaryIndex[c] = struct{}{} 3576 } 3577 } 3578 3579 // healthyCounts is a mapping of group to the number of healthy canaries 3580 healthyCounts := make(map[string]int, len(deployment.TaskGroups)) 3581 3582 // promotable is the set of allocations that we can move from canary to 3583 // non-canary 3584 var promotable []*structs.Allocation 3585 3586 for { 3587 raw := iter.Next() 3588 if raw == nil { 3589 break 3590 } 3591 3592 alloc := raw.(*structs.Allocation) 3593 3594 // Check that the alloc is a canary 3595 if _, ok := canaryIndex[alloc.ID]; !ok { 3596 continue 3597 } 3598 3599 // Check that the canary is part of a group being promoted 3600 if _, ok := groupIndex[alloc.TaskGroup]; !req.All && !ok { 3601 continue 3602 } 3603 3604 // Ensure the canaries are healthy 3605 if alloc.TerminalStatus() || !alloc.DeploymentStatus.IsHealthy() { 3606 continue 3607 } 3608 3609 healthyCounts[alloc.TaskGroup]++ 3610 promotable = append(promotable, alloc) 3611 } 3612 3613 // Determine if we have enough healthy allocations 3614 var unhealthyErr multierror.Error 3615 for tg, state := range deployment.TaskGroups { 3616 if _, ok := groupIndex[tg]; !req.All && !ok { 3617 continue 3618 } 3619 3620 need := state.DesiredCanaries 3621 if need == 0 { 3622 continue 3623 } 3624 3625 if have := healthyCounts[tg]; have < need { 3626 multierror.Append(&unhealthyErr, fmt.Errorf("Task group %q has %d/%d healthy allocations", tg, have, need)) 3627 } 3628 } 3629 3630 if err := unhealthyErr.ErrorOrNil(); err != nil { 3631 return err 3632 } 3633 3634 // Update deployment 3635 copy := deployment.Copy() 3636 copy.ModifyIndex = index 3637 for tg, status := range copy.TaskGroups { 3638 _, ok := groupIndex[tg] 3639 if !req.All && !ok { 3640 continue 3641 } 3642 3643 status.Promoted = true 3644 } 3645 3646 // If the deployment no longer needs promotion, update its status 3647 if !copy.RequiresPromotion() && copy.Status == structs.DeploymentStatusRunning { 3648 copy.StatusDescription = structs.DeploymentStatusDescriptionRunning 3649 } 3650 3651 // Insert the deployment 3652 if err := s.upsertDeploymentImpl(index, copy, txn); err != nil { 3653 return err 3654 } 3655 3656 // Upsert the optional eval 3657 if req.Eval != nil { 3658 if err := s.nestedUpsertEval(txn, index, req.Eval); err != nil { 3659 return err 3660 } 3661 } 3662 3663 // For each promotable allocation remove the canary field 3664 for _, alloc := range promotable { 3665 promoted := alloc.Copy() 3666 promoted.DeploymentStatus.Canary = false 3667 promoted.DeploymentStatus.ModifyIndex = index 3668 promoted.ModifyIndex = index 3669 promoted.AllocModifyIndex = index 3670 3671 if err := txn.Insert("allocs", promoted); err != nil { 3672 return fmt.Errorf("alloc insert failed: %v", err) 3673 } 3674 } 3675 3676 // Update the alloc index 3677 if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { 3678 return fmt.Errorf("index update failed: %v", err) 3679 } 3680 3681 txn.Commit() 3682 return nil 3683 } 3684 3685 // UpdateDeploymentAllocHealth is used to update the health of allocations as 3686 // part of the deployment and potentially make a evaluation 3687 func (s *StateStore) UpdateDeploymentAllocHealth(index uint64, req *structs.ApplyDeploymentAllocHealthRequest) error { 3688 txn := s.db.Txn(true) 3689 defer txn.Abort() 3690 3691 // Retrieve deployment and ensure it is not terminal and is active 3692 ws := memdb.NewWatchSet() 3693 deployment, err := s.deploymentByIDImpl(ws, req.DeploymentID, txn) 3694 if err != nil { 3695 return 
err
3696 	} else if deployment == nil {
3697 		return fmt.Errorf("Deployment ID %q couldn't be updated as it does not exist", req.DeploymentID)
3698 	} else if !deployment.Active() {
3699 		return fmt.Errorf("Deployment %q has terminal status %q", deployment.ID, deployment.Status)
3700 	}
3701 
3702 	// Update the health status of each allocation
3703 	if total := len(req.HealthyAllocationIDs) + len(req.UnhealthyAllocationIDs); total != 0 {
3704 		setAllocHealth := func(id string, healthy bool, ts time.Time) error {
3705 			existing, err := txn.First("allocs", "id", id)
3706 			if err != nil {
3707 				return fmt.Errorf("alloc %q lookup failed: %v", id, err)
3708 			}
3709 			if existing == nil {
3710 				return fmt.Errorf("unknown alloc %q", id)
3711 			}
3712 
3713 			old := existing.(*structs.Allocation)
3714 			if old.DeploymentID != req.DeploymentID {
3715 				return fmt.Errorf("alloc %q is not part of deployment %q", id, req.DeploymentID)
3716 			}
3717 
3718 			// Set the health
3719 			copy := old.Copy()
3720 			if copy.DeploymentStatus == nil {
3721 				copy.DeploymentStatus = &structs.AllocDeploymentStatus{}
3722 			}
3723 			copy.DeploymentStatus.Healthy = helper.BoolToPtr(healthy)
3724 			copy.DeploymentStatus.Timestamp = ts
3725 			copy.DeploymentStatus.ModifyIndex = index
3726 			copy.ModifyIndex = index
3727 
3728 			if err := s.updateDeploymentWithAlloc(index, copy, old, txn); err != nil {
3729 				return fmt.Errorf("error updating deployment: %v", err)
3730 			}
3731 
3732 			if err := txn.Insert("allocs", copy); err != nil {
3733 				return fmt.Errorf("alloc insert failed: %v", err)
3734 			}
3735 
3736 			return nil
3737 		}
3738 
3739 		for _, id := range req.HealthyAllocationIDs {
3740 			if err := setAllocHealth(id, true, req.Timestamp); err != nil {
3741 				return err
3742 			}
3743 		}
3744 		for _, id := range req.UnhealthyAllocationIDs {
3745 			if err := setAllocHealth(id, false, req.Timestamp); err != nil {
3746 				return err
3747 			}
3748 		}
3749 
3750 		// Update the indexes
3751 		if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil {
3752 			return fmt.Errorf("index update failed: %v", err)
3753 		}
3754 	}
3755 
3756 	// Update the deployment status as needed.
3757 	if req.DeploymentUpdate != nil {
3758 		if err := s.updateDeploymentStatusImpl(index, req.DeploymentUpdate, txn); err != nil {
3759 			return err
3760 		}
3761 	}
3762 
3763 	// Upsert the job if necessary
3764 	if req.Job != nil {
3765 		if err := s.upsertJobImpl(index, req.Job, false, txn); err != nil {
3766 			return err
3767 		}
3768 	}
3769 
3770 	// Upsert the optional eval
3771 	if req.Eval != nil {
3772 		if err := s.nestedUpsertEval(txn, index, req.Eval); err != nil {
3773 			return err
3774 		}
3775 	}
3776 
3777 	txn.Commit()
3778 	return nil
3779 }
3780 
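// As an illustration of the write path above: the FSM, while applying a Raft
// log entry, marks allocations healthy roughly as follows. This is a sketch
// only; the store and raftIndex variables and the IDs are hypothetical, and
// the request fields are referenced exactly as this function reads them:
//
//	req := &structs.ApplyDeploymentAllocHealthRequest{}
//	req.DeploymentID = deploymentID
//	req.HealthyAllocationIDs = []string{allocID}
//	req.Timestamp = time.Now()
//	if err := store.UpdateDeploymentAllocHealth(raftIndex, req); err != nil {
//		// the deferred Abort discards any partial writes
//	}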
3781 // LatestIndex returns the greatest index value for all indexes
3782 func (s *StateStore) LatestIndex() (uint64, error) {
3783 	indexes, err := s.Indexes()
3784 	if err != nil {
3785 		return 0, err
3786 	}
3787 
3788 	var max uint64 = 0
3789 	for {
3790 		raw := indexes.Next()
3791 		if raw == nil {
3792 			break
3793 		}
3794 
3795 		// Cast to an IndexEntry
3796 		idx := raw.(*IndexEntry)
3797 
3798 		// Determine the max
3799 		if idx.Value > max {
3800 			max = idx.Value
3801 		}
3802 	}
3803 
3804 	return max, nil
3805 }
3806 
3807 // Index finds the matching index value
3808 func (s *StateStore) Index(name string) (uint64, error) {
3809 	txn := s.db.Txn(false)
3810 
3811 	// Lookup the first matching index
3812 	out, err := txn.First("index", "id", name)
3813 	if err != nil {
3814 		return 0, err
3815 	}
3816 	if out == nil {
3817 		return 0, nil
3818 	}
3819 	return out.(*IndexEntry).Value, nil
3820 }
3821 
3822 // RemoveIndex is a helper method to remove an index for testing purposes
3823 func (s *StateStore) RemoveIndex(name string) error {
3824 	txn := s.db.Txn(true)
3825 	defer txn.Abort()
3826 
3827 	if _, err := txn.DeleteAll("index", "id", name); err != nil {
3828 		return err
3829 	}
3830 
3831 	txn.Commit()
3832 	return nil
3833 }
3834 
3835 // Indexes returns an iterator over all the indexes
3836 func (s *StateStore) Indexes() (memdb.ResultIterator, error) {
3837 	txn := s.db.Txn(false)
3838 
3839 	// Walk the entire index table
3840 	iter, err := txn.Get("index", "id")
3841 	if err != nil {
3842 		return nil, err
3843 	}
3844 	return iter, nil
3845 }
3846 
3847 // ReconcileJobSummaries re-creates summaries for all jobs present in the state
3848 // store
3849 func (s *StateStore) ReconcileJobSummaries(index uint64) error {
3850 	txn := s.db.Txn(true)
3851 	defer txn.Abort()
3852 
3853 	// Get all the jobs
3854 	iter, err := txn.Get("jobs", "id")
3855 	if err != nil {
3856 		return err
3857 	}
3858 	// COMPAT: Remove after 0.11
3859 	// Iterate over jobs to build a list of parent jobs and their children
3860 	parentMap := make(map[string][]*structs.Job)
3861 	for {
3862 		rawJob := iter.Next()
3863 		if rawJob == nil {
3864 			break
3865 		}
3866 		job := rawJob.(*structs.Job)
3867 		if job.ParentID != "" {
3868 			children := parentMap[job.ParentID]
3869 			children = append(children, job)
3870 			parentMap[job.ParentID] = children
3871 		}
3872 	}
3873 
3874 	// Get all the jobs again
3875 	iter, err = txn.Get("jobs", "id")
3876 	if err != nil {
3877 		return err
3878 	}
3879 
3880 	for {
3881 		rawJob := iter.Next()
3882 		if rawJob == nil {
3883 			break
3884 		}
3885 		job := rawJob.(*structs.Job)
3886 
3887 		if job.IsParameterized() || job.IsPeriodic() {
3888 			// COMPAT: Remove after 0.11
3889 
3890 			// The following block of code fixes incorrect child summaries due to a bug
3891 			// See https://github.com/hashicorp/nomad/issues/3886 for details
3892 			rawSummary, err := txn.First("job_summary", "id", job.Namespace, job.ID)
3893 			if err != nil {
3894 				return err
3895 			}
3896 			if rawSummary == nil {
3897 				continue
3898 			}
3899 
3900 			oldSummary :=
rawSummary.(*structs.JobSummary) 3901 3902 // Create an empty summary 3903 summary := &structs.JobSummary{ 3904 JobID: job.ID, 3905 Namespace: job.Namespace, 3906 Summary: make(map[string]structs.TaskGroupSummary), 3907 Children: &structs.JobChildrenSummary{}, 3908 } 3909 3910 // Iterate over children of this job if any to fix summary counts 3911 children := parentMap[job.ID] 3912 for _, childJob := range children { 3913 switch childJob.Status { 3914 case structs.JobStatusPending: 3915 summary.Children.Pending++ 3916 case structs.JobStatusDead: 3917 summary.Children.Dead++ 3918 case structs.JobStatusRunning: 3919 summary.Children.Running++ 3920 } 3921 } 3922 3923 // Insert the job summary if its different 3924 if !reflect.DeepEqual(summary, oldSummary) { 3925 // Set the create index of the summary same as the job's create index 3926 // and the modify index to the current index 3927 summary.CreateIndex = job.CreateIndex 3928 summary.ModifyIndex = index 3929 3930 if err := txn.Insert("job_summary", summary); err != nil { 3931 return fmt.Errorf("error inserting job summary: %v", err) 3932 } 3933 } 3934 3935 // Done with handling a parent job, continue to next 3936 continue 3937 } 3938 3939 // Create a job summary for the job 3940 summary := &structs.JobSummary{ 3941 JobID: job.ID, 3942 Namespace: job.Namespace, 3943 Summary: make(map[string]structs.TaskGroupSummary), 3944 } 3945 for _, tg := range job.TaskGroups { 3946 summary.Summary[tg.Name] = structs.TaskGroupSummary{} 3947 } 3948 3949 // Find all the allocations for the jobs 3950 iterAllocs, err := txn.Get("allocs", "job", job.Namespace, job.ID) 3951 if err != nil { 3952 return err 3953 } 3954 3955 // Calculate the summary for the job 3956 for { 3957 rawAlloc := iterAllocs.Next() 3958 if rawAlloc == nil { 3959 break 3960 } 3961 alloc := rawAlloc.(*structs.Allocation) 3962 3963 // Ignore the allocation if it doesn't belong to the currently 3964 // registered job. The allocation is checked because of issue #2304 3965 if alloc.Job == nil || alloc.Job.CreateIndex != job.CreateIndex { 3966 continue 3967 } 3968 3969 tg := summary.Summary[alloc.TaskGroup] 3970 switch alloc.ClientStatus { 3971 case structs.AllocClientStatusFailed: 3972 tg.Failed += 1 3973 case structs.AllocClientStatusLost: 3974 tg.Lost += 1 3975 case structs.AllocClientStatusComplete: 3976 tg.Complete += 1 3977 case structs.AllocClientStatusRunning: 3978 tg.Running += 1 3979 case structs.AllocClientStatusPending: 3980 tg.Starting += 1 3981 default: 3982 s.logger.Error("invalid client status set on allocation", "client_status", alloc.ClientStatus, "alloc_id", alloc.ID) 3983 } 3984 summary.Summary[alloc.TaskGroup] = tg 3985 } 3986 3987 // Set the create index of the summary same as the job's create index 3988 // and the modify index to the current index 3989 summary.CreateIndex = job.CreateIndex 3990 summary.ModifyIndex = index 3991 3992 // Insert the job summary 3993 if err := txn.Insert("job_summary", summary); err != nil { 3994 return fmt.Errorf("error inserting job summary: %v", err) 3995 } 3996 } 3997 3998 // Update the indexes table for job summary 3999 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 4000 return fmt.Errorf("index update failed: %v", err) 4001 } 4002 txn.Commit() 4003 return nil 4004 } 4005 4006 // setJobStatuses is a helper for calling setJobStatus on multiple jobs by ID. 4007 // It takes a map of job IDs to an optional forceStatus string. It returns an 4008 // error if the job doesn't exist or setJobStatus fails. 
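// For example, an evaluation-deletion path might recompute the status of two
// jobs in one write transaction (a sketch; namespaces and IDs are
// hypothetical, and an empty forceStatus means "derive the status from the
// job's evals and allocs"):
//
//	jobs := map[structs.NamespacedID]string{
//		{Namespace: "default", ID: "web"}:   "",
//		{Namespace: "default", ID: "batch"}: "",
//	}
//	if err := s.setJobStatuses(index, txn, jobs, true); err != nil {
//		return err
//	}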
4009 func (s *StateStore) setJobStatuses(index uint64, txn *memdb.Txn, 4010 jobs map[structs.NamespacedID]string, evalDelete bool) error { 4011 for tuple, forceStatus := range jobs { 4012 4013 existing, err := txn.First("jobs", "id", tuple.Namespace, tuple.ID) 4014 if err != nil { 4015 return fmt.Errorf("job lookup failed: %v", err) 4016 } 4017 4018 if existing == nil { 4019 continue 4020 } 4021 4022 if err := s.setJobStatus(index, txn, existing.(*structs.Job), evalDelete, forceStatus); err != nil { 4023 return err 4024 } 4025 } 4026 4027 return nil 4028 } 4029 4030 // setJobStatus sets the status of the job by looking up associated evaluations 4031 // and allocations. evalDelete should be set to true if setJobStatus is being 4032 // called because an evaluation is being deleted (potentially because of garbage 4033 // collection). If forceStatus is non-empty, the job's status will be set to the 4034 // passed status. 4035 func (s *StateStore) setJobStatus(index uint64, txn *memdb.Txn, 4036 job *structs.Job, evalDelete bool, forceStatus string) error { 4037 4038 // Capture the current status so we can check if there is a change 4039 oldStatus := job.Status 4040 if index == job.CreateIndex { 4041 oldStatus = "" 4042 } 4043 newStatus := forceStatus 4044 4045 // If forceStatus is not set, compute the jobs status. 4046 if forceStatus == "" { 4047 var err error 4048 newStatus, err = s.getJobStatus(txn, job, evalDelete) 4049 if err != nil { 4050 return err 4051 } 4052 } 4053 4054 // Fast-path if nothing has changed. 4055 if oldStatus == newStatus { 4056 return nil 4057 } 4058 4059 // Copy and update the existing job 4060 updated := job.Copy() 4061 updated.Status = newStatus 4062 updated.ModifyIndex = index 4063 4064 // Insert the job 4065 if err := txn.Insert("jobs", updated); err != nil { 4066 return fmt.Errorf("job insert failed: %v", err) 4067 } 4068 if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil { 4069 return fmt.Errorf("index update failed: %v", err) 4070 } 4071 4072 // Update the children summary 4073 if updated.ParentID != "" { 4074 // Try to update the summary of the parent job summary 4075 summaryRaw, err := txn.First("job_summary", "id", updated.Namespace, updated.ParentID) 4076 if err != nil { 4077 return fmt.Errorf("unable to retrieve summary for parent job: %v", err) 4078 } 4079 4080 // Only continue if the summary exists. 
It could not exist if the parent 4081 // job was removed 4082 if summaryRaw != nil { 4083 existing := summaryRaw.(*structs.JobSummary) 4084 pSummary := existing.Copy() 4085 if pSummary.Children == nil { 4086 pSummary.Children = new(structs.JobChildrenSummary) 4087 } 4088 4089 // Determine the transition and update the correct fields 4090 children := pSummary.Children 4091 4092 // Decrement old status 4093 if oldStatus != "" { 4094 switch oldStatus { 4095 case structs.JobStatusPending: 4096 children.Pending-- 4097 case structs.JobStatusRunning: 4098 children.Running-- 4099 case structs.JobStatusDead: 4100 children.Dead-- 4101 default: 4102 return fmt.Errorf("unknown old job status %q", oldStatus) 4103 } 4104 } 4105 4106 // Increment new status 4107 switch newStatus { 4108 case structs.JobStatusPending: 4109 children.Pending++ 4110 case structs.JobStatusRunning: 4111 children.Running++ 4112 case structs.JobStatusDead: 4113 children.Dead++ 4114 default: 4115 return fmt.Errorf("unknown new job status %q", newStatus) 4116 } 4117 4118 // Update the index 4119 pSummary.ModifyIndex = index 4120 4121 // Insert the summary 4122 if err := txn.Insert("job_summary", pSummary); err != nil { 4123 return fmt.Errorf("job summary insert failed: %v", err) 4124 } 4125 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 4126 return fmt.Errorf("index update failed: %v", err) 4127 } 4128 } 4129 } 4130 4131 return nil 4132 } 4133 4134 func (s *StateStore) getJobStatus(txn *memdb.Txn, job *structs.Job, evalDelete bool) (string, error) { 4135 // System, Periodic and Parameterized jobs are running until explicitly 4136 // stopped 4137 if job.Type == structs.JobTypeSystem || job.IsParameterized() || job.IsPeriodic() { 4138 if job.Stop { 4139 return structs.JobStatusDead, nil 4140 } 4141 4142 return structs.JobStatusRunning, nil 4143 } 4144 4145 allocs, err := txn.Get("allocs", "job", job.Namespace, job.ID) 4146 if err != nil { 4147 return "", err 4148 } 4149 4150 // If there is a non-terminal allocation, the job is running. 4151 hasAlloc := false 4152 for alloc := allocs.Next(); alloc != nil; alloc = allocs.Next() { 4153 hasAlloc = true 4154 if !alloc.(*structs.Allocation).TerminalStatus() { 4155 return structs.JobStatusRunning, nil 4156 } 4157 } 4158 4159 evals, err := txn.Get("evals", "job_prefix", job.Namespace, job.ID) 4160 if err != nil { 4161 return "", err 4162 } 4163 4164 hasEval := false 4165 for raw := evals.Next(); raw != nil; raw = evals.Next() { 4166 e := raw.(*structs.Evaluation) 4167 4168 // Filter non-exact matches 4169 if e.JobID != job.ID { 4170 continue 4171 } 4172 4173 hasEval = true 4174 if !e.TerminalStatus() { 4175 return structs.JobStatusPending, nil 4176 } 4177 } 4178 4179 // The job is dead if all the allocations and evals are terminal or if there 4180 // are no evals because of garbage collection. 
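	// Putting the cases together (all flags computed above):
	//
	//	a non-terminal alloc was seen          -> running (returned above)
	//	a non-terminal eval was seen           -> pending (returned above)
	//	evalDelete, or only terminal
	//	evals/allocs remain                    -> dead
	//	no evals, no allocs, and no evalDelete -> pending (e.g. a freshly
	//	                                          registered job)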
4181 if evalDelete || hasEval || hasAlloc { 4182 return structs.JobStatusDead, nil 4183 } 4184 4185 return structs.JobStatusPending, nil 4186 } 4187 4188 // updateSummaryWithJob creates or updates job summaries when new jobs are 4189 // upserted or existing ones are updated 4190 func (s *StateStore) updateSummaryWithJob(index uint64, job *structs.Job, 4191 txn *memdb.Txn) error { 4192 4193 // Update the job summary 4194 summaryRaw, err := txn.First("job_summary", "id", job.Namespace, job.ID) 4195 if err != nil { 4196 return fmt.Errorf("job summary lookup failed: %v", err) 4197 } 4198 4199 // Get the summary or create if necessary 4200 var summary *structs.JobSummary 4201 hasSummaryChanged := false 4202 if summaryRaw != nil { 4203 summary = summaryRaw.(*structs.JobSummary).Copy() 4204 } else { 4205 summary = &structs.JobSummary{ 4206 JobID: job.ID, 4207 Namespace: job.Namespace, 4208 Summary: make(map[string]structs.TaskGroupSummary), 4209 Children: new(structs.JobChildrenSummary), 4210 CreateIndex: index, 4211 } 4212 hasSummaryChanged = true 4213 } 4214 4215 for _, tg := range job.TaskGroups { 4216 if _, ok := summary.Summary[tg.Name]; !ok { 4217 newSummary := structs.TaskGroupSummary{ 4218 Complete: 0, 4219 Failed: 0, 4220 Running: 0, 4221 Starting: 0, 4222 } 4223 summary.Summary[tg.Name] = newSummary 4224 hasSummaryChanged = true 4225 } 4226 } 4227 4228 // The job summary has changed, so update the modify index. 4229 if hasSummaryChanged { 4230 summary.ModifyIndex = index 4231 4232 // Update the indexes table for job summary 4233 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 4234 return fmt.Errorf("index update failed: %v", err) 4235 } 4236 if err := txn.Insert("job_summary", summary); err != nil { 4237 return err 4238 } 4239 } 4240 4241 return nil 4242 } 4243 4244 // updateJobScalingPolicies upserts any scaling policies contained in the job and removes 4245 // any previous scaling policies that were removed from the job 4246 func (s *StateStore) updateJobScalingPolicies(index uint64, job *structs.Job, txn *memdb.Txn) error { 4247 4248 ws := memdb.NewWatchSet() 4249 4250 scalingPolicies := job.GetScalingPolicies() 4251 newTargets := map[string]struct{}{} 4252 for _, p := range scalingPolicies { 4253 newTargets[p.Target[structs.ScalingTargetGroup]] = struct{}{} 4254 } 4255 // find existing policies that need to be deleted 4256 deletedPolicies := []string{} 4257 iter, err := s.ScalingPoliciesByJobTxn(ws, job.Namespace, job.ID, txn) 4258 if err != nil { 4259 return fmt.Errorf("ScalingPoliciesByJob lookup failed: %v", err) 4260 } 4261 for { 4262 raw := iter.Next() 4263 if raw == nil { 4264 break 4265 } 4266 oldPolicy := raw.(*structs.ScalingPolicy) 4267 if _, ok := newTargets[oldPolicy.Target[structs.ScalingTargetGroup]]; !ok { 4268 deletedPolicies = append(deletedPolicies, oldPolicy.ID) 4269 } 4270 } 4271 err = s.DeleteScalingPoliciesTxn(index, deletedPolicies, txn) 4272 if err != nil { 4273 return fmt.Errorf("DeleteScalingPolicies of removed policies failed: %v", err) 4274 } 4275 4276 err = s.UpsertScalingPoliciesTxn(index, scalingPolicies, txn) 4277 if err != nil { 4278 return fmt.Errorf("UpsertScalingPolicies of policies failed: %v", err) 4279 } 4280 4281 return nil 4282 } 4283 4284 // updateDeploymentWithAlloc is used to update the deployment state associated 4285 // with the given allocation. The passed alloc may be updated if the deployment 4286 // status has changed to capture the modify index at which it has changed. 
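// For example (a hypothetical transition): an alloc whose DeploymentStatus
// previously had no health set and now reports Healthy=true contributes
// healthy++ to its task group's deployment state; a flip from healthy to
// unhealthy contributes healthy-- and unhealthy++; and an alloc newly placed
// into the deployment contributes placed++.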
4287 func (s *StateStore) updateDeploymentWithAlloc(index uint64, alloc, existing *structs.Allocation, txn *memdb.Txn) error { 4288 // Nothing to do if the allocation is not associated with a deployment 4289 if alloc.DeploymentID == "" { 4290 return nil 4291 } 4292 4293 // Get the deployment 4294 ws := memdb.NewWatchSet() 4295 deployment, err := s.deploymentByIDImpl(ws, alloc.DeploymentID, txn) 4296 if err != nil { 4297 return err 4298 } 4299 if deployment == nil { 4300 return nil 4301 } 4302 4303 // Retrieve the deployment state object 4304 _, ok := deployment.TaskGroups[alloc.TaskGroup] 4305 if !ok { 4306 // If the task group isn't part of the deployment, the task group wasn't 4307 // part of a rolling update so nothing to do 4308 return nil 4309 } 4310 4311 // Do not modify in-place. Instead keep track of what must be done 4312 placed := 0 4313 healthy := 0 4314 unhealthy := 0 4315 4316 // If there was no existing allocation, this is a placement and we increment 4317 // the placement 4318 existingHealthSet := existing != nil && existing.DeploymentStatus.HasHealth() 4319 allocHealthSet := alloc.DeploymentStatus.HasHealth() 4320 if existing == nil || existing.DeploymentID != alloc.DeploymentID { 4321 placed++ 4322 } else if !existingHealthSet && allocHealthSet { 4323 if *alloc.DeploymentStatus.Healthy { 4324 healthy++ 4325 } else { 4326 unhealthy++ 4327 } 4328 } else if existingHealthSet && allocHealthSet { 4329 // See if it has gone from healthy to unhealthy 4330 if *existing.DeploymentStatus.Healthy && !*alloc.DeploymentStatus.Healthy { 4331 healthy-- 4332 unhealthy++ 4333 } 4334 } 4335 4336 // Nothing to do 4337 if placed == 0 && healthy == 0 && unhealthy == 0 { 4338 return nil 4339 } 4340 4341 // Update the allocation's deployment status modify index 4342 if alloc.DeploymentStatus != nil && healthy+unhealthy != 0 { 4343 alloc.DeploymentStatus.ModifyIndex = index 4344 } 4345 4346 // Create a copy of the deployment object 4347 deploymentCopy := deployment.Copy() 4348 deploymentCopy.ModifyIndex = index 4349 4350 state := deploymentCopy.TaskGroups[alloc.TaskGroup] 4351 state.PlacedAllocs += placed 4352 state.HealthyAllocs += healthy 4353 state.UnhealthyAllocs += unhealthy 4354 4355 // Ensure PlacedCanaries accurately reflects the alloc canary status 4356 if alloc.DeploymentStatus != nil && alloc.DeploymentStatus.Canary { 4357 found := false 4358 for _, canary := range state.PlacedCanaries { 4359 if alloc.ID == canary { 4360 found = true 4361 break 4362 } 4363 } 4364 if !found { 4365 state.PlacedCanaries = append(state.PlacedCanaries, alloc.ID) 4366 } 4367 } 4368 4369 // Update the progress deadline 4370 if pd := state.ProgressDeadline; pd != 0 { 4371 // If we are the first placed allocation for the deployment start the progress deadline. 4372 if placed != 0 && state.RequireProgressBy.IsZero() { 4373 // Use modify time instead of create time because we may in-place 4374 // update the allocation to be part of a new deployment. 
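			// For example, with a hypothetical 10 minute progress
			// deadline, an alloc last modified at t0 requires
			// further progress by
			//
			//	time.Unix(0, alloc.ModifyTime).Add(10 * time.Minute)
			//
			// which is exactly the assignment below with pd = 10m.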
4375 state.RequireProgressBy = time.Unix(0, alloc.ModifyTime).Add(pd) 4376 } else if healthy != 0 { 4377 if d := alloc.DeploymentStatus.Timestamp.Add(pd); d.After(state.RequireProgressBy) { 4378 state.RequireProgressBy = d 4379 } 4380 } 4381 } 4382 4383 // Upsert the deployment 4384 if err := s.upsertDeploymentImpl(index, deploymentCopy, txn); err != nil { 4385 return err 4386 } 4387 4388 return nil 4389 } 4390 4391 // updateSummaryWithAlloc updates the job summary when allocations are updated 4392 // or inserted 4393 func (s *StateStore) updateSummaryWithAlloc(index uint64, alloc *structs.Allocation, 4394 existingAlloc *structs.Allocation, txn *memdb.Txn) error { 4395 4396 // We don't have to update the summary if the job is missing 4397 if alloc.Job == nil { 4398 return nil 4399 } 4400 4401 summaryRaw, err := txn.First("job_summary", "id", alloc.Namespace, alloc.JobID) 4402 if err != nil { 4403 return fmt.Errorf("unable to lookup job summary for job id %q in namespace %q: %v", alloc.JobID, alloc.Namespace, err) 4404 } 4405 4406 if summaryRaw == nil { 4407 // Check if the job is de-registered 4408 rawJob, err := txn.First("jobs", "id", alloc.Namespace, alloc.JobID) 4409 if err != nil { 4410 return fmt.Errorf("unable to query job: %v", err) 4411 } 4412 4413 // If the job is de-registered then we skip updating it's summary 4414 if rawJob == nil { 4415 return nil 4416 } 4417 4418 return fmt.Errorf("job summary for job %q in namespace %q is not present", alloc.JobID, alloc.Namespace) 4419 } 4420 4421 // Get a copy of the existing summary 4422 jobSummary := summaryRaw.(*structs.JobSummary).Copy() 4423 4424 // Not updating the job summary because the allocation doesn't belong to the 4425 // currently registered job 4426 if jobSummary.CreateIndex != alloc.Job.CreateIndex { 4427 return nil 4428 } 4429 4430 tgSummary, ok := jobSummary.Summary[alloc.TaskGroup] 4431 if !ok { 4432 return fmt.Errorf("unable to find task group in the job summary: %v", alloc.TaskGroup) 4433 } 4434 4435 summaryChanged := false 4436 if existingAlloc == nil { 4437 switch alloc.DesiredStatus { 4438 case structs.AllocDesiredStatusStop, structs.AllocDesiredStatusEvict: 4439 s.logger.Error("new allocation inserted into state store with bad desired status", 4440 "alloc_id", alloc.ID, "desired_status", alloc.DesiredStatus) 4441 } 4442 switch alloc.ClientStatus { 4443 case structs.AllocClientStatusPending: 4444 tgSummary.Starting += 1 4445 if tgSummary.Queued > 0 { 4446 tgSummary.Queued -= 1 4447 } 4448 summaryChanged = true 4449 case structs.AllocClientStatusRunning, structs.AllocClientStatusFailed, 4450 structs.AllocClientStatusComplete: 4451 s.logger.Error("new allocation inserted into state store with bad client status", 4452 "alloc_id", alloc.ID, "client_status", alloc.ClientStatus) 4453 } 4454 } else if existingAlloc.ClientStatus != alloc.ClientStatus { 4455 // Incrementing the client of the bin of the current state 4456 switch alloc.ClientStatus { 4457 case structs.AllocClientStatusRunning: 4458 tgSummary.Running += 1 4459 case structs.AllocClientStatusFailed: 4460 tgSummary.Failed += 1 4461 case structs.AllocClientStatusPending: 4462 tgSummary.Starting += 1 4463 case structs.AllocClientStatusComplete: 4464 tgSummary.Complete += 1 4465 case structs.AllocClientStatusLost: 4466 tgSummary.Lost += 1 4467 } 4468 4469 // Decrementing the count of the bin of the last state 4470 switch existingAlloc.ClientStatus { 4471 case structs.AllocClientStatusRunning: 4472 if tgSummary.Running > 0 { 4473 tgSummary.Running -= 1 4474 } 4475 
case structs.AllocClientStatusPending:
4476 			if tgSummary.Starting > 0 {
4477 				tgSummary.Starting -= 1
4478 			}
4479 		case structs.AllocClientStatusLost:
4480 			if tgSummary.Lost > 0 {
4481 				tgSummary.Lost -= 1
4482 			}
4483 		case structs.AllocClientStatusFailed, structs.AllocClientStatusComplete:
4484 		default:
4485 			s.logger.Error("invalid old client status for allocation",
4486 				"alloc_id", existingAlloc.ID, "client_status", existingAlloc.ClientStatus)
4487 		}
4488 		summaryChanged = true
4489 	}
4490 	jobSummary.Summary[alloc.TaskGroup] = tgSummary
4491 
4492 	if summaryChanged {
4493 		jobSummary.ModifyIndex = index
4494 
4495 		// Update the indexes table for job summary
4496 		if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil {
4497 			return fmt.Errorf("index update failed: %v", err)
4498 		}
4499 
4500 		if err := txn.Insert("job_summary", jobSummary); err != nil {
4501 			return fmt.Errorf("updating job summary failed: %v", err)
4502 		}
4503 	}
4504 
4505 	return nil
4506 }
4507 
4508 // UpsertACLPolicies is used to create or update a set of ACL policies
4509 func (s *StateStore) UpsertACLPolicies(index uint64, policies []*structs.ACLPolicy) error {
4510 	txn := s.db.Txn(true)
4511 	defer txn.Abort()
4512 
4513 	for _, policy := range policies {
4514 		// Ensure the policy hash is non-nil. This should be done outside the state store
4515 		// for performance reasons, but we check here for defense in depth.
4516 		if len(policy.Hash) == 0 {
4517 			policy.SetHash()
4518 		}
4519 
4520 		// Check if the policy already exists
4521 		existing, err := txn.First("acl_policy", "id", policy.Name)
4522 		if err != nil {
4523 			return fmt.Errorf("policy lookup failed: %v", err)
4524 		}
4525 
4526 		// Update all the indexes
4527 		if existing != nil {
4528 			policy.CreateIndex = existing.(*structs.ACLPolicy).CreateIndex
4529 			policy.ModifyIndex = index
4530 		} else {
4531 			policy.CreateIndex = index
4532 			policy.ModifyIndex = index
4533 		}
4534 
4535 		// Update the policy
4536 		if err := txn.Insert("acl_policy", policy); err != nil {
4537 			return fmt.Errorf("upserting policy failed: %v", err)
4538 		}
4539 	}
4540 
4541 	// Update the indexes table
4542 	if err := txn.Insert("index", &IndexEntry{"acl_policy", index}); err != nil {
4543 		return fmt.Errorf("index update failed: %v", err)
4544 	}
4545 
4546 	txn.Commit()
4547 	return nil
4548 }
4549 
4550 // DeleteACLPolicies deletes the policies with the given names
4551 func (s *StateStore) DeleteACLPolicies(index uint64, names []string) error {
4552 	txn := s.db.Txn(true)
4553 	defer txn.Abort()
4554 
4555 	// Delete the policies
4556 	for _, name := range names {
4557 		if _, err := txn.DeleteAll("acl_policy", "id", name); err != nil {
4558 			return fmt.Errorf("deleting acl policy failed: %v", err)
4559 		}
4560 	}
4561 	if err := txn.Insert("index", &IndexEntry{"acl_policy", index}); err != nil {
4562 		return fmt.Errorf("index update failed: %v", err)
4563 	}
4564 	txn.Commit()
4565 	return nil
4566 }
4567 
4568 // ACLPolicyByName is used to lookup a policy by name
4569 func (s *StateStore) ACLPolicyByName(ws memdb.WatchSet, name string) (*structs.ACLPolicy, error) {
4570 	txn := s.db.Txn(false)
4571 
4572 	watchCh, existing, err := txn.FirstWatch("acl_policy", "id", name)
4573 	if err != nil {
4574 		return nil, fmt.Errorf("acl policy lookup failed: %v", err)
4575 	}
4576 	ws.Add(watchCh)
4577 
4578 	if existing != nil {
4579 		return existing.(*structs.ACLPolicy), nil
4580 	}
4581 	return nil, nil
4582 }
4583 
4584 // ACLPolicyByNamePrefix is used to lookup policies by prefix
4585 func (s *StateStore) ACLPolicyByNamePrefix(ws
memdb.WatchSet, prefix string) (memdb.ResultIterator, error) { 4586 txn := s.db.Txn(false) 4587 4588 iter, err := txn.Get("acl_policy", "id_prefix", prefix) 4589 if err != nil { 4590 return nil, fmt.Errorf("acl policy lookup failed: %v", err) 4591 } 4592 ws.Add(iter.WatchCh()) 4593 4594 return iter, nil 4595 } 4596 4597 // ACLPolicies returns an iterator over all the acl policies 4598 func (s *StateStore) ACLPolicies(ws memdb.WatchSet) (memdb.ResultIterator, error) { 4599 txn := s.db.Txn(false) 4600 4601 // Walk the entire table 4602 iter, err := txn.Get("acl_policy", "id") 4603 if err != nil { 4604 return nil, err 4605 } 4606 ws.Add(iter.WatchCh()) 4607 return iter, nil 4608 } 4609 4610 // UpsertACLTokens is used to create or update a set of ACL tokens 4611 func (s *StateStore) UpsertACLTokens(index uint64, tokens []*structs.ACLToken) error { 4612 txn := s.db.Txn(true) 4613 defer txn.Abort() 4614 4615 for _, token := range tokens { 4616 // Ensure the policy hash is non-nil. This should be done outside the state store 4617 // for performance reasons, but we check here for defense in depth. 4618 if len(token.Hash) == 0 { 4619 token.SetHash() 4620 } 4621 4622 // Check if the token already exists 4623 existing, err := txn.First("acl_token", "id", token.AccessorID) 4624 if err != nil { 4625 return fmt.Errorf("token lookup failed: %v", err) 4626 } 4627 4628 // Update all the indexes 4629 if existing != nil { 4630 existTK := existing.(*structs.ACLToken) 4631 token.CreateIndex = existTK.CreateIndex 4632 token.ModifyIndex = index 4633 4634 // Do not allow SecretID or create time to change 4635 token.SecretID = existTK.SecretID 4636 token.CreateTime = existTK.CreateTime 4637 4638 } else { 4639 token.CreateIndex = index 4640 token.ModifyIndex = index 4641 } 4642 4643 // Update the token 4644 if err := txn.Insert("acl_token", token); err != nil { 4645 return fmt.Errorf("upserting token failed: %v", err) 4646 } 4647 } 4648 4649 // Update the indexes table 4650 if err := txn.Insert("index", &IndexEntry{"acl_token", index}); err != nil { 4651 return fmt.Errorf("index update failed: %v", err) 4652 } 4653 txn.Commit() 4654 return nil 4655 } 4656 4657 // DeleteACLTokens deletes the tokens with the given accessor ids 4658 func (s *StateStore) DeleteACLTokens(index uint64, ids []string) error { 4659 txn := s.db.Txn(true) 4660 defer txn.Abort() 4661 4662 // Delete the tokens 4663 for _, id := range ids { 4664 if _, err := txn.DeleteAll("acl_token", "id", id); err != nil { 4665 return fmt.Errorf("deleting acl token failed: %v", err) 4666 } 4667 } 4668 if err := txn.Insert("index", &IndexEntry{"acl_token", index}); err != nil { 4669 return fmt.Errorf("index update failed: %v", err) 4670 } 4671 txn.Commit() 4672 return nil 4673 } 4674 4675 // ACLTokenByAccessorID is used to lookup a token by accessor ID 4676 func (s *StateStore) ACLTokenByAccessorID(ws memdb.WatchSet, id string) (*structs.ACLToken, error) { 4677 if id == "" { 4678 return nil, fmt.Errorf("acl token lookup failed: missing accessor id") 4679 } 4680 4681 txn := s.db.Txn(false) 4682 4683 watchCh, existing, err := txn.FirstWatch("acl_token", "id", id) 4684 if err != nil { 4685 return nil, fmt.Errorf("acl token lookup failed: %v", err) 4686 } 4687 ws.Add(watchCh) 4688 4689 if existing != nil { 4690 return existing.(*structs.ACLToken), nil 4691 } 4692 return nil, nil 4693 } 4694 4695 // ACLTokenBySecretID is used to lookup a token by secret ID 4696 func (s *StateStore) ACLTokenBySecretID(ws memdb.WatchSet, secretID string) (*structs.ACLToken, error) { 
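	// This is the lookup a server typically performs to resolve the token
	// presented on an RPC. A caller sketch (hypothetical; passing a nil
	// WatchSet is safe because memdb's WatchSet.Add tolerates nil, a
	// property the denormalization helpers below also rely on):
	//
	//	token, err := store.ACLTokenBySecretID(nil, secret)
	//	if err != nil {
	//		return err
	//	}
	//	if token == nil {
	//		// unknown SecretID: treat as permission denied
	//	}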
4697 if secretID == "" { 4698 return nil, fmt.Errorf("acl token lookup failed: missing secret id") 4699 } 4700 4701 txn := s.db.Txn(false) 4702 4703 watchCh, existing, err := txn.FirstWatch("acl_token", "secret", secretID) 4704 if err != nil { 4705 return nil, fmt.Errorf("acl token lookup failed: %v", err) 4706 } 4707 ws.Add(watchCh) 4708 4709 if existing != nil { 4710 return existing.(*structs.ACLToken), nil 4711 } 4712 return nil, nil 4713 } 4714 4715 // ACLTokenByAccessorIDPrefix is used to lookup tokens by prefix 4716 func (s *StateStore) ACLTokenByAccessorIDPrefix(ws memdb.WatchSet, prefix string) (memdb.ResultIterator, error) { 4717 txn := s.db.Txn(false) 4718 4719 iter, err := txn.Get("acl_token", "id_prefix", prefix) 4720 if err != nil { 4721 return nil, fmt.Errorf("acl token lookup failed: %v", err) 4722 } 4723 ws.Add(iter.WatchCh()) 4724 return iter, nil 4725 } 4726 4727 // ACLTokens returns an iterator over all the tokens 4728 func (s *StateStore) ACLTokens(ws memdb.WatchSet) (memdb.ResultIterator, error) { 4729 txn := s.db.Txn(false) 4730 4731 // Walk the entire table 4732 iter, err := txn.Get("acl_token", "id") 4733 if err != nil { 4734 return nil, err 4735 } 4736 ws.Add(iter.WatchCh()) 4737 return iter, nil 4738 } 4739 4740 // ACLTokensByGlobal returns an iterator over all the tokens filtered by global value 4741 func (s *StateStore) ACLTokensByGlobal(ws memdb.WatchSet, globalVal bool) (memdb.ResultIterator, error) { 4742 txn := s.db.Txn(false) 4743 4744 // Walk the entire table 4745 iter, err := txn.Get("acl_token", "global", globalVal) 4746 if err != nil { 4747 return nil, err 4748 } 4749 ws.Add(iter.WatchCh()) 4750 return iter, nil 4751 } 4752 4753 // CanBootstrapACLToken checks if bootstrapping is possible and returns the reset index 4754 func (s *StateStore) CanBootstrapACLToken() (bool, uint64, error) { 4755 txn := s.db.Txn(false) 4756 4757 // Lookup the bootstrap sentinel 4758 out, err := txn.First("index", "id", "acl_token_bootstrap") 4759 if err != nil { 4760 return false, 0, err 4761 } 4762 4763 // No entry, we haven't bootstrapped yet 4764 if out == nil { 4765 return true, 0, nil 4766 } 4767 4768 // Return the reset index if we've already bootstrapped 4769 return false, out.(*IndexEntry).Value, nil 4770 } 4771 4772 // BootstrapACLToken is used to create an initial ACL token 4773 func (s *StateStore) BootstrapACLTokens(index, resetIndex uint64, token *structs.ACLToken) error { 4774 txn := s.db.Txn(true) 4775 defer txn.Abort() 4776 4777 // Check if we have already done a bootstrap 4778 existing, err := txn.First("index", "id", "acl_token_bootstrap") 4779 if err != nil { 4780 return fmt.Errorf("bootstrap check failed: %v", err) 4781 } 4782 if existing != nil { 4783 if resetIndex == 0 { 4784 return fmt.Errorf("ACL bootstrap already done") 4785 } else if resetIndex != existing.(*IndexEntry).Value { 4786 return fmt.Errorf("Invalid reset index for ACL bootstrap") 4787 } 4788 } 4789 4790 // Update the Create/Modify time 4791 token.CreateIndex = index 4792 token.ModifyIndex = index 4793 4794 // Insert the token 4795 if err := txn.Insert("acl_token", token); err != nil { 4796 return fmt.Errorf("upserting token failed: %v", err) 4797 } 4798 4799 // Update the indexes table, prevents future bootstrap until reset 4800 if err := txn.Insert("index", &IndexEntry{"acl_token", index}); err != nil { 4801 return fmt.Errorf("index update failed: %v", err) 4802 } 4803 if err := txn.Insert("index", &IndexEntry{"acl_token_bootstrap", index}); err != nil { 4804 return fmt.Errorf("index 
update failed: %v", err)
4805 	}
4806 	txn.Commit()
4807 	return nil
4808 }
4809 
4810 // SchedulerConfig is used to get the current Scheduler configuration.
4811 func (s *StateStore) SchedulerConfig() (uint64, *structs.SchedulerConfiguration, error) {
4812 	tx := s.db.Txn(false)
4813 	defer tx.Abort()
4814 
4815 	// Get the scheduler config
4816 	c, err := tx.First("scheduler_config", "id")
4817 	if err != nil {
4818 		return 0, nil, fmt.Errorf("failed scheduler config lookup: %s", err)
4819 	}
4820 
4821 	config, ok := c.(*structs.SchedulerConfiguration)
4822 	if !ok {
4823 		return 0, nil, nil
4824 	}
4825 
4826 	return config.ModifyIndex, config, nil
4827 }
4828 
4829 // SchedulerSetConfig is used to set the current Scheduler configuration.
4830 func (s *StateStore) SchedulerSetConfig(idx uint64, config *structs.SchedulerConfiguration) error {
4831 	tx := s.db.Txn(true)
4832 	defer tx.Abort()
4833 
4834 	if err := s.schedulerSetConfigTxn(idx, tx, config); err != nil { return err }
4835 
4836 	tx.Commit()
4837 	return nil
4838 }
4839 
4840 func (s *StateStore) ClusterMetadata() (*structs.ClusterMetadata, error) {
4841 	txn := s.db.Txn(false)
4842 	defer txn.Abort()
4843 
4844 	// Get the cluster metadata
4845 	m, err := txn.First("cluster_meta", "id")
4846 	if err != nil {
4847 		return nil, errors.Wrap(err, "failed cluster metadata lookup")
4848 	}
4849 
4850 	if m != nil {
4851 		return m.(*structs.ClusterMetadata), nil
4852 	}
4853 
4854 	return nil, nil
4855 }
4856 
4857 func (s *StateStore) ClusterSetMetadata(index uint64, meta *structs.ClusterMetadata) error {
4858 	txn := s.db.Txn(true)
4859 	defer txn.Abort()
4860 
4861 	if err := s.setClusterMetadata(txn, meta); err != nil {
4862 		return errors.Wrap(err, "set cluster metadata failed")
4863 	}
4864 
4865 	txn.Commit()
4866 	return nil
4867 }
4868 
4869 // WithWriteTransaction executes the passed function within a write transaction,
4870 // and returns its result. If the invocation returns no error, the transaction
4871 // is committed; otherwise, it's aborted.
4872 func (s *StateStore) WithWriteTransaction(fn func(Txn) error) error {
4873 	tx := s.db.Txn(true)
4874 	defer tx.Abort()
4875 
4876 	err := fn(tx)
4877 	if err == nil {
4878 		tx.Commit()
4879 	}
4880 	return err
4881 }
4882 
4883 // SchedulerCASConfig is used to update the scheduler configuration with a
4884 // given Raft index. If the CAS index specified is not equal to the last observed index
4885 // for the config, then the call is a noop.
4886 func (s *StateStore) SchedulerCASConfig(idx, cidx uint64, config *structs.SchedulerConfiguration) (bool, error) {
4887 	tx := s.db.Txn(true)
4888 	defer tx.Abort()
4889 
4890 	// Check for an existing config
4891 	existing, err := tx.First("scheduler_config", "id")
4892 	if err != nil {
4893 		return false, fmt.Errorf("failed scheduler config lookup: %s", err)
4894 	}
4895 
4896 	// If the existing index does not match the provided CAS
4897 	// index arg, then we shouldn't update anything and can safely
4898 	// return early here.
4899 	e, ok := existing.(*structs.SchedulerConfiguration)
4900 	if !ok || (e != nil && e.ModifyIndex != cidx) {
4901 		return false, nil
4902 	}
4903 
4904 	if err := s.schedulerSetConfigTxn(idx, tx, config); err != nil { return false, err }
4905 
4906 	tx.Commit()
4907 	return true, nil
4908 }
4909 
4910 func (s *StateStore) schedulerSetConfigTxn(idx uint64, tx *memdb.Txn, config *structs.SchedulerConfiguration) error {
4911 	// Check for an existing config
4912 	existing, err := tx.First("scheduler_config", "id")
4913 	if err != nil {
4914 		return fmt.Errorf("failed scheduler config lookup: %s", err)
4915 	}
4916 
4917 	// Set the indexes.
4918 	if existing != nil {
4919 		config.CreateIndex = existing.(*structs.SchedulerConfiguration).CreateIndex
4920 	} else {
4921 		config.CreateIndex = idx
4922 	}
4923 	config.ModifyIndex = idx
4924 
4925 	if err := tx.Insert("scheduler_config", config); err != nil {
4926 		return fmt.Errorf("failed updating scheduler config: %s", err)
4927 	}
4928 	return nil
4929 }
4930 
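// A compare-and-swap update composed from the pieces above (a sketch; the
// raftIndex value and the chosen field are hypothetical, and the copy is
// there because objects returned from the store must never be mutated):
//
//	idx, cfg, err := store.SchedulerConfig()
//	if err != nil || cfg == nil {
//		return err
//	}
//	newCfg := *cfg
//	newCfg.PreemptionConfig.SystemSchedulerEnabled = false
//	if ok, err := store.SchedulerCASConfig(raftIndex, idx, &newCfg); err != nil {
//		return err
//	} else if !ok {
//		// ModifyIndex moved underneath us: reload and retry
//	}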
4931 func (s *StateStore) setClusterMetadata(txn *memdb.Txn, meta *structs.ClusterMetadata) error {
4932 	// Check for an existing config, if it exists, sanity check the cluster ID matches
4933 	existing, err := txn.First("cluster_meta", "id")
4934 	if err != nil {
4935 		return fmt.Errorf("failed cluster meta lookup: %v", err)
4936 	}
4937 
4938 	if existing != nil {
4939 		existingClusterID := existing.(*structs.ClusterMetadata).ClusterID
4940 		if meta.ClusterID != existingClusterID {
4941 			// there is a bug in cluster ID detection
4942 			return fmt.Errorf("refusing to set new cluster id, previous: %s, new: %s", existingClusterID, meta.ClusterID)
4943 		}
4944 	}
4945 
4946 	// update is technically a noop, unless someday we add more / mutable fields
4947 	if err := txn.Insert("cluster_meta", meta); err != nil {
4948 		return fmt.Errorf("set cluster metadata failed: %v", err)
4949 	}
4950 
4951 	return nil
4952 }
4953 
4954 // UpsertScalingPolicies is used to insert new scaling policies.
4955 func (s *StateStore) UpsertScalingPolicies(index uint64, scalingPolicies []*structs.ScalingPolicy) error {
4956 	txn := s.db.Txn(true)
4957 	defer txn.Abort()
4958 
4959 	if err := s.UpsertScalingPoliciesTxn(index, scalingPolicies, txn); err != nil {
4960 		return err
4961 	}
4962 
4963 	txn.Commit()
4964 	return nil
4965 }
4966 
4967 // UpsertScalingPoliciesTxn is used to insert new scaling policies within an existing transaction.
4968 func (s *StateStore) UpsertScalingPoliciesTxn(index uint64, scalingPolicies []*structs.ScalingPolicy,
4969 	txn *memdb.Txn) error {
4970 
4971 	hadUpdates := false
4972 
4973 	for _, policy := range scalingPolicies {
4974 		// Check if the scaling policy already exists
4975 		existing, err := txn.First("scaling_policy", "target",
4976 			policy.Target[structs.ScalingTargetNamespace],
4977 			policy.Target[structs.ScalingTargetJob],
4978 			policy.Target[structs.ScalingTargetGroup])
4979 		if err != nil {
4980 			return fmt.Errorf("scaling policy lookup failed: %v", err)
4981 		}
4982 
4983 		// Setup the indexes correctly
4984 		if existing != nil {
4985 			p := existing.(*structs.ScalingPolicy)
4986 			if !p.Diff(policy) {
4987 				continue
4988 			}
4989 			policy.ID = p.ID
4990 			policy.CreateIndex = p.CreateIndex
4991 			policy.ModifyIndex = index
4992 		} else {
4993 			// policy.ID must have been set already in Job.Register before log apply
4994 			policy.CreateIndex = index
4995 			policy.ModifyIndex = index
4996 		}
4997 
4998 		// Insert the scaling policy
4999 		hadUpdates = true
5000 		if err := txn.Insert("scaling_policy", policy); err != nil {
5001 			return err
5002 		}
5003 	}
5004 
5005 	// Update the indexes table for scaling policy
5006 	if hadUpdates {
5007 		if err := txn.Insert("index", &IndexEntry{"scaling_policy", index}); err != nil {
5008 			return fmt.Errorf("index update failed: %v", err)
5009 		}
5010 	}
5011 
5012 	return nil
5013 }
5014 
5015 func (s *StateStore) DeleteScalingPolicies(index uint64, ids []string) error {
5016 	txn := s.db.Txn(true)
5017 	defer txn.Abort()
5018 
5019 	err := s.DeleteScalingPoliciesTxn(index, ids, txn)
5020 	if err == nil {
5021 		txn.Commit()
5022 	}
5023 
5024 	return err
5025 }
5026 
5027 // DeleteScalingPoliciesTxn is used to delete a set of scaling policies by ID within an existing transaction.
5028 func (s *StateStore) DeleteScalingPoliciesTxn(index
uint64, ids []string, txn *memdb.Txn) error { 5029 if len(ids) == 0 { 5030 return nil 5031 } 5032 5033 for _, id := range ids { 5034 // Lookup the scaling policy 5035 existing, err := txn.First("scaling_policy", "id", id) 5036 if err != nil { 5037 return fmt.Errorf("scaling policy lookup failed: %v", err) 5038 } 5039 if existing == nil { 5040 return fmt.Errorf("scaling policy not found") 5041 } 5042 5043 // Delete the scaling policy 5044 if err := txn.Delete("scaling_policy", existing); err != nil { 5045 return fmt.Errorf("scaling policy delete failed: %v", err) 5046 } 5047 } 5048 5049 if err := txn.Insert("index", &IndexEntry{"scaling_policy", index}); err != nil { 5050 return fmt.Errorf("index update failed: %v", err) 5051 } 5052 5053 return nil 5054 } 5055 5056 // ScalingPolicies returns an iterator over all the scaling policies 5057 func (s *StateStore) ScalingPolicies(ws memdb.WatchSet) (memdb.ResultIterator, error) { 5058 txn := s.db.Txn(false) 5059 5060 // Walk the entire scaling_policy table 5061 iter, err := txn.Get("scaling_policy", "id") 5062 if err != nil { 5063 return nil, err 5064 } 5065 5066 ws.Add(iter.WatchCh()) 5067 5068 return iter, nil 5069 } 5070 5071 func (s *StateStore) ScalingPoliciesByNamespace(ws memdb.WatchSet, namespace string) (memdb.ResultIterator, error) { 5072 txn := s.db.Txn(false) 5073 5074 iter, err := txn.Get("scaling_policy", "target_prefix", namespace) 5075 if err != nil { 5076 return nil, err 5077 } 5078 5079 ws.Add(iter.WatchCh()) 5080 return iter, nil 5081 } 5082 5083 func (s *StateStore) ScalingPoliciesByJob(ws memdb.WatchSet, namespace, jobID string) (memdb.ResultIterator, error) { 5084 txn := s.db.Txn(false) 5085 return s.ScalingPoliciesByJobTxn(ws, namespace, jobID, txn) 5086 } 5087 5088 func (s *StateStore) ScalingPoliciesByJobTxn(ws memdb.WatchSet, namespace, jobID string, 5089 txn *memdb.Txn) (memdb.ResultIterator, error) { 5090 5091 iter, err := txn.Get("scaling_policy", "target_prefix", namespace, jobID) 5092 if err != nil { 5093 return nil, err 5094 } 5095 5096 ws.Add(iter.WatchCh()) 5097 return iter, nil 5098 } 5099 5100 func (s *StateStore) ScalingPolicyByID(ws memdb.WatchSet, id string) (*structs.ScalingPolicy, error) { 5101 txn := s.db.Txn(false) 5102 5103 watchCh, existing, err := txn.FirstWatch("scaling_policy", "id", id) 5104 if err != nil { 5105 return nil, fmt.Errorf("scaling_policy lookup failed: %v", err) 5106 } 5107 ws.Add(watchCh) 5108 5109 if existing != nil { 5110 return existing.(*structs.ScalingPolicy), nil 5111 } 5112 5113 return nil, nil 5114 } 5115 5116 func (s *StateStore) ScalingPolicyByTarget(ws memdb.WatchSet, target map[string]string) (*structs.ScalingPolicy, 5117 error) { 5118 txn := s.db.Txn(false) 5119 5120 // currently, only scaling policy type is against a task group 5121 namespace := target[structs.ScalingTargetNamespace] 5122 job := target[structs.ScalingTargetJob] 5123 group := target[structs.ScalingTargetGroup] 5124 5125 watchCh, existing, err := txn.FirstWatch("scaling_policy", "target", namespace, job, group) 5126 if err != nil { 5127 return nil, fmt.Errorf("scaling_policy lookup failed: %v", err) 5128 } 5129 ws.Add(watchCh) 5130 5131 if existing != nil { 5132 return existing.(*structs.ScalingPolicy), nil 5133 } 5134 5135 return nil, nil 5136 } 5137 5138 // StateSnapshot is used to provide a point-in-time snapshot 5139 type StateSnapshot struct { 5140 StateStore 5141 } 5142 5143 // DenormalizeAllocationsMap takes in a map of nodes to allocations, and queries the 5144 // Allocation for each of the 
Allocation diffs and merges the updated attributes with
5145 // the existing Allocation, and attaches the Job provided
5146 func (s *StateSnapshot) DenormalizeAllocationsMap(nodeAllocations map[string][]*structs.Allocation) error {
5147 	for nodeID, allocs := range nodeAllocations {
5148 		denormalizedAllocs, err := s.DenormalizeAllocationSlice(allocs)
5149 		if err != nil {
5150 			return err
5151 		}
5152 
5153 		nodeAllocations[nodeID] = denormalizedAllocs
5154 	}
5155 	return nil
5156 }
5157 
5158 // DenormalizeAllocationSlice queries the Allocation for each allocation diff
5159 // represented as an Allocation and merges the updated attributes with the existing
5160 // Allocation, and attaches the Job provided.
5161 //
5162 // This should only be called on terminal allocs, particularly stopped or preempted allocs
5163 func (s *StateSnapshot) DenormalizeAllocationSlice(allocs []*structs.Allocation) ([]*structs.Allocation, error) {
5164 	allocDiffs := make([]*structs.AllocationDiff, len(allocs))
5165 	for i, alloc := range allocs {
5166 		allocDiffs[i] = alloc.AllocationDiff()
5167 	}
5168 
5169 	return s.DenormalizeAllocationDiffSlice(allocDiffs)
5170 }
5171 
5172 // DenormalizeAllocationDiffSlice queries the Allocation for each AllocationDiff and merges
5173 // the updated attributes with the existing Allocation, and attaches the Job provided.
5174 //
5175 // This should only be called on terminal allocs, particularly stopped or preempted allocs
5176 func (s *StateSnapshot) DenormalizeAllocationDiffSlice(allocDiffs []*structs.AllocationDiff) ([]*structs.Allocation, error) {
5177 	// Output index for denormalized Allocations
5178 	j := 0
5179 
5180 	denormalizedAllocs := make([]*structs.Allocation, len(allocDiffs))
5181 	for _, allocDiff := range allocDiffs {
5182 		alloc, err := s.AllocByID(nil, allocDiff.ID)
5183 		if err != nil {
5184 			return nil, fmt.Errorf("alloc lookup failed: %v", err)
5185 		}
5186 		if alloc == nil {
5187 			return nil, fmt.Errorf("alloc %v doesn't exist", allocDiff.ID)
5188 		}
5189 
5190 		// Merge the updates to the Allocation.
Don't update alloc.Job for terminal allocs 5191 // so alloc refers to the latest Job view before destruction and to ease handler implementations 5192 allocCopy := alloc.Copy() 5193 5194 if allocDiff.PreemptedByAllocation != "" { 5195 allocCopy.PreemptedByAllocation = allocDiff.PreemptedByAllocation 5196 allocCopy.DesiredDescription = getPreemptedAllocDesiredDescription(allocDiff.PreemptedByAllocation) 5197 allocCopy.DesiredStatus = structs.AllocDesiredStatusEvict 5198 } else { 5199 // If alloc is a stopped alloc 5200 allocCopy.DesiredDescription = allocDiff.DesiredDescription 5201 allocCopy.DesiredStatus = structs.AllocDesiredStatusStop 5202 if allocDiff.ClientStatus != "" { 5203 allocCopy.ClientStatus = allocDiff.ClientStatus 5204 } 5205 } 5206 if allocDiff.ModifyTime != 0 { 5207 allocCopy.ModifyTime = allocDiff.ModifyTime 5208 } 5209 5210 // Update the allocDiff in the slice to equal the denormalized alloc 5211 denormalizedAllocs[j] = allocCopy 5212 j++ 5213 } 5214 // Retain only the denormalized Allocations in the slice 5215 denormalizedAllocs = denormalizedAllocs[:j] 5216 return denormalizedAllocs, nil 5217 } 5218 5219 func getPreemptedAllocDesiredDescription(PreemptedByAllocID string) string { 5220 return fmt.Sprintf("Preempted by alloc ID %v", PreemptedByAllocID) 5221 } 5222 5223 // StateRestore is used to optimize the performance when 5224 // restoring state by only using a single large transaction 5225 // instead of thousands of sub transactions 5226 type StateRestore struct { 5227 txn *memdb.Txn 5228 } 5229 5230 // Abort is used to abort the restore operation 5231 func (s *StateRestore) Abort() { 5232 s.txn.Abort() 5233 } 5234 5235 // Commit is used to commit the restore operation 5236 func (s *StateRestore) Commit() { 5237 s.txn.Commit() 5238 } 5239 5240 // NodeRestore is used to restore a node 5241 func (r *StateRestore) NodeRestore(node *structs.Node) error { 5242 if err := r.txn.Insert("nodes", node); err != nil { 5243 return fmt.Errorf("node insert failed: %v", err) 5244 } 5245 return nil 5246 } 5247 5248 // JobRestore is used to restore a job 5249 func (r *StateRestore) JobRestore(job *structs.Job) error { 5250 if err := r.txn.Insert("jobs", job); err != nil { 5251 return fmt.Errorf("job insert failed: %v", err) 5252 } 5253 return nil 5254 } 5255 5256 // EvalRestore is used to restore an evaluation 5257 func (r *StateRestore) EvalRestore(eval *structs.Evaluation) error { 5258 if err := r.txn.Insert("evals", eval); err != nil { 5259 return fmt.Errorf("eval insert failed: %v", err) 5260 } 5261 return nil 5262 } 5263 5264 // AllocRestore is used to restore an allocation 5265 func (r *StateRestore) AllocRestore(alloc *structs.Allocation) error { 5266 if err := r.txn.Insert("allocs", alloc); err != nil { 5267 return fmt.Errorf("alloc insert failed: %v", err) 5268 } 5269 return nil 5270 } 5271 5272 // IndexRestore is used to restore an index 5273 func (r *StateRestore) IndexRestore(idx *IndexEntry) error { 5274 if err := r.txn.Insert("index", idx); err != nil { 5275 return fmt.Errorf("index insert failed: %v", err) 5276 } 5277 return nil 5278 } 5279 5280 // PeriodicLaunchRestore is used to restore a periodic launch. 
5281 func (r *StateRestore) PeriodicLaunchRestore(launch *structs.PeriodicLaunch) error { 5282 if err := r.txn.Insert("periodic_launch", launch); err != nil { 5283 return fmt.Errorf("periodic launch insert failed: %v", err) 5284 } 5285 return nil 5286 } 5287 5288 // JobSummaryRestore is used to restore a job summary 5289 func (r *StateRestore) JobSummaryRestore(jobSummary *structs.JobSummary) error { 5290 if err := r.txn.Insert("job_summary", jobSummary); err != nil { 5291 return fmt.Errorf("job summary insert failed: %v", err) 5292 } 5293 return nil 5294 } 5295 5296 // JobVersionRestore is used to restore a job version 5297 func (r *StateRestore) JobVersionRestore(version *structs.Job) error { 5298 if err := r.txn.Insert("job_version", version); err != nil { 5299 return fmt.Errorf("job version insert failed: %v", err) 5300 } 5301 return nil 5302 } 5303 5304 // DeploymentRestore is used to restore a deployment 5305 func (r *StateRestore) DeploymentRestore(deployment *structs.Deployment) error { 5306 if err := r.txn.Insert("deployment", deployment); err != nil { 5307 return fmt.Errorf("deployment insert failed: %v", err) 5308 } 5309 return nil 5310 } 5311 5312 // VaultAccessorRestore is used to restore a vault accessor 5313 func (r *StateRestore) VaultAccessorRestore(accessor *structs.VaultAccessor) error { 5314 if err := r.txn.Insert("vault_accessors", accessor); err != nil { 5315 return fmt.Errorf("vault accessor insert failed: %v", err) 5316 } 5317 return nil 5318 } 5319 5320 // SITokenAccessorRestore is used to restore an SI token accessor 5321 func (r *StateRestore) SITokenAccessorRestore(accessor *structs.SITokenAccessor) error { 5322 if err := r.txn.Insert(siTokenAccessorTable, accessor); err != nil { 5323 return errors.Wrap(err, "si token accessor insert failed") 5324 } 5325 return nil 5326 } 5327 5328 // ACLPolicyRestore is used to restore an ACL policy 5329 func (r *StateRestore) ACLPolicyRestore(policy *structs.ACLPolicy) error { 5330 if err := r.txn.Insert("acl_policy", policy); err != nil { 5331 return fmt.Errorf("inserting acl policy failed: %v", err) 5332 } 5333 return nil 5334 } 5335 5336 // ACLTokenRestore is used to restore an ACL token 5337 func (r *StateRestore) ACLTokenRestore(token *structs.ACLToken) error { 5338 if err := r.txn.Insert("acl_token", token); err != nil { 5339 return fmt.Errorf("inserting acl token failed: %v", err) 5340 } 5341 return nil 5342 } 5343 5344 func (r *StateRestore) SchedulerConfigRestore(schedConfig *structs.SchedulerConfiguration) error { 5345 if err := r.txn.Insert("scheduler_config", schedConfig); err != nil { 5346 return fmt.Errorf("inserting scheduler config failed: %s", err) 5347 } 5348 return nil 5349 } 5350 5351 func (r *StateRestore) ClusterMetadataRestore(meta *structs.ClusterMetadata) error { 5352 if err := r.txn.Insert("cluster_meta", meta); err != nil { 5353 return fmt.Errorf("inserting cluster meta failed: %v", err) 5354 } 5355 return nil 5356 } 5357 5358 // ScalingPolicyRestore is used to restore a scaling policy 5359 func (r *StateRestore) ScalingPolicyRestore(scalingPolicy *structs.ScalingPolicy) error { 5360 if err := r.txn.Insert("scaling_policy", scalingPolicy); err != nil { 5361 return fmt.Errorf("scaling policy insert failed: %v", err) 5362 } 5363 return nil 5364 } 5365 5366 // CSIPluginRestore is used to restore a CSI plugin 5367 func (r *StateRestore) CSIPluginRestore(plugin *structs.CSIPlugin) error { 5368 if err := r.txn.Insert("csi_plugins", plugin); err != nil { 5369 return fmt.Errorf("csi plugin insert failed: 
%v", err) 5370 } 5371 return nil 5372 } 5373 5374 // CSIVolumeRestore is used to restore a CSI volume 5375 func (r *StateRestore) CSIVolumeRestore(volume *structs.CSIVolume) error { 5376 if err := r.txn.Insert("csi_volumes", volume); err != nil { 5377 return fmt.Errorf("csi volume insert failed: %v", err) 5378 } 5379 return nil 5380 } 5381 5382 func (r *StateRestore) ScalingEventsRestore(jobEvents *structs.JobScalingEvents) error { 5383 if err := r.txn.Insert("scaling_event", jobEvents); err != nil { 5384 return fmt.Errorf("scaling event insert failed: %v", err) 5385 } 5386 return nil 5387 }
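// A typical consumer of StateRestore is the FSM's snapshot-restore path,
// which streams decoded snapshot objects into the single large transaction
// and commits once at the end (a sketch; decoding is elided and the object
// sources are hypothetical):
//
//	restore, err := store.Restore()
//	if err != nil {
//		return err
//	}
//	defer restore.Abort() // safe after Commit, mirroring the write paths above
//	for _, node := range snapshotNodes {
//		if err := restore.NodeRestore(node); err != nil {
//			return err
//		}
//	}
//	restore.Commit()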