github.com/hernad/nomad@v1.6.112/nomad/state/state_store.go (about) 1 // Copyright (c) HashiCorp, Inc. 2 // SPDX-License-Identifier: MPL-2.0 3 4 package state 5 6 import ( 7 "context" 8 "errors" 9 "fmt" 10 "reflect" 11 "sort" 12 "strings" 13 "time" 14 15 "github.com/hashicorp/go-bexpr" 16 "github.com/hashicorp/go-hclog" 17 "github.com/hashicorp/go-memdb" 18 "github.com/hashicorp/go-multierror" 19 "github.com/hashicorp/go-set" 20 "github.com/hernad/nomad/helper/pointer" 21 "github.com/hernad/nomad/lib/lang" 22 "github.com/hernad/nomad/nomad/stream" 23 "github.com/hernad/nomad/nomad/structs" 24 "golang.org/x/exp/slices" 25 ) 26 27 // Txn is a transaction against a state store. 28 // This can be a read or write transaction. 29 type Txn = *txn 30 31 // SortOption represents how results can be sorted. 32 type SortOption bool 33 34 const ( 35 // SortDefault indicates that the result should be returned using the 36 // default go-memdb ResultIterator order. 37 SortDefault SortOption = false 38 39 // SortReverse indicates that the result should be returned using the 40 // reversed go-memdb ResultIterator order. 41 SortReverse SortOption = true 42 ) 43 44 // NodeUpsertOption represents options to configure a NodeUpsert operation. 45 type NodeUpsertOption uint8 46 47 const ( 48 // NodeUpsertWithNodePool indicates that the node pool in the node should 49 // be created if it doesn't exist. 50 NodeUpsertWithNodePool NodeUpsertOption = iota 51 ) 52 53 const ( 54 // NodeEligibilityEventPlanRejectThreshold is the message used when the node 55 // is set to ineligible due to multiple plan failures. 56 // This is a preventive measure to signal scheduler workers to not consider 57 // the node for future placements. 58 // Plan rejections for a node are expected due to the optimistic and 59 // concurrent nature of the scheduling process, but repeated failures for 60 // the same node may indicate an underlying issue not detected by Nomad. 61 // The plan applier keeps track of plan rejection history and will mark 62 // nodes as ineligible if they cross a given threshold. 63 NodeEligibilityEventPlanRejectThreshold = "Node marked as ineligible for scheduling due to multiple plan rejections, refer to https://www.nomadproject.io/s/port-plan-failure for more information" 64 65 // NodeRegisterEventRegistered is the message used when the node becomes 66 // registered. 67 NodeRegisterEventRegistered = "Node registered" 68 69 // NodeRegisterEventReregistered is the message used when the node becomes 70 // re-registered. 71 NodeRegisterEventReregistered = "Node re-registered" 72 ) 73 74 // terminate appends the go-memdb terminator character to s. 75 // 76 // We can then use the result for exact matches during prefix 77 // scans over compound indexes that start with s. 78 func terminate(s string) string { 79 return s + "\x00" 80 } 81 82 // IndexEntry is used with the "index" table 83 // for managing the latest Raft index affecting a table. 84 type IndexEntry struct { 85 Key string 86 Value uint64 87 } 88 89 // StateStoreConfig is used to configure a new state store 90 type StateStoreConfig struct { 91 // Logger is used to output the state store's logs 92 Logger hclog.Logger 93 94 // Region is the region of the server embedding the state store. 95 Region string 96 97 // EnablePublisher is used to enable or disable the event publisher 98 EnablePublisher bool 99 100 // EventBufferSize configures the amount of events to hold in memory 101 EventBufferSize int64 102 } 103 104 // The StateStore is responsible for maintaining all the Nomad 105 // state. It is manipulated by the FSM which maintains consistency 106 // through the use of Raft. The goals of the StateStore are to provide 107 // high concurrency for read operations without blocking writes, and 108 // to provide write availability in the face of reads. EVERY object 109 // returned as a result of a read against the state store should be 110 // considered a constant and NEVER modified in place. 111 type StateStore struct { 112 logger hclog.Logger 113 db *changeTrackerDB 114 115 // config is the passed in configuration 116 config *StateStoreConfig 117 118 // abandonCh is used to signal watchers that this state store has been 119 // abandoned (usually during a restore). This is only ever closed. 120 abandonCh chan struct{} 121 122 // TODO: refactor abandonCh to use a context so that both can use the same 123 // cancel mechanism. 124 stopEventBroker func() 125 } 126 127 type streamACLDelegate struct { 128 s *StateStore 129 } 130 131 func (a *streamACLDelegate) TokenProvider() stream.ACLTokenProvider { 132 resolver, _ := a.s.Snapshot() 133 return resolver 134 } 135 136 // NewStateStore is used to create a new state store 137 func NewStateStore(config *StateStoreConfig) (*StateStore, error) { 138 // Create the MemDB 139 db, err := memdb.NewMemDB(stateStoreSchema()) 140 if err != nil { 141 return nil, fmt.Errorf("state store setup failed: %v", err) 142 } 143 144 // Create the state store 145 ctx, cancel := context.WithCancel(context.TODO()) 146 s := &StateStore{ 147 logger: config.Logger.Named("state_store"), 148 config: config, 149 abandonCh: make(chan struct{}), 150 stopEventBroker: cancel, 151 } 152 153 if config.EnablePublisher { 154 // Create new event publisher using provided config 155 broker, err := stream.NewEventBroker(ctx, &streamACLDelegate{s}, stream.EventBrokerCfg{ 156 EventBufferSize: config.EventBufferSize, 157 Logger: config.Logger, 158 }) 159 if err != nil { 160 return nil, fmt.Errorf("creating state store event broker %w", err) 161 } 162 s.db = NewChangeTrackerDB(db, broker, eventsFromChanges) 163 } else { 164 s.db = NewChangeTrackerDB(db, nil, noOpProcessChanges) 165 } 166 167 // Initialize the state store with the default namespace and built-in node 168 // pools. 169 if err := s.namespaceInit(); err != nil { 170 return nil, fmt.Errorf("namespace state store initialization failed: %v", err) 171 } 172 if err := s.nodePoolInit(); err != nil { 173 return nil, fmt.Errorf("node pool state store initialization failed: %w", err) 174 } 175 176 return s, nil 177 } 178 179 // NewWatchSet returns a new memdb.WatchSet that adds the state stores abandonCh 180 // as a watcher. This is important in that it will notify when this specific 181 // state store is no longer valid, usually due to a new snapshot being loaded 182 func (s *StateStore) NewWatchSet() memdb.WatchSet { 183 ws := memdb.NewWatchSet() 184 ws.Add(s.AbandonCh()) 185 return ws 186 } 187 188 func (s *StateStore) EventBroker() (*stream.EventBroker, error) { 189 if s.db.publisher == nil { 190 return nil, fmt.Errorf("EventBroker not configured") 191 } 192 return s.db.publisher, nil 193 } 194 195 // namespaceInit ensures the default namespace exists. 196 func (s *StateStore) namespaceInit() error { 197 // Create the default namespace. This is safe to do every time we create the 198 // state store. There are two main cases, a brand new cluster in which case 199 // each server will have the same default namespace object, or a new cluster 200 // in which case if the default namespace has been modified, it will be 201 // overridden by the restore code path. 202 defaultNs := &structs.Namespace{ 203 Name: structs.DefaultNamespace, 204 Description: structs.DefaultNamespaceDescription, 205 } 206 207 if err := s.UpsertNamespaces(1, []*structs.Namespace{defaultNs}); err != nil { 208 return fmt.Errorf("inserting default namespace failed: %v", err) 209 } 210 211 return nil 212 } 213 214 // Config returns the state store configuration. 215 func (s *StateStore) Config() *StateStoreConfig { 216 return s.config 217 } 218 219 // Snapshot is used to create a point in time snapshot. Because 220 // we use MemDB, we just need to snapshot the state of the underlying 221 // database. 222 func (s *StateStore) Snapshot() (*StateSnapshot, error) { 223 memDBSnap := s.db.memdb.Snapshot() 224 225 store := StateStore{ 226 logger: s.logger, 227 config: s.config, 228 } 229 230 // Create a new change tracker DB that does not publish or track changes 231 store.db = NewChangeTrackerDB(memDBSnap, nil, noOpProcessChanges) 232 233 snap := &StateSnapshot{ 234 StateStore: store, 235 } 236 return snap, nil 237 } 238 239 // SnapshotMinIndex is used to create a state snapshot where the index is 240 // guaranteed to be greater than or equal to the index parameter. 241 // 242 // Some server operations (such as scheduling) exchange objects via RPC 243 // concurrent with Raft log application, so they must ensure the state store 244 // snapshot they are operating on is at or after the index the objects 245 // retrieved via RPC were applied to the Raft log at. 246 // 247 // Callers should maintain their own timer metric as the time this method 248 // blocks indicates Raft log application latency relative to scheduling. 249 func (s *StateStore) SnapshotMinIndex(ctx context.Context, index uint64) (*StateSnapshot, error) { 250 // Ported from work.go:waitForIndex prior to 0.9 251 252 const backoffBase = 20 * time.Millisecond 253 const backoffLimit = 1 * time.Second 254 var retries uint 255 var retryTimer *time.Timer 256 257 // XXX: Potential optimization is to set up a watch on the state 258 // store's index table and only unblock via a trigger rather than 259 // polling. 260 for { 261 // Get the states current index 262 snapshotIndex, err := s.LatestIndex() 263 if err != nil { 264 return nil, fmt.Errorf("failed to determine state store's index: %w", err) 265 } 266 267 // We only need the FSM state to be as recent as the given index 268 if snapshotIndex >= index { 269 return s.Snapshot() 270 } 271 272 // Exponential back off 273 retries++ 274 if retryTimer == nil { 275 // First retry, start at baseline 276 retryTimer = time.NewTimer(backoffBase) 277 } else { 278 // Subsequent retry, reset timer 279 deadline := 1 << (2 * retries) * backoffBase 280 if deadline > backoffLimit { 281 deadline = backoffLimit 282 } 283 retryTimer.Reset(deadline) 284 } 285 286 select { 287 case <-ctx.Done(): 288 return nil, ctx.Err() 289 case <-retryTimer.C: 290 } 291 } 292 } 293 294 // Restore is used to optimize the efficiency of rebuilding 295 // state by minimizing the number of transactions and checking 296 // overhead. 297 func (s *StateStore) Restore() (*StateRestore, error) { 298 txn := s.db.WriteTxnRestore() 299 r := &StateRestore{ 300 txn: txn, 301 } 302 return r, nil 303 } 304 305 // AbandonCh returns a channel you can wait on to know if the state store was 306 // abandoned. 307 func (s *StateStore) AbandonCh() <-chan struct{} { 308 return s.abandonCh 309 } 310 311 // Abandon is used to signal that the given state store has been abandoned. 312 // Calling this more than one time will panic. 313 func (s *StateStore) Abandon() { 314 s.StopEventBroker() 315 close(s.abandonCh) 316 } 317 318 // StopEventBroker calls the cancel func for the state stores event 319 // publisher. It should be called during server shutdown. 320 func (s *StateStore) StopEventBroker() { 321 s.stopEventBroker() 322 } 323 324 // QueryFn is the definition of a function that can be used to implement a basic 325 // blocking query against the state store. 326 type QueryFn func(memdb.WatchSet, *StateStore) (resp interface{}, index uint64, err error) 327 328 // BlockingQuery takes a query function and runs the function until the minimum 329 // query index is met or until the passed context is cancelled. 330 func (s *StateStore) BlockingQuery(query QueryFn, minIndex uint64, ctx context.Context) ( 331 resp interface{}, index uint64, err error) { 332 333 RUN_QUERY: 334 // We capture the state store and its abandon channel but pass a snapshot to 335 // the blocking query function. We operate on the snapshot to allow separate 336 // calls to the state store not all wrapped within the same transaction. 337 abandonCh := s.AbandonCh() 338 snap, _ := s.Snapshot() 339 stateSnap := &snap.StateStore 340 341 // We can skip all watch tracking if this isn't a blocking query. 342 var ws memdb.WatchSet 343 if minIndex > 0 { 344 ws = memdb.NewWatchSet() 345 346 // This channel will be closed if a snapshot is restored and the 347 // whole state store is abandoned. 348 ws.Add(abandonCh) 349 } 350 351 resp, index, err = query(ws, stateSnap) 352 if err != nil { 353 return nil, index, err 354 } 355 356 // We haven't reached the min-index yet. 357 if minIndex > 0 && index <= minIndex { 358 if err := ws.WatchCtx(ctx); err != nil { 359 return nil, index, err 360 } 361 362 goto RUN_QUERY 363 } 364 365 return resp, index, nil 366 } 367 368 // UpsertPlanResults is used to upsert the results of a plan. 369 func (s *StateStore) UpsertPlanResults(msgType structs.MessageType, index uint64, results *structs.ApplyPlanResultsRequest) error { 370 snapshot, err := s.Snapshot() 371 if err != nil { 372 return err 373 } 374 375 allocsStopped, err := snapshot.DenormalizeAllocationDiffSlice(results.AllocsStopped) 376 if err != nil { 377 return err 378 } 379 380 allocsPreempted, err := snapshot.DenormalizeAllocationDiffSlice(results.AllocsPreempted) 381 if err != nil { 382 return err 383 } 384 385 // COMPAT 0.11: Remove this denormalization when NodePreemptions is removed 386 results.NodePreemptions, err = snapshot.DenormalizeAllocationSlice(results.NodePreemptions) 387 if err != nil { 388 return err 389 } 390 391 txn := s.db.WriteTxnMsgT(msgType, index) 392 defer txn.Abort() 393 394 // Mark nodes as ineligible. 395 for _, nodeID := range results.IneligibleNodes { 396 s.logger.Warn("marking node as ineligible due to multiple plan rejections, refer to https://www.nomadproject.io/s/port-plan-failure for more information", "node_id", nodeID) 397 398 nodeEvent := structs.NewNodeEvent(). 399 SetSubsystem(structs.NodeEventSubsystemScheduler). 400 SetMessage(NodeEligibilityEventPlanRejectThreshold) 401 402 err := s.updateNodeEligibilityImpl(index, nodeID, 403 structs.NodeSchedulingIneligible, results.UpdatedAt, nodeEvent, txn) 404 if err != nil { 405 return err 406 } 407 } 408 409 // Upsert the newly created or updated deployment 410 if results.Deployment != nil { 411 if err := s.upsertDeploymentImpl(index, results.Deployment, txn); err != nil { 412 return err 413 } 414 } 415 416 // Update the status of deployments effected by the plan. 417 if len(results.DeploymentUpdates) != 0 { 418 s.upsertDeploymentUpdates(index, results.DeploymentUpdates, txn) 419 } 420 421 if results.EvalID != "" { 422 // Update the modify index of the eval id 423 if err := s.updateEvalModifyIndex(txn, index, results.EvalID); err != nil { 424 return err 425 } 426 } 427 428 numAllocs := 0 429 if len(results.Alloc) > 0 || len(results.NodePreemptions) > 0 { 430 // COMPAT 0.11: This branch will be removed, when Alloc is removed 431 // Attach the job to all the allocations. It is pulled out in the payload to 432 // avoid the redundancy of encoding, but should be denormalized prior to 433 // being inserted into MemDB. 434 addComputedAllocAttrs(results.Alloc, results.Job) 435 numAllocs = len(results.Alloc) + len(results.NodePreemptions) 436 } else { 437 // Attach the job to all the allocations. It is pulled out in the payload to 438 // avoid the redundancy of encoding, but should be denormalized prior to 439 // being inserted into MemDB. 440 addComputedAllocAttrs(results.AllocsUpdated, results.Job) 441 numAllocs = len(allocsStopped) + len(results.AllocsUpdated) + len(allocsPreempted) 442 } 443 444 allocsToUpsert := make([]*structs.Allocation, 0, numAllocs) 445 446 // COMPAT 0.11: Both these appends should be removed when Alloc and NodePreemptions are removed 447 allocsToUpsert = append(allocsToUpsert, results.Alloc...) 448 allocsToUpsert = append(allocsToUpsert, results.NodePreemptions...) 449 450 allocsToUpsert = append(allocsToUpsert, allocsStopped...) 451 allocsToUpsert = append(allocsToUpsert, results.AllocsUpdated...) 452 allocsToUpsert = append(allocsToUpsert, allocsPreempted...) 453 454 // handle upgrade path 455 for _, alloc := range allocsToUpsert { 456 alloc.Canonicalize() 457 } 458 459 if err := s.upsertAllocsImpl(index, allocsToUpsert, txn); err != nil { 460 return err 461 } 462 463 // Upsert followup evals for allocs that were preempted 464 for _, eval := range results.PreemptionEvals { 465 if err := s.nestedUpsertEval(txn, index, eval); err != nil { 466 return err 467 } 468 } 469 470 return txn.Commit() 471 } 472 473 // addComputedAllocAttrs adds the computed/derived attributes to the allocation. 474 // This method is used when an allocation is being denormalized. 475 func addComputedAllocAttrs(allocs []*structs.Allocation, job *structs.Job) { 476 structs.DenormalizeAllocationJobs(job, allocs) 477 478 // COMPAT(0.11): Remove in 0.11 479 // Calculate the total resources of allocations. It is pulled out in the 480 // payload to avoid encoding something that can be computed, but should be 481 // denormalized prior to being inserted into MemDB. 482 for _, alloc := range allocs { 483 if alloc.Resources != nil { 484 continue 485 } 486 487 alloc.Resources = new(structs.Resources) 488 for _, task := range alloc.TaskResources { 489 alloc.Resources.Add(task) 490 } 491 492 // Add the shared resources 493 alloc.Resources.Add(alloc.SharedResources) 494 } 495 } 496 497 // upsertDeploymentUpdates updates the deployments given the passed status 498 // updates. 499 func (s *StateStore) upsertDeploymentUpdates(index uint64, updates []*structs.DeploymentStatusUpdate, txn *txn) error { 500 for _, u := range updates { 501 if err := s.updateDeploymentStatusImpl(index, u, txn); err != nil { 502 return err 503 } 504 } 505 506 return nil 507 } 508 509 // UpsertJobSummary upserts a job summary into the state store. 510 func (s *StateStore) UpsertJobSummary(index uint64, jobSummary *structs.JobSummary) error { 511 txn := s.db.WriteTxn(index) 512 defer txn.Abort() 513 514 // Check if the job summary already exists 515 existing, err := txn.First("job_summary", "id", jobSummary.Namespace, jobSummary.JobID) 516 if err != nil { 517 return fmt.Errorf("job summary lookup failed: %v", err) 518 } 519 520 // Setup the indexes correctly 521 if existing != nil { 522 jobSummary.CreateIndex = existing.(*structs.JobSummary).CreateIndex 523 jobSummary.ModifyIndex = index 524 } else { 525 jobSummary.CreateIndex = index 526 jobSummary.ModifyIndex = index 527 } 528 529 // Update the index 530 if err := txn.Insert("job_summary", jobSummary); err != nil { 531 return err 532 } 533 534 // Update the indexes table for job summary 535 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 536 return fmt.Errorf("index update failed: %v", err) 537 } 538 539 return txn.Commit() 540 } 541 542 // DeleteJobSummary deletes the job summary with the given ID. This is for 543 // testing purposes only. 544 func (s *StateStore) DeleteJobSummary(index uint64, namespace, id string) error { 545 txn := s.db.WriteTxn(index) 546 defer txn.Abort() 547 548 // Delete the job summary 549 if _, err := txn.DeleteAll("job_summary", "id", namespace, id); err != nil { 550 return fmt.Errorf("deleting job summary failed: %v", err) 551 } 552 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 553 return fmt.Errorf("index update failed: %v", err) 554 } 555 return txn.Commit() 556 } 557 558 // UpsertDeployment is used to insert or update a new deployment. 559 func (s *StateStore) UpsertDeployment(index uint64, deployment *structs.Deployment) error { 560 txn := s.db.WriteTxn(index) 561 defer txn.Abort() 562 if err := s.upsertDeploymentImpl(index, deployment, txn); err != nil { 563 return err 564 } 565 return txn.Commit() 566 } 567 568 func (s *StateStore) upsertDeploymentImpl(index uint64, deployment *structs.Deployment, txn *txn) error { 569 // Check if the deployment already exists 570 existing, err := txn.First("deployment", "id", deployment.ID) 571 if err != nil { 572 return fmt.Errorf("deployment lookup failed: %v", err) 573 } 574 575 // Setup the indexes correctly 576 if existing != nil { 577 deployment.CreateIndex = existing.(*structs.Deployment).CreateIndex 578 deployment.ModifyIndex = index 579 } else { 580 deployment.CreateIndex = index 581 deployment.ModifyIndex = index 582 } 583 584 // Insert the deployment 585 if err := txn.Insert("deployment", deployment); err != nil { 586 return err 587 } 588 589 // Update the indexes table for deployment 590 if err := txn.Insert("index", &IndexEntry{"deployment", index}); err != nil { 591 return fmt.Errorf("index update failed: %v", err) 592 } 593 594 // If the deployment is being marked as complete, set the job to stable. 595 if deployment.Status == structs.DeploymentStatusSuccessful { 596 if err := s.updateJobStabilityImpl(index, deployment.Namespace, deployment.JobID, deployment.JobVersion, true, txn); err != nil { 597 return fmt.Errorf("failed to update job stability: %v", err) 598 } 599 } 600 601 return nil 602 } 603 604 func (s *StateStore) Deployments(ws memdb.WatchSet, sort SortOption) (memdb.ResultIterator, error) { 605 txn := s.db.ReadTxn() 606 607 var it memdb.ResultIterator 608 var err error 609 610 switch sort { 611 case SortReverse: 612 it, err = txn.GetReverse("deployment", "create") 613 default: 614 it, err = txn.Get("deployment", "create") 615 } 616 617 if err != nil { 618 return nil, err 619 } 620 621 ws.Add(it.WatchCh()) 622 623 return it, nil 624 } 625 626 func (s *StateStore) DeploymentsByNamespace(ws memdb.WatchSet, namespace string) (memdb.ResultIterator, error) { 627 txn := s.db.ReadTxn() 628 629 // Walk the entire deployments table 630 iter, err := txn.Get("deployment", "namespace", namespace) 631 if err != nil { 632 return nil, err 633 } 634 635 ws.Add(iter.WatchCh()) 636 return iter, nil 637 } 638 639 func (s *StateStore) DeploymentsByNamespaceOrdered(ws memdb.WatchSet, namespace string, sort SortOption) (memdb.ResultIterator, error) { 640 txn := s.db.ReadTxn() 641 642 var ( 643 it memdb.ResultIterator 644 err error 645 exact = terminate(namespace) 646 ) 647 648 switch sort { 649 case SortReverse: 650 it, err = txn.GetReverse("deployment", "namespace_create_prefix", exact) 651 default: 652 it, err = txn.Get("deployment", "namespace_create_prefix", exact) 653 } 654 655 if err != nil { 656 return nil, err 657 } 658 659 ws.Add(it.WatchCh()) 660 661 return it, nil 662 } 663 664 func (s *StateStore) DeploymentsByIDPrefix(ws memdb.WatchSet, namespace, deploymentID string, sort SortOption) (memdb.ResultIterator, error) { 665 txn := s.db.ReadTxn() 666 667 var iter memdb.ResultIterator 668 var err error 669 670 // Walk the entire deployments table 671 switch sort { 672 case SortReverse: 673 iter, err = txn.GetReverse("deployment", "id_prefix", deploymentID) 674 default: 675 iter, err = txn.Get("deployment", "id_prefix", deploymentID) 676 } 677 if err != nil { 678 return nil, err 679 } 680 681 ws.Add(iter.WatchCh()) 682 683 // Wrap the iterator in a filter 684 wrap := memdb.NewFilterIterator(iter, deploymentNamespaceFilter(namespace)) 685 return wrap, nil 686 } 687 688 // deploymentNamespaceFilter returns a filter function that filters all 689 // deployment not in the given namespace. 690 func deploymentNamespaceFilter(namespace string) func(interface{}) bool { 691 return func(raw interface{}) bool { 692 d, ok := raw.(*structs.Deployment) 693 if !ok { 694 return true 695 } 696 697 return namespace != structs.AllNamespacesSentinel && 698 d.Namespace != namespace 699 } 700 } 701 702 func (s *StateStore) DeploymentByID(ws memdb.WatchSet, deploymentID string) (*structs.Deployment, error) { 703 txn := s.db.ReadTxn() 704 return s.deploymentByIDImpl(ws, deploymentID, txn) 705 } 706 707 func (s *StateStore) deploymentByIDImpl(ws memdb.WatchSet, deploymentID string, txn *txn) (*structs.Deployment, error) { 708 watchCh, existing, err := txn.FirstWatch("deployment", "id", deploymentID) 709 if err != nil { 710 return nil, fmt.Errorf("deployment lookup failed: %v", err) 711 } 712 ws.Add(watchCh) 713 714 if existing != nil { 715 return existing.(*structs.Deployment), nil 716 } 717 718 return nil, nil 719 } 720 721 func (s *StateStore) DeploymentsByJobID(ws memdb.WatchSet, namespace, jobID string, all bool) ([]*structs.Deployment, error) { 722 txn := s.db.ReadTxn() 723 724 var job *structs.Job 725 // Read job from state store 726 _, existing, err := txn.FirstWatch("jobs", "id", namespace, jobID) 727 if err != nil { 728 return nil, fmt.Errorf("job lookup failed: %v", err) 729 } 730 if existing != nil { 731 job = existing.(*structs.Job) 732 } 733 734 // Get an iterator over the deployments 735 iter, err := txn.Get("deployment", "job", namespace, jobID) 736 if err != nil { 737 return nil, err 738 } 739 740 ws.Add(iter.WatchCh()) 741 742 var out []*structs.Deployment 743 for { 744 raw := iter.Next() 745 if raw == nil { 746 break 747 } 748 d := raw.(*structs.Deployment) 749 750 // If the allocation belongs to a job with the same ID but a different 751 // create index and we are not getting all the allocations whose Jobs 752 // matches the same Job ID then we skip it 753 if !all && job != nil && d.JobCreateIndex != job.CreateIndex { 754 continue 755 } 756 out = append(out, d) 757 } 758 759 return out, nil 760 } 761 762 // LatestDeploymentByJobID returns the latest deployment for the given job. The 763 // latest is determined strictly by CreateIndex. 764 func (s *StateStore) LatestDeploymentByJobID(ws memdb.WatchSet, namespace, jobID string) (*structs.Deployment, error) { 765 txn := s.db.ReadTxn() 766 767 // Get an iterator over the deployments 768 iter, err := txn.Get("deployment", "job", namespace, jobID) 769 if err != nil { 770 return nil, err 771 } 772 773 ws.Add(iter.WatchCh()) 774 775 var out *structs.Deployment 776 for { 777 raw := iter.Next() 778 if raw == nil { 779 break 780 } 781 782 d := raw.(*structs.Deployment) 783 if out == nil || out.CreateIndex < d.CreateIndex { 784 out = d 785 } 786 } 787 788 return out, nil 789 } 790 791 // DeleteDeployment is used to delete a set of deployments by ID 792 func (s *StateStore) DeleteDeployment(index uint64, deploymentIDs []string) error { 793 txn := s.db.WriteTxn(index) 794 defer txn.Abort() 795 796 if len(deploymentIDs) == 0 { 797 return nil 798 } 799 800 for _, deploymentID := range deploymentIDs { 801 // Lookup the deployment 802 existing, err := txn.First("deployment", "id", deploymentID) 803 if err != nil { 804 return fmt.Errorf("deployment lookup failed: %v", err) 805 } 806 if existing == nil { 807 return fmt.Errorf("deployment not found") 808 } 809 810 // Delete the deployment 811 if err := txn.Delete("deployment", existing); err != nil { 812 return fmt.Errorf("deployment delete failed: %v", err) 813 } 814 } 815 816 if err := txn.Insert("index", &IndexEntry{"deployment", index}); err != nil { 817 return fmt.Errorf("index update failed: %v", err) 818 } 819 820 return txn.Commit() 821 } 822 823 // UpsertScalingEvent is used to insert a new scaling event. 824 // Only the most recent JobTrackedScalingEvents will be kept. 825 func (s *StateStore) UpsertScalingEvent(index uint64, req *structs.ScalingEventRequest) error { 826 txn := s.db.WriteTxn(index) 827 defer txn.Abort() 828 829 // Get the existing events 830 existing, err := txn.First("scaling_event", "id", req.Namespace, req.JobID) 831 if err != nil { 832 return fmt.Errorf("scaling event lookup failed: %v", err) 833 } 834 835 var jobEvents *structs.JobScalingEvents 836 if existing != nil { 837 jobEvents = existing.(*structs.JobScalingEvents) 838 } else { 839 jobEvents = &structs.JobScalingEvents{ 840 Namespace: req.Namespace, 841 JobID: req.JobID, 842 ScalingEvents: make(map[string][]*structs.ScalingEvent), 843 } 844 } 845 846 jobEvents.ModifyIndex = index 847 req.ScalingEvent.CreateIndex = index 848 849 events := jobEvents.ScalingEvents[req.TaskGroup] 850 // Prepend this latest event 851 events = append( 852 []*structs.ScalingEvent{req.ScalingEvent}, 853 events..., 854 ) 855 // Truncate older events 856 if len(events) > structs.JobTrackedScalingEvents { 857 events = events[0:structs.JobTrackedScalingEvents] 858 } 859 jobEvents.ScalingEvents[req.TaskGroup] = events 860 861 // Insert the new event 862 if err := txn.Insert("scaling_event", jobEvents); err != nil { 863 return fmt.Errorf("scaling event insert failed: %v", err) 864 } 865 866 // Update the indexes table for scaling_event 867 if err := txn.Insert("index", &IndexEntry{"scaling_event", index}); err != nil { 868 return fmt.Errorf("index update failed: %v", err) 869 } 870 871 return txn.Commit() 872 } 873 874 // ScalingEvents returns an iterator over all the job scaling events 875 func (s *StateStore) ScalingEvents(ws memdb.WatchSet) (memdb.ResultIterator, error) { 876 txn := s.db.ReadTxn() 877 878 // Walk the entire scaling_event table 879 iter, err := txn.Get("scaling_event", "id") 880 if err != nil { 881 return nil, err 882 } 883 884 ws.Add(iter.WatchCh()) 885 886 return iter, nil 887 } 888 889 func (s *StateStore) ScalingEventsByJob(ws memdb.WatchSet, namespace, jobID string) (map[string][]*structs.ScalingEvent, uint64, error) { 890 txn := s.db.ReadTxn() 891 892 watchCh, existing, err := txn.FirstWatch("scaling_event", "id", namespace, jobID) 893 if err != nil { 894 return nil, 0, fmt.Errorf("job scaling events lookup failed: %v", err) 895 } 896 ws.Add(watchCh) 897 898 if existing != nil { 899 events := existing.(*structs.JobScalingEvents) 900 return events.ScalingEvents, events.ModifyIndex, nil 901 } 902 return nil, 0, nil 903 } 904 905 // UpsertNode is used to register a node or update a node definition 906 // This is assumed to be triggered by the client, so we retain the value 907 // of drain/eligibility which is set by the scheduler. 908 func (s *StateStore) UpsertNode(msgType structs.MessageType, index uint64, node *structs.Node, opts ...NodeUpsertOption) error { 909 txn := s.db.WriteTxnMsgT(msgType, index) 910 defer txn.Abort() 911 912 for _, opt := range opts { 913 // Create node pool if necessary. 914 if opt == NodeUpsertWithNodePool && node.NodePool != "" { 915 _, err := s.fetchOrCreateNodePoolTxn(txn, index, node.NodePool) 916 if err != nil { 917 return err 918 } 919 } 920 } 921 922 err := upsertNodeTxn(txn, index, node) 923 if err != nil { 924 return nil 925 } 926 return txn.Commit() 927 } 928 929 func upsertNodeTxn(txn *txn, index uint64, node *structs.Node) error { 930 // Check if the node already exists 931 existing, err := txn.First("nodes", "id", node.ID) 932 if err != nil { 933 return fmt.Errorf("node lookup failed: %v", err) 934 } 935 936 // Setup the indexes correctly 937 if existing != nil { 938 exist := existing.(*structs.Node) 939 node.CreateIndex = exist.CreateIndex 940 node.ModifyIndex = index 941 942 // Update last missed heartbeat if the node became unresponsive. 943 if !exist.UnresponsiveStatus() && node.UnresponsiveStatus() { 944 node.LastMissedHeartbeatIndex = index 945 } 946 947 // Retain node events that have already been set on the node 948 node.Events = exist.Events 949 950 // If we are transitioning from down, record the re-registration 951 if exist.Status == structs.NodeStatusDown && node.Status != structs.NodeStatusDown { 952 appendNodeEvents(index, node, []*structs.NodeEvent{ 953 structs.NewNodeEvent().SetSubsystem(structs.NodeEventSubsystemCluster). 954 SetMessage(NodeRegisterEventReregistered). 955 SetTimestamp(time.Unix(node.StatusUpdatedAt, 0))}) 956 } 957 958 node.SchedulingEligibility = exist.SchedulingEligibility // Retain the eligibility 959 node.DrainStrategy = exist.DrainStrategy // Retain the drain strategy 960 node.LastDrain = exist.LastDrain // Retain the drain metadata 961 962 // Retain the last index the node missed a heartbeat. 963 if node.LastMissedHeartbeatIndex < exist.LastMissedHeartbeatIndex { 964 node.LastMissedHeartbeatIndex = exist.LastMissedHeartbeatIndex 965 } 966 967 // Retain the last index the node updated its allocs. 968 if node.LastAllocUpdateIndex < exist.LastAllocUpdateIndex { 969 node.LastAllocUpdateIndex = exist.LastAllocUpdateIndex 970 } 971 } else { 972 // Because this is the first time the node is being registered, we should 973 // also create a node registration event 974 nodeEvent := structs.NewNodeEvent().SetSubsystem(structs.NodeEventSubsystemCluster). 975 SetMessage(NodeRegisterEventRegistered). 976 SetTimestamp(time.Unix(node.StatusUpdatedAt, 0)) 977 node.Events = []*structs.NodeEvent{nodeEvent} 978 node.CreateIndex = index 979 node.ModifyIndex = index 980 } 981 982 // Insert the node 983 if err := txn.Insert("nodes", node); err != nil { 984 return fmt.Errorf("node insert failed: %v", err) 985 } 986 if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil { 987 return fmt.Errorf("index update failed: %v", err) 988 } 989 if err := upsertCSIPluginsForNode(txn, node, index); err != nil { 990 return fmt.Errorf("csi plugin update failed: %v", err) 991 } 992 993 return nil 994 } 995 996 // DeleteNode deregisters a batch of nodes 997 func (s *StateStore) DeleteNode(msgType structs.MessageType, index uint64, nodes []string) error { 998 txn := s.db.WriteTxn(index) 999 defer txn.Abort() 1000 1001 err := deleteNodeTxn(txn, index, nodes) 1002 if err != nil { 1003 return nil 1004 } 1005 return txn.Commit() 1006 } 1007 1008 func deleteNodeTxn(txn *txn, index uint64, nodes []string) error { 1009 if len(nodes) == 0 { 1010 return fmt.Errorf("node ids missing") 1011 } 1012 1013 for _, nodeID := range nodes { 1014 existing, err := txn.First("nodes", "id", nodeID) 1015 if err != nil { 1016 return fmt.Errorf("node lookup failed: %s: %v", nodeID, err) 1017 } 1018 if existing == nil { 1019 return fmt.Errorf("node not found: %s", nodeID) 1020 } 1021 1022 // Delete the node 1023 if err := txn.Delete("nodes", existing); err != nil { 1024 return fmt.Errorf("node delete failed: %s: %v", nodeID, err) 1025 } 1026 1027 node := existing.(*structs.Node) 1028 if err := deleteNodeCSIPlugins(txn, node, index); err != nil { 1029 return fmt.Errorf("csi plugin delete failed: %v", err) 1030 } 1031 } 1032 1033 if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil { 1034 return fmt.Errorf("index update failed: %v", err) 1035 } 1036 1037 return nil 1038 } 1039 1040 // UpdateNodeStatus is used to update the status of a node 1041 func (s *StateStore) UpdateNodeStatus(msgType structs.MessageType, index uint64, nodeID, status string, updatedAt int64, event *structs.NodeEvent) error { 1042 txn := s.db.WriteTxnMsgT(msgType, index) 1043 defer txn.Abort() 1044 1045 if err := s.updateNodeStatusTxn(txn, nodeID, status, updatedAt, event); err != nil { 1046 return err 1047 } 1048 1049 return txn.Commit() 1050 } 1051 1052 func (s *StateStore) updateNodeStatusTxn(txn *txn, nodeID, status string, updatedAt int64, event *structs.NodeEvent) error { 1053 1054 // Lookup the node 1055 existing, err := txn.First("nodes", "id", nodeID) 1056 if err != nil { 1057 return fmt.Errorf("node lookup failed: %v", err) 1058 } 1059 if existing == nil { 1060 return fmt.Errorf("node not found") 1061 } 1062 1063 // Copy the existing node 1064 existingNode := existing.(*structs.Node) 1065 copyNode := existingNode.Copy() 1066 copyNode.StatusUpdatedAt = updatedAt 1067 1068 // Add the event if given 1069 if event != nil { 1070 appendNodeEvents(txn.Index, copyNode, []*structs.NodeEvent{event}) 1071 } 1072 1073 // Update the status in the copy 1074 copyNode.Status = status 1075 copyNode.ModifyIndex = txn.Index 1076 1077 // Update last missed heartbeat if the node became unresponsive or reset it 1078 // zero if the node became ready. 1079 if !existingNode.UnresponsiveStatus() && copyNode.UnresponsiveStatus() { 1080 copyNode.LastMissedHeartbeatIndex = txn.Index 1081 } else if existingNode.Status != structs.NodeStatusReady && 1082 copyNode.Status == structs.NodeStatusReady { 1083 copyNode.LastMissedHeartbeatIndex = 0 1084 } 1085 1086 // Insert the node 1087 if err := txn.Insert("nodes", copyNode); err != nil { 1088 return fmt.Errorf("node update failed: %v", err) 1089 } 1090 if err := txn.Insert("index", &IndexEntry{"nodes", txn.Index}); err != nil { 1091 return fmt.Errorf("index update failed: %v", err) 1092 } 1093 return nil 1094 } 1095 1096 // BatchUpdateNodeDrain is used to update the drain of a node set of nodes. 1097 // This is currently only called when node drain is completed by the drainer. 1098 func (s *StateStore) BatchUpdateNodeDrain(msgType structs.MessageType, index uint64, updatedAt int64, 1099 updates map[string]*structs.DrainUpdate, events map[string]*structs.NodeEvent) error { 1100 txn := s.db.WriteTxnMsgT(msgType, index) 1101 defer txn.Abort() 1102 for node, update := range updates { 1103 if err := s.updateNodeDrainImpl(txn, index, node, update.DrainStrategy, update.MarkEligible, updatedAt, 1104 events[node], nil, "", true); err != nil { 1105 return err 1106 } 1107 } 1108 return txn.Commit() 1109 } 1110 1111 // UpdateNodeDrain is used to update the drain of a node 1112 func (s *StateStore) UpdateNodeDrain(msgType structs.MessageType, index uint64, nodeID string, 1113 drain *structs.DrainStrategy, markEligible bool, updatedAt int64, 1114 event *structs.NodeEvent, drainMeta map[string]string, accessorId string) error { 1115 1116 txn := s.db.WriteTxnMsgT(msgType, index) 1117 defer txn.Abort() 1118 if err := s.updateNodeDrainImpl(txn, index, nodeID, drain, markEligible, updatedAt, event, 1119 drainMeta, accessorId, false); err != nil { 1120 1121 return err 1122 } 1123 return txn.Commit() 1124 } 1125 1126 func (s *StateStore) updateNodeDrainImpl(txn *txn, index uint64, nodeID string, 1127 drain *structs.DrainStrategy, markEligible bool, updatedAt int64, 1128 event *structs.NodeEvent, drainMeta map[string]string, accessorId string, 1129 drainCompleted bool) error { 1130 1131 // Lookup the node 1132 existing, err := txn.First("nodes", "id", nodeID) 1133 if err != nil { 1134 return fmt.Errorf("node lookup failed: %v", err) 1135 } 1136 if existing == nil { 1137 return fmt.Errorf("node not found") 1138 } 1139 1140 // Copy the existing node 1141 existingNode := existing.(*structs.Node) 1142 updatedNode := existingNode.Copy() 1143 updatedNode.StatusUpdatedAt = updatedAt 1144 1145 // Add the event if given 1146 if event != nil { 1147 appendNodeEvents(index, updatedNode, []*structs.NodeEvent{event}) 1148 } 1149 1150 // Update the drain in the copy 1151 updatedNode.DrainStrategy = drain 1152 if drain != nil { 1153 updatedNode.SchedulingEligibility = structs.NodeSchedulingIneligible 1154 } else if markEligible { 1155 updatedNode.SchedulingEligibility = structs.NodeSchedulingEligible 1156 } 1157 1158 // Update LastDrain 1159 updateTime := time.Unix(updatedAt, 0) 1160 1161 // if drain strategy isn't set before or after, this wasn't a drain operation 1162 // in that case, we don't care about .LastDrain 1163 drainNoop := existingNode.DrainStrategy == nil && updatedNode.DrainStrategy == nil 1164 // otherwise, when done with this method, updatedNode.LastDrain should be set 1165 // if starting a new drain operation, create a new LastDrain. otherwise, update the existing one. 1166 startedDraining := existingNode.DrainStrategy == nil && updatedNode.DrainStrategy != nil 1167 if !drainNoop { 1168 if startedDraining { 1169 updatedNode.LastDrain = &structs.DrainMetadata{ 1170 StartedAt: updateTime, 1171 Meta: drainMeta, 1172 } 1173 } else if updatedNode.LastDrain == nil { 1174 // if already draining and LastDrain doesn't exist, we need to create a new one 1175 // this could happen if we upgraded to 1.1.x during a drain 1176 updatedNode.LastDrain = &structs.DrainMetadata{ 1177 // we don't have sub-second accuracy on these fields, so truncate this 1178 StartedAt: time.Unix(existingNode.DrainStrategy.StartedAt.Unix(), 0), 1179 Meta: drainMeta, 1180 } 1181 } 1182 1183 updatedNode.LastDrain.UpdatedAt = updateTime 1184 1185 // won't have new metadata on drain complete; keep the existing operator-provided metadata 1186 // also, keep existing if they didn't provide it 1187 if len(drainMeta) != 0 { 1188 updatedNode.LastDrain.Meta = drainMeta 1189 } 1190 1191 // we won't have an accessor ID on drain complete, so don't overwrite the existing one 1192 if accessorId != "" { 1193 updatedNode.LastDrain.AccessorID = accessorId 1194 } 1195 1196 if updatedNode.DrainStrategy != nil { 1197 updatedNode.LastDrain.Status = structs.DrainStatusDraining 1198 } else if drainCompleted { 1199 updatedNode.LastDrain.Status = structs.DrainStatusComplete 1200 } else { 1201 updatedNode.LastDrain.Status = structs.DrainStatusCanceled 1202 } 1203 } 1204 1205 updatedNode.ModifyIndex = index 1206 1207 // Insert the node 1208 if err := txn.Insert("nodes", updatedNode); err != nil { 1209 return fmt.Errorf("node update failed: %v", err) 1210 } 1211 if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil { 1212 return fmt.Errorf("index update failed: %v", err) 1213 } 1214 1215 return nil 1216 } 1217 1218 // UpdateNodeEligibility is used to update the scheduling eligibility of a node 1219 func (s *StateStore) UpdateNodeEligibility(msgType structs.MessageType, index uint64, nodeID string, eligibility string, updatedAt int64, event *structs.NodeEvent) error { 1220 txn := s.db.WriteTxnMsgT(msgType, index) 1221 defer txn.Abort() 1222 if err := s.updateNodeEligibilityImpl(index, nodeID, eligibility, updatedAt, event, txn); err != nil { 1223 return err 1224 } 1225 return txn.Commit() 1226 } 1227 1228 func (s *StateStore) updateNodeEligibilityImpl(index uint64, nodeID string, eligibility string, updatedAt int64, event *structs.NodeEvent, txn *txn) error { 1229 // Lookup the node 1230 existing, err := txn.First("nodes", "id", nodeID) 1231 if err != nil { 1232 return fmt.Errorf("node lookup failed: %v", err) 1233 } 1234 if existing == nil { 1235 return fmt.Errorf("node not found") 1236 } 1237 1238 // Copy the existing node 1239 existingNode := existing.(*structs.Node) 1240 copyNode := existingNode.Copy() 1241 copyNode.StatusUpdatedAt = updatedAt 1242 1243 // Add the event if given 1244 if event != nil { 1245 appendNodeEvents(index, copyNode, []*structs.NodeEvent{event}) 1246 } 1247 1248 // Check if this is a valid action 1249 if copyNode.DrainStrategy != nil && eligibility == structs.NodeSchedulingEligible { 1250 return fmt.Errorf("can not set node's scheduling eligibility to eligible while it is draining") 1251 } 1252 1253 // Update the eligibility in the copy 1254 copyNode.SchedulingEligibility = eligibility 1255 copyNode.ModifyIndex = index 1256 1257 // Insert the node 1258 if err := txn.Insert("nodes", copyNode); err != nil { 1259 return fmt.Errorf("node update failed: %v", err) 1260 } 1261 if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil { 1262 return fmt.Errorf("index update failed: %v", err) 1263 } 1264 1265 return nil 1266 } 1267 1268 // UpsertNodeEvents adds the node events to the nodes, rotating events as 1269 // necessary. 1270 func (s *StateStore) UpsertNodeEvents(msgType structs.MessageType, index uint64, nodeEvents map[string][]*structs.NodeEvent) error { 1271 txn := s.db.WriteTxnMsgT(msgType, index) 1272 defer txn.Abort() 1273 1274 for nodeID, events := range nodeEvents { 1275 if err := s.upsertNodeEvents(index, nodeID, events, txn); err != nil { 1276 return err 1277 } 1278 } 1279 1280 return txn.Commit() 1281 } 1282 1283 // upsertNodeEvent upserts a node event for a respective node. It also maintains 1284 // that a fixed number of node events are ever stored simultaneously, deleting 1285 // older events once this bound has been reached. 1286 func (s *StateStore) upsertNodeEvents(index uint64, nodeID string, events []*structs.NodeEvent, txn *txn) error { 1287 // Lookup the node 1288 existing, err := txn.First("nodes", "id", nodeID) 1289 if err != nil { 1290 return fmt.Errorf("node lookup failed: %v", err) 1291 } 1292 if existing == nil { 1293 return fmt.Errorf("node not found") 1294 } 1295 1296 // Copy the existing node 1297 existingNode := existing.(*structs.Node) 1298 copyNode := existingNode.Copy() 1299 appendNodeEvents(index, copyNode, events) 1300 1301 // Insert the node 1302 if err := txn.Insert("nodes", copyNode); err != nil { 1303 return fmt.Errorf("node update failed: %v", err) 1304 } 1305 if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil { 1306 return fmt.Errorf("index update failed: %v", err) 1307 } 1308 1309 return nil 1310 } 1311 1312 // appendNodeEvents is a helper that takes a node and new events and appends 1313 // them, pruning older events as needed. 1314 func appendNodeEvents(index uint64, node *structs.Node, events []*structs.NodeEvent) { 1315 // Add the events, updating the indexes 1316 for _, e := range events { 1317 e.CreateIndex = index 1318 node.Events = append(node.Events, e) 1319 } 1320 1321 // Keep node events pruned to not exceed the max allowed 1322 if l := len(node.Events); l > structs.MaxRetainedNodeEvents { 1323 delta := l - structs.MaxRetainedNodeEvents 1324 node.Events = node.Events[delta:] 1325 } 1326 } 1327 1328 // upsertCSIPluginsForNode indexes csi plugins for volume retrieval, with health. It's called 1329 // on upsertNodeEvents, so that event driven health changes are updated 1330 func upsertCSIPluginsForNode(txn *txn, node *structs.Node, index uint64) error { 1331 1332 upsertFn := func(info *structs.CSIInfo) error { 1333 raw, err := txn.First("csi_plugins", "id", info.PluginID) 1334 if err != nil { 1335 return fmt.Errorf("csi_plugin lookup error: %s %v", info.PluginID, err) 1336 } 1337 1338 var plug *structs.CSIPlugin 1339 if raw != nil { 1340 plug = raw.(*structs.CSIPlugin).Copy() 1341 } else { 1342 if !info.Healthy { 1343 // we don't want to create new plugins for unhealthy 1344 // allocs, otherwise we'd recreate the plugin when we 1345 // get the update for the alloc becoming terminal 1346 return nil 1347 } 1348 plug = structs.NewCSIPlugin(info.PluginID, index) 1349 } 1350 1351 // the plugin may have been created by the job being updated, in which case 1352 // this data will not be configured, it's only available to the fingerprint 1353 // system 1354 plug.Provider = info.Provider 1355 plug.Version = info.ProviderVersion 1356 1357 err = plug.AddPlugin(node.ID, info) 1358 if err != nil { 1359 return err 1360 } 1361 1362 plug.ModifyIndex = index 1363 1364 err = txn.Insert("csi_plugins", plug) 1365 if err != nil { 1366 return fmt.Errorf("csi_plugins insert error: %v", err) 1367 } 1368 1369 return nil 1370 } 1371 1372 inUseController := map[string]struct{}{} 1373 inUseNode := map[string]struct{}{} 1374 1375 for _, info := range node.CSIControllerPlugins { 1376 err := upsertFn(info) 1377 if err != nil { 1378 return err 1379 } 1380 inUseController[info.PluginID] = struct{}{} 1381 } 1382 1383 for _, info := range node.CSINodePlugins { 1384 err := upsertFn(info) 1385 if err != nil { 1386 return err 1387 } 1388 inUseNode[info.PluginID] = struct{}{} 1389 } 1390 1391 // remove the client node from any plugin that's not 1392 // running on it. 1393 iter, err := txn.Get("csi_plugins", "id") 1394 if err != nil { 1395 return fmt.Errorf("csi_plugins lookup failed: %v", err) 1396 } 1397 for { 1398 raw := iter.Next() 1399 if raw == nil { 1400 break 1401 } 1402 plug, ok := raw.(*structs.CSIPlugin) 1403 if !ok { 1404 continue 1405 } 1406 plug = plug.Copy() 1407 1408 var hadDelete bool 1409 if _, ok := inUseController[plug.ID]; !ok { 1410 if _, asController := plug.Controllers[node.ID]; asController { 1411 err := plug.DeleteNodeForType(node.ID, structs.CSIPluginTypeController) 1412 if err != nil { 1413 return err 1414 } 1415 hadDelete = true 1416 } 1417 } 1418 if _, ok := inUseNode[plug.ID]; !ok { 1419 if _, asNode := plug.Nodes[node.ID]; asNode { 1420 err := plug.DeleteNodeForType(node.ID, structs.CSIPluginTypeNode) 1421 if err != nil { 1422 return err 1423 } 1424 hadDelete = true 1425 } 1426 } 1427 // we check this flag both for performance and to make sure we 1428 // don't delete a plugin when registering a node plugin but 1429 // no controller 1430 if hadDelete { 1431 err = updateOrGCPlugin(index, txn, plug) 1432 if err != nil { 1433 return err 1434 } 1435 } 1436 } 1437 1438 if err := txn.Insert("index", &IndexEntry{"csi_plugins", index}); err != nil { 1439 return fmt.Errorf("index update failed: %v", err) 1440 } 1441 1442 return nil 1443 } 1444 1445 // deleteNodeCSIPlugins cleans up CSIInfo node health status, called in DeleteNode 1446 func deleteNodeCSIPlugins(txn *txn, node *structs.Node, index uint64) error { 1447 if len(node.CSIControllerPlugins) == 0 && len(node.CSINodePlugins) == 0 { 1448 return nil 1449 } 1450 1451 names := map[string]struct{}{} 1452 for _, info := range node.CSIControllerPlugins { 1453 names[info.PluginID] = struct{}{} 1454 } 1455 for _, info := range node.CSINodePlugins { 1456 names[info.PluginID] = struct{}{} 1457 } 1458 1459 for id := range names { 1460 raw, err := txn.First("csi_plugins", "id", id) 1461 if err != nil { 1462 return fmt.Errorf("csi_plugins lookup error %s: %v", id, err) 1463 } 1464 if raw == nil { 1465 // plugin may have been deregistered but we didn't 1466 // update the fingerprint yet 1467 continue 1468 } 1469 1470 plug := raw.(*structs.CSIPlugin).Copy() 1471 err = plug.DeleteNode(node.ID) 1472 if err != nil { 1473 return err 1474 } 1475 err = updateOrGCPlugin(index, txn, plug) 1476 if err != nil { 1477 return err 1478 } 1479 } 1480 1481 if err := txn.Insert("index", &IndexEntry{"csi_plugins", index}); err != nil { 1482 return fmt.Errorf("index update failed: %v", err) 1483 } 1484 1485 return nil 1486 } 1487 1488 // updateOrGCPlugin updates a plugin but will delete it if the plugin is empty 1489 func updateOrGCPlugin(index uint64, txn Txn, plug *structs.CSIPlugin) error { 1490 if plug.IsEmpty() { 1491 err := txn.Delete("csi_plugins", plug) 1492 if err != nil { 1493 return fmt.Errorf("csi_plugins delete error: %v", err) 1494 } 1495 } else { 1496 plug.ModifyIndex = index 1497 err := txn.Insert("csi_plugins", plug) 1498 if err != nil { 1499 return fmt.Errorf("csi_plugins update error %s: %v", plug.ID, err) 1500 } 1501 } 1502 return nil 1503 } 1504 1505 // deleteJobFromPlugins removes the allocations of this job from any plugins the job is 1506 // running, possibly deleting the plugin if it's no longer in use. It's called in DeleteJobTxn 1507 func (s *StateStore) deleteJobFromPlugins(index uint64, txn Txn, job *structs.Job) error { 1508 ws := memdb.NewWatchSet() 1509 summary, err := s.JobSummaryByID(ws, job.Namespace, job.ID) 1510 if err != nil { 1511 return fmt.Errorf("error getting job summary: %v", err) 1512 } 1513 1514 allocs, err := s.AllocsByJob(ws, job.Namespace, job.ID, false) 1515 if err != nil { 1516 return fmt.Errorf("error getting allocations: %v", err) 1517 } 1518 1519 type pair struct { 1520 pluginID string 1521 alloc *structs.Allocation 1522 } 1523 1524 plugAllocs := []*pair{} 1525 found := map[string]struct{}{} 1526 1527 // Find plugins for allocs that belong to this job 1528 for _, a := range allocs { 1529 tg := a.Job.LookupTaskGroup(a.TaskGroup) 1530 found[tg.Name] = struct{}{} 1531 for _, t := range tg.Tasks { 1532 if t.CSIPluginConfig == nil { 1533 continue 1534 } 1535 plugAllocs = append(plugAllocs, &pair{ 1536 pluginID: t.CSIPluginConfig.ID, 1537 alloc: a, 1538 }) 1539 } 1540 } 1541 1542 // Find any plugins that do not yet have allocs for this job 1543 for _, tg := range job.TaskGroups { 1544 if _, ok := found[tg.Name]; ok { 1545 continue 1546 } 1547 1548 for _, t := range tg.Tasks { 1549 if t.CSIPluginConfig == nil { 1550 continue 1551 } 1552 plugAllocs = append(plugAllocs, &pair{ 1553 pluginID: t.CSIPluginConfig.ID, 1554 }) 1555 } 1556 } 1557 1558 plugins := map[string]*structs.CSIPlugin{} 1559 1560 for _, x := range plugAllocs { 1561 plug, ok := plugins[x.pluginID] 1562 1563 if !ok { 1564 plug, err = s.CSIPluginByIDTxn(txn, nil, x.pluginID) 1565 if err != nil { 1566 return fmt.Errorf("error getting plugin: %s, %v", x.pluginID, err) 1567 } 1568 if plug == nil { 1569 // plugin was never successfully registered or has been 1570 // GC'd out from under us 1571 continue 1572 } 1573 // only copy once, so we update the same plugin on each alloc 1574 plugins[x.pluginID] = plug.Copy() 1575 plug = plugins[x.pluginID] 1576 } 1577 1578 if x.alloc == nil { 1579 continue 1580 } 1581 err := plug.DeleteAlloc(x.alloc.ID, x.alloc.NodeID) 1582 if err != nil { 1583 return err 1584 } 1585 } 1586 1587 for _, plug := range plugins { 1588 plug.DeleteJob(job, summary) 1589 err = updateOrGCPlugin(index, txn, plug) 1590 if err != nil { 1591 return err 1592 } 1593 } 1594 1595 if len(plugins) > 0 { 1596 if err = txn.Insert("index", &IndexEntry{"csi_plugins", index}); err != nil { 1597 return fmt.Errorf("index update failed: %v", err) 1598 } 1599 } 1600 1601 return nil 1602 } 1603 1604 // NodeByID is used to lookup a node by ID 1605 func (s *StateStore) NodeByID(ws memdb.WatchSet, nodeID string) (*structs.Node, error) { 1606 txn := s.db.ReadTxn() 1607 1608 watchCh, existing, err := txn.FirstWatch("nodes", "id", nodeID) 1609 if err != nil { 1610 return nil, fmt.Errorf("node lookup failed: %v", err) 1611 } 1612 ws.Add(watchCh) 1613 1614 if existing != nil { 1615 return existing.(*structs.Node), nil 1616 } 1617 return nil, nil 1618 } 1619 1620 // NodesByIDPrefix is used to lookup nodes by prefix 1621 func (s *StateStore) NodesByIDPrefix(ws memdb.WatchSet, nodeID string) (memdb.ResultIterator, error) { 1622 txn := s.db.ReadTxn() 1623 1624 iter, err := txn.Get("nodes", "id_prefix", nodeID) 1625 if err != nil { 1626 return nil, fmt.Errorf("node lookup failed: %v", err) 1627 } 1628 ws.Add(iter.WatchCh()) 1629 1630 return iter, nil 1631 } 1632 1633 // NodeBySecretID is used to lookup a node by SecretID 1634 func (s *StateStore) NodeBySecretID(ws memdb.WatchSet, secretID string) (*structs.Node, error) { 1635 txn := s.db.ReadTxn() 1636 1637 watchCh, existing, err := txn.FirstWatch("nodes", "secret_id", secretID) 1638 if err != nil { 1639 return nil, fmt.Errorf("node lookup by SecretID failed: %v", err) 1640 } 1641 ws.Add(watchCh) 1642 1643 if existing != nil { 1644 return existing.(*structs.Node), nil 1645 } 1646 return nil, nil 1647 } 1648 1649 // NodesByNodePool returns an iterator over all nodes that are part of the 1650 // given node pool. 1651 func (s *StateStore) NodesByNodePool(ws memdb.WatchSet, pool string) (memdb.ResultIterator, error) { 1652 txn := s.db.ReadTxn() 1653 1654 iter, err := txn.Get("nodes", "node_pool", pool) 1655 if err != nil { 1656 return nil, err 1657 } 1658 1659 ws.Add(iter.WatchCh()) 1660 return iter, nil 1661 } 1662 1663 // Nodes returns an iterator over all the nodes 1664 func (s *StateStore) Nodes(ws memdb.WatchSet) (memdb.ResultIterator, error) { 1665 txn := s.db.ReadTxn() 1666 1667 // Walk the entire nodes table 1668 iter, err := txn.Get("nodes", "id") 1669 if err != nil { 1670 return nil, err 1671 } 1672 ws.Add(iter.WatchCh()) 1673 return iter, nil 1674 } 1675 1676 // UpsertJob is used to register a job or update a job definition 1677 func (s *StateStore) UpsertJob(msgType structs.MessageType, index uint64, sub *structs.JobSubmission, job *structs.Job) error { 1678 txn := s.db.WriteTxnMsgT(msgType, index) 1679 defer txn.Abort() 1680 if err := s.upsertJobImpl(index, sub, job, false, txn); err != nil { 1681 return err 1682 } 1683 return txn.Commit() 1684 } 1685 1686 // UpsertJobTxn is used to register a job or update a job definition, like UpsertJob, 1687 // but in a transaction. Useful for when making multiple modifications atomically 1688 func (s *StateStore) UpsertJobTxn(index uint64, sub *structs.JobSubmission, job *structs.Job, txn Txn) error { 1689 return s.upsertJobImpl(index, sub, job, false, txn) 1690 } 1691 1692 // upsertJobImpl is the implementation for registering a job or updating a job definition 1693 func (s *StateStore) upsertJobImpl(index uint64, sub *structs.JobSubmission, job *structs.Job, keepVersion bool, txn *txn) error { 1694 // Assert the namespace exists 1695 if exists, err := s.namespaceExists(txn, job.Namespace); err != nil { 1696 return err 1697 } else if !exists { 1698 return fmt.Errorf("job %q is in nonexistent namespace %q", job.ID, job.Namespace) 1699 } 1700 1701 // Upgrade path. 1702 // Assert the node pool is set and exists. 1703 if job.NodePool == "" { 1704 job.NodePool = structs.NodePoolDefault 1705 } 1706 if exists, err := s.nodePoolExists(txn, job.NodePool); err != nil { 1707 return err 1708 } else if !exists { 1709 return fmt.Errorf("job %q is in nonexistent node pool %q", job.ID, job.NodePool) 1710 } 1711 1712 // Check if the job already exists 1713 existing, err := txn.First("jobs", "id", job.Namespace, job.ID) 1714 var existingJob *structs.Job 1715 if err != nil { 1716 return fmt.Errorf("job lookup failed: %v", err) 1717 } 1718 1719 // Setup the indexes correctly 1720 if existing != nil { 1721 job.CreateIndex = existing.(*structs.Job).CreateIndex 1722 job.ModifyIndex = index 1723 1724 existingJob = existing.(*structs.Job) 1725 1726 // Bump the version unless asked to keep it. This should only be done 1727 // when changing an internal field such as Stable. A spec change should 1728 // always come with a version bump 1729 if !keepVersion { 1730 job.JobModifyIndex = index 1731 if job.Version <= existingJob.Version { 1732 if sub == nil { 1733 // in the reversion case we must set the submission to be 1734 // that of the job version we are reverting to 1735 sub, _ = s.jobSubmission(nil, job.Namespace, job.ID, job.Version, txn) 1736 } 1737 job.Version = existingJob.Version + 1 1738 } 1739 } 1740 1741 // Compute the job status 1742 var err error 1743 job.Status, err = s.getJobStatus(txn, job, false) 1744 if err != nil { 1745 return fmt.Errorf("setting job status for %q failed: %v", job.ID, err) 1746 } 1747 } else { 1748 job.CreateIndex = index 1749 job.ModifyIndex = index 1750 job.JobModifyIndex = index 1751 1752 if err := s.setJobStatus(index, txn, job, false, ""); err != nil { 1753 return fmt.Errorf("setting job status for %q failed: %v", job.ID, err) 1754 } 1755 1756 // Have to get the job again since it could have been updated 1757 updated, err := txn.First("jobs", "id", job.Namespace, job.ID) 1758 if err != nil { 1759 return fmt.Errorf("job lookup failed: %v", err) 1760 } 1761 if updated != nil { 1762 job = updated.(*structs.Job) 1763 } 1764 } 1765 1766 if err := s.updateSummaryWithJob(index, job, txn); err != nil { 1767 return fmt.Errorf("unable to create job summary: %v", err) 1768 } 1769 1770 if err := s.upsertJobVersion(index, job, txn); err != nil { 1771 return fmt.Errorf("unable to upsert job into job_version table: %v", err) 1772 } 1773 1774 if err := s.updateJobScalingPolicies(index, job, txn); err != nil { 1775 return fmt.Errorf("unable to update job scaling policies: %v", err) 1776 } 1777 1778 if err := s.updateJobRecommendations(index, txn, existingJob, job); err != nil { 1779 return fmt.Errorf("unable to update job recommendations: %v", err) 1780 } 1781 1782 if err := s.updateJobCSIPlugins(index, job, existingJob, txn); err != nil { 1783 return fmt.Errorf("unable to update job csi plugins: %v", err) 1784 } 1785 1786 if err := s.updateJobSubmission(index, sub, job.Namespace, job.ID, job.Version, txn); err != nil { 1787 return fmt.Errorf("unable to update job submission: %v", err) 1788 } 1789 1790 // Insert the job 1791 if err := txn.Insert("jobs", job); err != nil { 1792 return fmt.Errorf("job insert failed: %v", err) 1793 } 1794 if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil { 1795 return fmt.Errorf("index update failed: %v", err) 1796 } 1797 1798 return nil 1799 } 1800 1801 // DeleteJob is used to deregister a job 1802 func (s *StateStore) DeleteJob(index uint64, namespace, jobID string) error { 1803 txn := s.db.WriteTxn(index) 1804 defer txn.Abort() 1805 1806 err := s.DeleteJobTxn(index, namespace, jobID, txn) 1807 if err == nil { 1808 return txn.Commit() 1809 } 1810 return err 1811 } 1812 1813 // DeleteJobTxn is used to deregister a job, like DeleteJob, 1814 // but in a transaction. Useful for when making multiple modifications atomically 1815 func (s *StateStore) DeleteJobTxn(index uint64, namespace, jobID string, txn Txn) error { 1816 // Lookup the node 1817 existing, err := txn.First("jobs", "id", namespace, jobID) 1818 if err != nil { 1819 return fmt.Errorf("job lookup failed: %v", err) 1820 } 1821 if existing == nil { 1822 return fmt.Errorf("job not found") 1823 } 1824 1825 // Check if we should update a parent job summary 1826 job := existing.(*structs.Job) 1827 if job.ParentID != "" { 1828 summaryRaw, err := txn.First("job_summary", "id", namespace, job.ParentID) 1829 if err != nil { 1830 return fmt.Errorf("unable to retrieve summary for parent job: %v", err) 1831 } 1832 1833 // Only continue if the summary exists. It could not exist if the parent 1834 // job was removed 1835 if summaryRaw != nil { 1836 existing := summaryRaw.(*structs.JobSummary) 1837 pSummary := existing.Copy() 1838 if pSummary.Children != nil { 1839 1840 modified := false 1841 switch job.Status { 1842 case structs.JobStatusPending: 1843 pSummary.Children.Pending-- 1844 pSummary.Children.Dead++ 1845 modified = true 1846 case structs.JobStatusRunning: 1847 pSummary.Children.Running-- 1848 pSummary.Children.Dead++ 1849 modified = true 1850 case structs.JobStatusDead: 1851 default: 1852 return fmt.Errorf("unknown old job status %q", job.Status) 1853 } 1854 1855 if modified { 1856 // Update the modify index 1857 pSummary.ModifyIndex = index 1858 1859 // Insert the summary 1860 if err := txn.Insert("job_summary", pSummary); err != nil { 1861 return fmt.Errorf("job summary insert failed: %v", err) 1862 } 1863 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 1864 return fmt.Errorf("index update failed: %v", err) 1865 } 1866 } 1867 } 1868 } 1869 } 1870 1871 // Delete the job 1872 if err := txn.Delete("jobs", existing); err != nil { 1873 return fmt.Errorf("job delete failed: %v", err) 1874 } 1875 if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil { 1876 return fmt.Errorf("index update failed: %v", err) 1877 } 1878 1879 // Delete the job versions 1880 if err := s.deleteJobVersions(index, job, txn); err != nil { 1881 return err 1882 } 1883 1884 // Cleanup plugins registered by this job, before we delete the summary 1885 err = s.deleteJobFromPlugins(index, txn, job) 1886 if err != nil { 1887 return fmt.Errorf("deleting job from plugin: %v", err) 1888 } 1889 1890 // Delete the job summary 1891 if _, err = txn.DeleteAll("job_summary", "id", namespace, jobID); err != nil { 1892 return fmt.Errorf("deleting job summary failed: %v", err) 1893 } 1894 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 1895 return fmt.Errorf("index update failed: %v", err) 1896 } 1897 1898 // Delete the job submission 1899 if err := s.deleteJobSubmission(job, txn); err != nil { 1900 return fmt.Errorf("deleting job submission failed: %v", err) 1901 } 1902 1903 // Delete any remaining job scaling policies 1904 if err := s.deleteJobScalingPolicies(index, job, txn); err != nil { 1905 return fmt.Errorf("deleting job scaling policies failed: %v", err) 1906 } 1907 1908 // Delete any job recommendations 1909 if err := s.deleteRecommendationsByJob(index, txn, job); err != nil { 1910 return fmt.Errorf("deleting job recommendatons failed: %v", err) 1911 } 1912 1913 // Delete the scaling events 1914 if _, err = txn.DeleteAll("scaling_event", "id", namespace, jobID); err != nil { 1915 return fmt.Errorf("deleting job scaling events failed: %v", err) 1916 } 1917 if err := txn.Insert("index", &IndexEntry{"scaling_event", index}); err != nil { 1918 return fmt.Errorf("index update failed: %v", err) 1919 } 1920 1921 return nil 1922 } 1923 1924 // deleteJobScalingPolicies deletes any scaling policies associated with the job 1925 func (s *StateStore) deleteJobScalingPolicies(index uint64, job *structs.Job, txn *txn) error { 1926 iter, err := s.ScalingPoliciesByJobTxn(nil, job.Namespace, job.ID, txn) 1927 if err != nil { 1928 return fmt.Errorf("getting job scaling policies for deletion failed: %v", err) 1929 } 1930 1931 // Put them into a slice so there are no safety concerns while actually 1932 // performing the deletes 1933 policies := []interface{}{} 1934 for { 1935 raw := iter.Next() 1936 if raw == nil { 1937 break 1938 } 1939 policies = append(policies, raw) 1940 } 1941 1942 // Do the deletes 1943 for _, p := range policies { 1944 if err := txn.Delete("scaling_policy", p); err != nil { 1945 return fmt.Errorf("deleting scaling policy failed: %v", err) 1946 } 1947 } 1948 1949 if len(policies) > 0 { 1950 if err := txn.Insert("index", &IndexEntry{"scaling_policy", index}); err != nil { 1951 return fmt.Errorf("index update failed: %v", err) 1952 } 1953 } 1954 return nil 1955 } 1956 1957 func (s *StateStore) deleteJobSubmission(job *structs.Job, txn *txn) error { 1958 // find submissions associated with job 1959 remove := *set.NewHashSet[*structs.JobSubmission, string](structs.JobTrackedVersions) 1960 1961 iter, err := txn.Get("job_submission", "id_prefix", job.Namespace, job.ID) 1962 if err != nil { 1963 return err 1964 } 1965 1966 for { 1967 obj := iter.Next() 1968 if obj == nil { 1969 break 1970 } 1971 sub := obj.(*structs.JobSubmission) 1972 1973 // iterating by prefix; ensure we have an exact match 1974 if sub.Namespace == job.Namespace && sub.JobID == job.ID { 1975 remove.Insert(sub) 1976 } 1977 } 1978 1979 // now delete the submissions we found associated with the job 1980 for _, sub := range remove.Slice() { 1981 err := txn.Delete("job_submission", sub) 1982 if err != nil { 1983 return err 1984 } 1985 } 1986 1987 return nil 1988 } 1989 1990 // deleteJobVersions deletes all versions of the given job. 1991 func (s *StateStore) deleteJobVersions(index uint64, job *structs.Job, txn *txn) error { 1992 iter, err := txn.Get("job_version", "id_prefix", job.Namespace, job.ID) 1993 if err != nil { 1994 return err 1995 } 1996 1997 // Put them into a slice so there are no safety concerns while actually 1998 // performing the deletes 1999 jobs := []*structs.Job{} 2000 for { 2001 raw := iter.Next() 2002 if raw == nil { 2003 break 2004 } 2005 2006 // Ensure the ID is an exact match 2007 j := raw.(*structs.Job) 2008 if j.ID != job.ID { 2009 continue 2010 } 2011 2012 jobs = append(jobs, j) 2013 } 2014 2015 // Do the deletes 2016 for _, j := range jobs { 2017 if err := txn.Delete("job_version", j); err != nil { 2018 return fmt.Errorf("deleting job versions failed: %v", err) 2019 } 2020 } 2021 2022 if err := txn.Insert("index", &IndexEntry{"job_version", index}); err != nil { 2023 return fmt.Errorf("index update failed: %v", err) 2024 } 2025 2026 return nil 2027 } 2028 2029 // upsertJobVersion inserts a job into its historic version table and limits the 2030 // number of job versions that are tracked. 2031 func (s *StateStore) upsertJobVersion(index uint64, job *structs.Job, txn *txn) error { 2032 // Insert the job 2033 if err := txn.Insert("job_version", job); err != nil { 2034 return fmt.Errorf("failed to insert job into job_version table: %v", err) 2035 } 2036 2037 if err := txn.Insert("index", &IndexEntry{"job_version", index}); err != nil { 2038 return fmt.Errorf("index update failed: %v", err) 2039 } 2040 2041 // Get all the historic jobs for this ID 2042 all, err := s.jobVersionByID(txn, nil, job.Namespace, job.ID) 2043 if err != nil { 2044 return fmt.Errorf("failed to look up job versions for %q: %v", job.ID, err) 2045 } 2046 2047 // If we are below the limit there is no GCing to be done 2048 if len(all) <= structs.JobTrackedVersions { 2049 return nil 2050 } 2051 2052 // We have to delete a historic job to make room. 2053 // Find index of the highest versioned stable job 2054 stableIdx := -1 2055 for i, j := range all { 2056 if j.Stable { 2057 stableIdx = i 2058 break 2059 } 2060 } 2061 2062 // If the stable job is the oldest version, do a swap to bring it into the 2063 // keep set. 2064 max := structs.JobTrackedVersions 2065 if stableIdx == max { 2066 all[max-1], all[max] = all[max], all[max-1] 2067 } 2068 2069 // Delete the job outside of the set that are being kept. 2070 d := all[max] 2071 if err := txn.Delete("job_version", d); err != nil { 2072 return fmt.Errorf("failed to delete job %v (%d) from job_version", d.ID, d.Version) 2073 } 2074 2075 return nil 2076 } 2077 2078 // JobSubmission returns the original HCL/Variables context of a job, if available. 2079 // 2080 // Note: it is a normal case for the submission context to be unavailable, in which case 2081 // nil is returned with no error. 2082 func (s *StateStore) JobSubmission(ws memdb.WatchSet, namespace, jobName string, version uint64) (*structs.JobSubmission, error) { 2083 txn := s.db.ReadTxn() 2084 return s.jobSubmission(ws, namespace, jobName, version, txn) 2085 } 2086 2087 func (s *StateStore) jobSubmission(ws memdb.WatchSet, namespace, jobName string, version uint64, txn Txn) (*structs.JobSubmission, error) { 2088 watchCh, existing, err := txn.FirstWatch("job_submission", "id", namespace, jobName, version) 2089 if err != nil { 2090 return nil, fmt.Errorf("job submission lookup failed: %v", err) 2091 } 2092 ws.Add(watchCh) 2093 if existing != nil { 2094 return existing.(*structs.JobSubmission), nil 2095 } 2096 return nil, nil 2097 } 2098 2099 // JobByID is used to lookup a job by its ID. JobByID returns the current/latest job 2100 // version. 2101 func (s *StateStore) JobByID(ws memdb.WatchSet, namespace, id string) (*structs.Job, error) { 2102 txn := s.db.ReadTxn() 2103 return s.JobByIDTxn(ws, namespace, id, txn) 2104 } 2105 2106 // JobByIDTxn is used to lookup a job by its ID, like JobByID. JobByID returns the job version 2107 // accessible through in the transaction 2108 func (s *StateStore) JobByIDTxn(ws memdb.WatchSet, namespace, id string, txn Txn) (*structs.Job, error) { 2109 watchCh, existing, err := txn.FirstWatch("jobs", "id", namespace, id) 2110 if err != nil { 2111 return nil, fmt.Errorf("job lookup failed: %v", err) 2112 } 2113 ws.Add(watchCh) 2114 2115 if existing != nil { 2116 return existing.(*structs.Job), nil 2117 } 2118 return nil, nil 2119 } 2120 2121 // JobsByIDPrefix is used to lookup a job by prefix. If querying all namespaces 2122 // the prefix will not be filtered by an index. 2123 func (s *StateStore) JobsByIDPrefix(ws memdb.WatchSet, namespace, id string) (memdb.ResultIterator, error) { 2124 if namespace == structs.AllNamespacesSentinel { 2125 return s.jobsByIDPrefixAllNamespaces(ws, id) 2126 } 2127 2128 txn := s.db.ReadTxn() 2129 2130 iter, err := txn.Get("jobs", "id_prefix", namespace, id) 2131 if err != nil { 2132 return nil, fmt.Errorf("job lookup failed: %v", err) 2133 } 2134 2135 ws.Add(iter.WatchCh()) 2136 2137 return iter, nil 2138 } 2139 2140 func (s *StateStore) jobsByIDPrefixAllNamespaces(ws memdb.WatchSet, prefix string) (memdb.ResultIterator, error) { 2141 txn := s.db.ReadTxn() 2142 2143 // Walk the entire jobs table 2144 iter, err := txn.Get("jobs", "id") 2145 2146 if err != nil { 2147 return nil, err 2148 } 2149 2150 ws.Add(iter.WatchCh()) 2151 2152 // Filter the iterator by ID prefix 2153 f := func(raw interface{}) bool { 2154 job, ok := raw.(*structs.Job) 2155 if !ok { 2156 return true 2157 } 2158 return !strings.HasPrefix(job.ID, prefix) 2159 } 2160 wrap := memdb.NewFilterIterator(iter, f) 2161 return wrap, nil 2162 } 2163 2164 // JobVersionsByID returns all the tracked versions of a job. 2165 func (s *StateStore) JobVersionsByID(ws memdb.WatchSet, namespace, id string) ([]*structs.Job, error) { 2166 txn := s.db.ReadTxn() 2167 2168 return s.jobVersionByID(txn, ws, namespace, id) 2169 } 2170 2171 // jobVersionByID is the underlying implementation for retrieving all tracked 2172 // versions of a job and is called under an existing transaction. A watch set 2173 // can optionally be passed in to add the job histories to the watch set. 2174 func (s *StateStore) jobVersionByID(txn *txn, ws memdb.WatchSet, namespace, id string) ([]*structs.Job, error) { 2175 // Get all the historic jobs for this ID 2176 iter, err := txn.Get("job_version", "id_prefix", namespace, id) 2177 if err != nil { 2178 return nil, err 2179 } 2180 2181 ws.Add(iter.WatchCh()) 2182 2183 var all []*structs.Job 2184 for { 2185 raw := iter.Next() 2186 if raw == nil { 2187 break 2188 } 2189 2190 // Ensure the ID is an exact match 2191 j := raw.(*structs.Job) 2192 if j.ID != id { 2193 continue 2194 } 2195 2196 all = append(all, j) 2197 } 2198 2199 // Sort in reverse order so that the highest version is first 2200 sort.Slice(all, func(i, j int) bool { 2201 return all[i].Version > all[j].Version 2202 }) 2203 2204 return all, nil 2205 } 2206 2207 // JobByIDAndVersion returns the job identified by its ID and Version. The 2208 // passed watchset may be nil. 2209 func (s *StateStore) JobByIDAndVersion(ws memdb.WatchSet, namespace, id string, version uint64) (*structs.Job, error) { 2210 txn := s.db.ReadTxn() 2211 return s.jobByIDAndVersionImpl(ws, namespace, id, version, txn) 2212 } 2213 2214 // jobByIDAndVersionImpl returns the job identified by its ID and Version. The 2215 // passed watchset may be nil. 2216 func (s *StateStore) jobByIDAndVersionImpl(ws memdb.WatchSet, namespace, id string, 2217 version uint64, txn *txn) (*structs.Job, error) { 2218 2219 watchCh, existing, err := txn.FirstWatch("job_version", "id", namespace, id, version) 2220 if err != nil { 2221 return nil, err 2222 } 2223 2224 ws.Add(watchCh) 2225 2226 if existing != nil { 2227 job := existing.(*structs.Job) 2228 return job, nil 2229 } 2230 2231 return nil, nil 2232 } 2233 2234 func (s *StateStore) JobVersions(ws memdb.WatchSet) (memdb.ResultIterator, error) { 2235 txn := s.db.ReadTxn() 2236 2237 // Walk the entire deployments table 2238 iter, err := txn.Get("job_version", "id") 2239 if err != nil { 2240 return nil, err 2241 } 2242 2243 ws.Add(iter.WatchCh()) 2244 return iter, nil 2245 } 2246 2247 // Jobs returns an iterator over all the jobs 2248 func (s *StateStore) Jobs(ws memdb.WatchSet) (memdb.ResultIterator, error) { 2249 txn := s.db.ReadTxn() 2250 2251 // Walk the entire jobs table 2252 iter, err := txn.Get("jobs", "id") 2253 if err != nil { 2254 return nil, err 2255 } 2256 2257 ws.Add(iter.WatchCh()) 2258 2259 return iter, nil 2260 } 2261 2262 // JobsByNamespace returns an iterator over all the jobs for the given namespace 2263 func (s *StateStore) JobsByNamespace(ws memdb.WatchSet, namespace string) (memdb.ResultIterator, error) { 2264 txn := s.db.ReadTxn() 2265 return s.jobsByNamespaceImpl(ws, namespace, txn) 2266 } 2267 2268 // jobsByNamespaceImpl returns an iterator over all the jobs for the given namespace 2269 func (s *StateStore) jobsByNamespaceImpl(ws memdb.WatchSet, namespace string, txn *txn) (memdb.ResultIterator, error) { 2270 // Walk the entire jobs table 2271 iter, err := txn.Get("jobs", "id_prefix", namespace, "") 2272 if err != nil { 2273 return nil, err 2274 } 2275 2276 ws.Add(iter.WatchCh()) 2277 2278 return iter, nil 2279 } 2280 2281 // JobsByPeriodic returns an iterator over all the periodic or non-periodic jobs. 2282 func (s *StateStore) JobsByPeriodic(ws memdb.WatchSet, periodic bool) (memdb.ResultIterator, error) { 2283 txn := s.db.ReadTxn() 2284 2285 iter, err := txn.Get("jobs", "periodic", periodic) 2286 if err != nil { 2287 return nil, err 2288 } 2289 2290 ws.Add(iter.WatchCh()) 2291 2292 return iter, nil 2293 } 2294 2295 // JobsByScheduler returns an iterator over all the jobs with the specific 2296 // scheduler type. 2297 func (s *StateStore) JobsByScheduler(ws memdb.WatchSet, schedulerType string) (memdb.ResultIterator, error) { 2298 txn := s.db.ReadTxn() 2299 2300 // Return an iterator for jobs with the specific type. 2301 iter, err := txn.Get("jobs", "type", schedulerType) 2302 if err != nil { 2303 return nil, err 2304 } 2305 2306 ws.Add(iter.WatchCh()) 2307 2308 return iter, nil 2309 } 2310 2311 // JobsByGC returns an iterator over all jobs eligible or ineligible for garbage 2312 // collection. 2313 func (s *StateStore) JobsByGC(ws memdb.WatchSet, gc bool) (memdb.ResultIterator, error) { 2314 txn := s.db.ReadTxn() 2315 2316 iter, err := txn.Get("jobs", "gc", gc) 2317 if err != nil { 2318 return nil, err 2319 } 2320 2321 ws.Add(iter.WatchCh()) 2322 2323 return iter, nil 2324 } 2325 2326 // JobsByPool returns an iterator over all jobs in a given node pool. 2327 func (s *StateStore) JobsByPool(ws memdb.WatchSet, pool string) (memdb.ResultIterator, error) { 2328 txn := s.db.ReadTxn() 2329 2330 iter, err := txn.Get("jobs", "pool", pool) 2331 if err != nil { 2332 return nil, err 2333 } 2334 2335 ws.Add(iter.WatchCh()) 2336 2337 return iter, nil 2338 } 2339 2340 // JobSummaryByID returns a job summary object which matches a specific id. 2341 func (s *StateStore) JobSummaryByID(ws memdb.WatchSet, namespace, jobID string) (*structs.JobSummary, error) { 2342 txn := s.db.ReadTxn() 2343 2344 watchCh, existing, err := txn.FirstWatch("job_summary", "id", namespace, jobID) 2345 if err != nil { 2346 return nil, err 2347 } 2348 2349 ws.Add(watchCh) 2350 2351 if existing != nil { 2352 summary := existing.(*structs.JobSummary) 2353 return summary, nil 2354 } 2355 2356 return nil, nil 2357 } 2358 2359 // JobSummaries walks the entire job summary table and returns all the job 2360 // summary objects 2361 func (s *StateStore) JobSummaries(ws memdb.WatchSet) (memdb.ResultIterator, error) { 2362 txn := s.db.ReadTxn() 2363 2364 iter, err := txn.Get("job_summary", "id") 2365 if err != nil { 2366 return nil, err 2367 } 2368 2369 ws.Add(iter.WatchCh()) 2370 2371 return iter, nil 2372 } 2373 2374 // JobSummaryByPrefix is used to look up Job Summary by id prefix 2375 func (s *StateStore) JobSummaryByPrefix(ws memdb.WatchSet, namespace, id string) (memdb.ResultIterator, error) { 2376 txn := s.db.ReadTxn() 2377 2378 iter, err := txn.Get("job_summary", "id_prefix", namespace, id) 2379 if err != nil { 2380 return nil, fmt.Errorf("job_summary lookup failed: %v", err) 2381 } 2382 2383 ws.Add(iter.WatchCh()) 2384 2385 return iter, nil 2386 } 2387 2388 // UpsertCSIVolume inserts a volume in the state store. 2389 func (s *StateStore) UpsertCSIVolume(index uint64, volumes []*structs.CSIVolume) error { 2390 txn := s.db.WriteTxn(index) 2391 defer txn.Abort() 2392 2393 for _, v := range volumes { 2394 if exists, err := s.namespaceExists(txn, v.Namespace); err != nil { 2395 return err 2396 } else if !exists { 2397 return fmt.Errorf("volume %s is in nonexistent namespace %s", v.ID, v.Namespace) 2398 } 2399 2400 obj, err := txn.First("csi_volumes", "id", v.Namespace, v.ID) 2401 if err != nil { 2402 return fmt.Errorf("volume existence check error: %v", err) 2403 } 2404 if obj != nil { 2405 // Allow some properties of a volume to be updated in place, but 2406 // prevent accidentally overwriting important properties, or 2407 // overwriting a volume in use 2408 old := obj.(*structs.CSIVolume) 2409 if old.ExternalID != v.ExternalID || 2410 old.PluginID != v.PluginID || 2411 old.Provider != v.Provider { 2412 return fmt.Errorf("volume identity cannot be updated: %s", v.ID) 2413 } 2414 s.CSIVolumeDenormalize(nil, old.Copy()) 2415 if old.InUse() { 2416 return fmt.Errorf("volume cannot be updated while in use") 2417 } 2418 2419 v.CreateIndex = old.CreateIndex 2420 v.ModifyIndex = index 2421 } else { 2422 v.CreateIndex = index 2423 v.ModifyIndex = index 2424 } 2425 2426 // Allocations are copy on write, so we want to keep the Allocation ID 2427 // but we need to clear the pointer so that we don't store it when we 2428 // write the volume to the state store. We'll get it from the db in 2429 // denormalize. 2430 for allocID := range v.ReadAllocs { 2431 v.ReadAllocs[allocID] = nil 2432 } 2433 for allocID := range v.WriteAllocs { 2434 v.WriteAllocs[allocID] = nil 2435 } 2436 2437 err = txn.Insert("csi_volumes", v) 2438 if err != nil { 2439 return fmt.Errorf("volume insert: %v", err) 2440 } 2441 } 2442 2443 if err := txn.Insert("index", &IndexEntry{"csi_volumes", index}); err != nil { 2444 return fmt.Errorf("index update failed: %v", err) 2445 } 2446 2447 return txn.Commit() 2448 } 2449 2450 // CSIVolumes returns the unfiltered list of all volumes. Caller should 2451 // snapshot if it wants to also denormalize the plugins. 2452 func (s *StateStore) CSIVolumes(ws memdb.WatchSet) (memdb.ResultIterator, error) { 2453 txn := s.db.ReadTxn() 2454 defer txn.Abort() 2455 2456 iter, err := txn.Get("csi_volumes", "id") 2457 if err != nil { 2458 return nil, fmt.Errorf("csi_volumes lookup failed: %v", err) 2459 } 2460 2461 ws.Add(iter.WatchCh()) 2462 2463 return iter, nil 2464 } 2465 2466 // CSIVolumeByID is used to lookup a single volume. Returns a copy of the 2467 // volume because its plugins and allocations are denormalized to provide 2468 // accurate Health. 2469 func (s *StateStore) CSIVolumeByID(ws memdb.WatchSet, namespace, id string) (*structs.CSIVolume, error) { 2470 txn := s.db.ReadTxn() 2471 2472 watchCh, obj, err := txn.FirstWatch("csi_volumes", "id", namespace, id) 2473 if err != nil { 2474 return nil, fmt.Errorf("volume lookup failed for %s: %v", id, err) 2475 } 2476 ws.Add(watchCh) 2477 2478 if obj == nil { 2479 return nil, nil 2480 } 2481 vol := obj.(*structs.CSIVolume) 2482 2483 // we return the volume with the plugins denormalized by default, 2484 // because the scheduler needs them for feasibility checking 2485 return s.csiVolumeDenormalizePluginsTxn(txn, vol.Copy()) 2486 } 2487 2488 // CSIVolumesByPluginID looks up csi_volumes by pluginID. Caller should 2489 // snapshot if it wants to also denormalize the plugins. 2490 func (s *StateStore) CSIVolumesByPluginID(ws memdb.WatchSet, namespace, prefix, pluginID string) (memdb.ResultIterator, error) { 2491 txn := s.db.ReadTxn() 2492 2493 iter, err := txn.Get("csi_volumes", "plugin_id", pluginID) 2494 if err != nil { 2495 return nil, fmt.Errorf("volume lookup failed: %v", err) 2496 } 2497 2498 // Filter the iterator by namespace 2499 f := func(raw interface{}) bool { 2500 v, ok := raw.(*structs.CSIVolume) 2501 if !ok { 2502 return false 2503 } 2504 return v.Namespace != namespace && strings.HasPrefix(v.ID, prefix) 2505 } 2506 2507 wrap := memdb.NewFilterIterator(iter, f) 2508 return wrap, nil 2509 } 2510 2511 // CSIVolumesByIDPrefix supports search. Caller should snapshot if it wants to 2512 // also denormalize the plugins. If using a prefix with the wildcard namespace, 2513 // the results will not use the index prefix. 2514 func (s *StateStore) CSIVolumesByIDPrefix(ws memdb.WatchSet, namespace, volumeID string) (memdb.ResultIterator, error) { 2515 if namespace == structs.AllNamespacesSentinel { 2516 return s.csiVolumeByIDPrefixAllNamespaces(ws, volumeID) 2517 } 2518 2519 txn := s.db.ReadTxn() 2520 2521 iter, err := txn.Get("csi_volumes", "id_prefix", namespace, volumeID) 2522 if err != nil { 2523 return nil, err 2524 } 2525 2526 ws.Add(iter.WatchCh()) 2527 2528 return iter, nil 2529 } 2530 2531 func (s *StateStore) csiVolumeByIDPrefixAllNamespaces(ws memdb.WatchSet, prefix string) (memdb.ResultIterator, error) { 2532 txn := s.db.ReadTxn() 2533 2534 // Walk the entire csi_volumes table 2535 iter, err := txn.Get("csi_volumes", "id") 2536 2537 if err != nil { 2538 return nil, err 2539 } 2540 2541 ws.Add(iter.WatchCh()) 2542 2543 // Filter the iterator by ID prefix 2544 f := func(raw interface{}) bool { 2545 v, ok := raw.(*structs.CSIVolume) 2546 if !ok { 2547 return false 2548 } 2549 return !strings.HasPrefix(v.ID, prefix) 2550 } 2551 wrap := memdb.NewFilterIterator(iter, f) 2552 return wrap, nil 2553 } 2554 2555 // CSIVolumesByNodeID looks up CSIVolumes in use on a node. Caller should 2556 // snapshot if it wants to also denormalize the plugins. 2557 func (s *StateStore) CSIVolumesByNodeID(ws memdb.WatchSet, prefix, nodeID string) (memdb.ResultIterator, error) { 2558 allocs, err := s.AllocsByNode(ws, nodeID) 2559 if err != nil { 2560 return nil, fmt.Errorf("alloc lookup failed: %v", err) 2561 } 2562 2563 // Find volume ids for CSI volumes in running allocs, or allocs that we desire to run 2564 ids := map[string]string{} // Map volumeID to Namespace 2565 for _, a := range allocs { 2566 tg := a.Job.LookupTaskGroup(a.TaskGroup) 2567 2568 if !(a.DesiredStatus == structs.AllocDesiredStatusRun || 2569 a.ClientStatus == structs.AllocClientStatusRunning) || 2570 len(tg.Volumes) == 0 { 2571 continue 2572 } 2573 2574 for _, v := range tg.Volumes { 2575 if v.Type != structs.VolumeTypeCSI { 2576 continue 2577 } 2578 ids[v.Source] = a.Namespace 2579 } 2580 } 2581 2582 // Lookup the raw CSIVolumes to match the other list interfaces 2583 iter := NewSliceIterator() 2584 txn := s.db.ReadTxn() 2585 for id, namespace := range ids { 2586 if strings.HasPrefix(id, prefix) { 2587 watchCh, raw, err := txn.FirstWatch("csi_volumes", "id", namespace, id) 2588 if err != nil { 2589 return nil, fmt.Errorf("volume lookup failed: %s %v", id, err) 2590 } 2591 ws.Add(watchCh) 2592 iter.Add(raw) 2593 } 2594 } 2595 2596 return iter, nil 2597 } 2598 2599 // CSIVolumesByNamespace looks up the entire csi_volumes table 2600 func (s *StateStore) CSIVolumesByNamespace(ws memdb.WatchSet, namespace, prefix string) (memdb.ResultIterator, error) { 2601 txn := s.db.ReadTxn() 2602 2603 return s.csiVolumesByNamespaceImpl(txn, ws, namespace, prefix) 2604 } 2605 2606 func (s *StateStore) csiVolumesByNamespaceImpl(txn *txn, ws memdb.WatchSet, namespace, prefix string) (memdb.ResultIterator, error) { 2607 2608 iter, err := txn.Get("csi_volumes", "id_prefix", namespace, prefix) 2609 if err != nil { 2610 return nil, fmt.Errorf("volume lookup failed: %v", err) 2611 } 2612 2613 ws.Add(iter.WatchCh()) 2614 2615 return iter, nil 2616 } 2617 2618 // CSIVolumeClaim updates the volume's claim count and allocation list 2619 func (s *StateStore) CSIVolumeClaim(index uint64, namespace, id string, claim *structs.CSIVolumeClaim) error { 2620 txn := s.db.WriteTxn(index) 2621 defer txn.Abort() 2622 2623 row, err := txn.First("csi_volumes", "id", namespace, id) 2624 if err != nil { 2625 return fmt.Errorf("volume lookup failed: %s: %v", id, err) 2626 } 2627 if row == nil { 2628 return fmt.Errorf("volume not found: %s", id) 2629 } 2630 2631 orig, ok := row.(*structs.CSIVolume) 2632 if !ok { 2633 return fmt.Errorf("volume row conversion error") 2634 } 2635 2636 var alloc *structs.Allocation 2637 if claim.State == structs.CSIVolumeClaimStateTaken { 2638 alloc, err = s.allocByIDImpl(txn, nil, claim.AllocationID) 2639 if err != nil { 2640 s.logger.Error("AllocByID failed", "error", err) 2641 return fmt.Errorf(structs.ErrUnknownAllocationPrefix) 2642 } 2643 if alloc == nil { 2644 s.logger.Error("AllocByID failed to find alloc", "alloc_id", claim.AllocationID) 2645 if err != nil { 2646 return fmt.Errorf(structs.ErrUnknownAllocationPrefix) 2647 } 2648 } 2649 } 2650 2651 volume, err := s.csiVolumeDenormalizePluginsTxn(txn, orig.Copy()) 2652 if err != nil { 2653 return err 2654 } 2655 volume, err = s.csiVolumeDenormalizeTxn(txn, nil, volume) 2656 if err != nil { 2657 return err 2658 } 2659 2660 // in the case of a job deregistration, there will be no allocation ID 2661 // for the claim but we still want to write an updated index to the volume 2662 // so that volume reaping is triggered 2663 if claim.AllocationID != "" { 2664 err = volume.Claim(claim, alloc) 2665 if err != nil { 2666 return err 2667 } 2668 } 2669 2670 volume.ModifyIndex = index 2671 2672 // Allocations are copy on write, so we want to keep the Allocation ID 2673 // but we need to clear the pointer so that we don't store it when we 2674 // write the volume to the state store. We'll get it from the db in 2675 // denormalize. 2676 for allocID := range volume.ReadAllocs { 2677 volume.ReadAllocs[allocID] = nil 2678 } 2679 for allocID := range volume.WriteAllocs { 2680 volume.WriteAllocs[allocID] = nil 2681 } 2682 2683 if err = txn.Insert("csi_volumes", volume); err != nil { 2684 return fmt.Errorf("volume update failed: %s: %v", id, err) 2685 } 2686 2687 if err = txn.Insert("index", &IndexEntry{"csi_volumes", index}); err != nil { 2688 return fmt.Errorf("index update failed: %v", err) 2689 } 2690 2691 return txn.Commit() 2692 } 2693 2694 // CSIVolumeDeregister removes the volume from the server 2695 func (s *StateStore) CSIVolumeDeregister(index uint64, namespace string, ids []string, force bool) error { 2696 txn := s.db.WriteTxn(index) 2697 defer txn.Abort() 2698 2699 for _, id := range ids { 2700 existing, err := txn.First("csi_volumes", "id", namespace, id) 2701 if err != nil { 2702 return fmt.Errorf("volume lookup failed: %s: %v", id, err) 2703 } 2704 2705 if existing == nil { 2706 return fmt.Errorf("volume not found: %s", id) 2707 } 2708 2709 vol, ok := existing.(*structs.CSIVolume) 2710 if !ok { 2711 return fmt.Errorf("volume row conversion error: %s", id) 2712 } 2713 2714 // The common case for a volume deregister is when the volume is 2715 // unused, but we can also let an operator intervene in the case where 2716 // allocations have been stopped but claims can't be freed because 2717 // ex. the plugins have all been removed. 2718 if vol.InUse() { 2719 if !force || !s.volSafeToForce(txn, vol) { 2720 return fmt.Errorf("volume in use: %s", id) 2721 } 2722 } 2723 2724 if err = txn.Delete("csi_volumes", existing); err != nil { 2725 return fmt.Errorf("volume delete failed: %s: %v", id, err) 2726 } 2727 } 2728 2729 if err := txn.Insert("index", &IndexEntry{"csi_volumes", index}); err != nil { 2730 return fmt.Errorf("index update failed: %v", err) 2731 } 2732 2733 return txn.Commit() 2734 } 2735 2736 // volSafeToForce checks if the any of the remaining allocations 2737 // are in a non-terminal state. 2738 func (s *StateStore) volSafeToForce(txn Txn, v *structs.CSIVolume) bool { 2739 v = v.Copy() 2740 vol, err := s.csiVolumeDenormalizeTxn(txn, nil, v) 2741 if err != nil { 2742 return false 2743 } 2744 2745 for _, alloc := range vol.ReadAllocs { 2746 if alloc != nil && !alloc.TerminalStatus() { 2747 return false 2748 } 2749 } 2750 for _, alloc := range vol.WriteAllocs { 2751 if alloc != nil && !alloc.TerminalStatus() { 2752 return false 2753 } 2754 } 2755 return true 2756 } 2757 2758 // CSIVolumeDenormalizePlugins returns a CSIVolume with current health and 2759 // plugins, but without allocations. 2760 // Use this for current volume metadata, handling lists of volumes. 2761 // Use CSIVolumeDenormalize for volumes containing both health and current 2762 // allocations. 2763 func (s *StateStore) CSIVolumeDenormalizePlugins(ws memdb.WatchSet, vol *structs.CSIVolume) (*structs.CSIVolume, error) { 2764 if vol == nil { 2765 return nil, nil 2766 } 2767 txn := s.db.ReadTxn() 2768 defer txn.Abort() 2769 return s.csiVolumeDenormalizePluginsTxn(txn, vol) 2770 } 2771 2772 // csiVolumeDenormalizePluginsTxn implements 2773 // CSIVolumeDenormalizePlugins, inside a transaction. 2774 func (s *StateStore) csiVolumeDenormalizePluginsTxn(txn Txn, vol *structs.CSIVolume) (*structs.CSIVolume, error) { 2775 if vol == nil { 2776 return nil, nil 2777 } 2778 plug, err := s.CSIPluginByIDTxn(txn, nil, vol.PluginID) 2779 if err != nil { 2780 return nil, fmt.Errorf("plugin lookup error: %s %v", vol.PluginID, err) 2781 } 2782 if plug == nil { 2783 vol.ControllersHealthy = 0 2784 vol.NodesHealthy = 0 2785 vol.Schedulable = false 2786 return vol, nil 2787 } 2788 2789 vol.Provider = plug.Provider 2790 vol.ProviderVersion = plug.Version 2791 vol.ControllerRequired = plug.ControllerRequired 2792 vol.ControllersHealthy = plug.ControllersHealthy 2793 vol.NodesHealthy = plug.NodesHealthy 2794 2795 // This value may be stale, but stale is ok 2796 vol.ControllersExpected = plug.ControllersExpected 2797 vol.NodesExpected = plug.NodesExpected 2798 2799 vol.Schedulable = vol.NodesHealthy > 0 2800 if vol.ControllerRequired { 2801 vol.Schedulable = vol.ControllersHealthy > 0 && vol.Schedulable 2802 } 2803 2804 return vol, nil 2805 } 2806 2807 // CSIVolumeDenormalize returns a CSIVolume with its current 2808 // Allocations and Claims, including creating new PastClaims for 2809 // terminal or garbage collected allocations. This ensures we have a 2810 // consistent state. Note that it mutates the original volume and so 2811 // should always be called on a Copy after reading from the state 2812 // store. 2813 func (s *StateStore) CSIVolumeDenormalize(ws memdb.WatchSet, vol *structs.CSIVolume) (*structs.CSIVolume, error) { 2814 txn := s.db.ReadTxn() 2815 return s.csiVolumeDenormalizeTxn(txn, ws, vol) 2816 } 2817 2818 // csiVolumeDenormalizeTxn implements CSIVolumeDenormalize inside a transaction 2819 func (s *StateStore) csiVolumeDenormalizeTxn(txn Txn, ws memdb.WatchSet, vol *structs.CSIVolume) (*structs.CSIVolume, error) { 2820 if vol == nil { 2821 return nil, nil 2822 } 2823 2824 // note: denormalize mutates the maps we pass in! 2825 denormalize := func( 2826 currentAllocs map[string]*structs.Allocation, 2827 currentClaims, pastClaims map[string]*structs.CSIVolumeClaim, 2828 fallbackMode structs.CSIVolumeClaimMode) error { 2829 2830 for id := range currentAllocs { 2831 a, err := s.allocByIDImpl(txn, ws, id) 2832 if err != nil { 2833 return err 2834 } 2835 pastClaim := pastClaims[id] 2836 currentClaim := currentClaims[id] 2837 if currentClaim == nil { 2838 // COMPAT(1.4.0): the CSIVolumeClaim fields were added 2839 // after 0.11.1, so claims made before that may be 2840 // missing this value. No clusters should see this 2841 // anymore, so warn nosily in the logs so that 2842 // operators ask us about it. Remove this block and 2843 // the now-unused fallbackMode parameter, and return 2844 // an error if currentClaim is nil in 1.4.0 2845 s.logger.Warn("volume was missing claim for allocation", 2846 "volume_id", vol.ID, "alloc", id) 2847 currentClaim = &structs.CSIVolumeClaim{ 2848 AllocationID: a.ID, 2849 NodeID: a.NodeID, 2850 Mode: fallbackMode, 2851 State: structs.CSIVolumeClaimStateTaken, 2852 } 2853 currentClaims[id] = currentClaim 2854 } 2855 2856 currentAllocs[id] = a 2857 if (a == nil || a.TerminalStatus()) && pastClaim == nil { 2858 // the alloc is garbage collected but nothing has written a PastClaim, 2859 // so create one now 2860 pastClaim = &structs.CSIVolumeClaim{ 2861 AllocationID: id, 2862 NodeID: currentClaim.NodeID, 2863 Mode: currentClaim.Mode, 2864 State: structs.CSIVolumeClaimStateUnpublishing, 2865 AccessMode: currentClaim.AccessMode, 2866 AttachmentMode: currentClaim.AttachmentMode, 2867 } 2868 pastClaims[id] = pastClaim 2869 } 2870 2871 } 2872 return nil 2873 } 2874 2875 err := denormalize(vol.ReadAllocs, vol.ReadClaims, vol.PastClaims, 2876 structs.CSIVolumeClaimRead) 2877 if err != nil { 2878 return nil, err 2879 } 2880 err = denormalize(vol.WriteAllocs, vol.WriteClaims, vol.PastClaims, 2881 structs.CSIVolumeClaimWrite) 2882 if err != nil { 2883 return nil, err 2884 } 2885 2886 // COMPAT: the AccessMode and AttachmentMode fields were added to claims 2887 // in 1.1.0, so claims made before that may be missing this value. In this 2888 // case, the volume will already have AccessMode/AttachmentMode until it 2889 // no longer has any claims, so set from those values 2890 for _, claim := range vol.ReadClaims { 2891 if claim.AccessMode == "" || claim.AttachmentMode == "" { 2892 claim.AccessMode = vol.AccessMode 2893 claim.AttachmentMode = vol.AttachmentMode 2894 } 2895 } 2896 for _, claim := range vol.WriteClaims { 2897 if claim.AccessMode == "" || claim.AttachmentMode == "" { 2898 claim.AccessMode = vol.AccessMode 2899 claim.AttachmentMode = vol.AttachmentMode 2900 } 2901 } 2902 2903 return vol, nil 2904 } 2905 2906 // CSIPlugins returns the unfiltered list of all plugin health status 2907 func (s *StateStore) CSIPlugins(ws memdb.WatchSet) (memdb.ResultIterator, error) { 2908 txn := s.db.ReadTxn() 2909 defer txn.Abort() 2910 2911 iter, err := txn.Get("csi_plugins", "id") 2912 if err != nil { 2913 return nil, fmt.Errorf("csi_plugins lookup failed: %v", err) 2914 } 2915 2916 ws.Add(iter.WatchCh()) 2917 2918 return iter, nil 2919 } 2920 2921 // CSIPluginsByIDPrefix supports search 2922 func (s *StateStore) CSIPluginsByIDPrefix(ws memdb.WatchSet, pluginID string) (memdb.ResultIterator, error) { 2923 txn := s.db.ReadTxn() 2924 2925 iter, err := txn.Get("csi_plugins", "id_prefix", pluginID) 2926 if err != nil { 2927 return nil, err 2928 } 2929 2930 ws.Add(iter.WatchCh()) 2931 2932 return iter, nil 2933 } 2934 2935 // CSIPluginByID returns a named CSIPlugin. This method creates a new 2936 // transaction so you should not call it from within another transaction. 2937 func (s *StateStore) CSIPluginByID(ws memdb.WatchSet, id string) (*structs.CSIPlugin, error) { 2938 txn := s.db.ReadTxn() 2939 plugin, err := s.CSIPluginByIDTxn(txn, ws, id) 2940 if err != nil { 2941 return nil, err 2942 } 2943 return plugin, nil 2944 } 2945 2946 // CSIPluginByIDTxn returns a named CSIPlugin 2947 func (s *StateStore) CSIPluginByIDTxn(txn Txn, ws memdb.WatchSet, id string) (*structs.CSIPlugin, error) { 2948 2949 watchCh, obj, err := txn.FirstWatch("csi_plugins", "id", id) 2950 if err != nil { 2951 return nil, fmt.Errorf("csi_plugin lookup failed: %s %v", id, err) 2952 } 2953 2954 ws.Add(watchCh) 2955 2956 if obj != nil { 2957 return obj.(*structs.CSIPlugin), nil 2958 } 2959 return nil, nil 2960 } 2961 2962 // CSIPluginDenormalize returns a CSIPlugin with allocation details. Always called on a copy of the plugin. 2963 func (s *StateStore) CSIPluginDenormalize(ws memdb.WatchSet, plug *structs.CSIPlugin) (*structs.CSIPlugin, error) { 2964 txn := s.db.ReadTxn() 2965 return s.CSIPluginDenormalizeTxn(txn, ws, plug) 2966 } 2967 2968 func (s *StateStore) CSIPluginDenormalizeTxn(txn Txn, ws memdb.WatchSet, plug *structs.CSIPlugin) (*structs.CSIPlugin, error) { 2969 if plug == nil { 2970 return nil, nil 2971 } 2972 2973 // Get the unique list of allocation ids 2974 ids := map[string]struct{}{} 2975 for _, info := range plug.Controllers { 2976 ids[info.AllocID] = struct{}{} 2977 } 2978 for _, info := range plug.Nodes { 2979 ids[info.AllocID] = struct{}{} 2980 } 2981 2982 for id := range ids { 2983 alloc, err := s.allocByIDImpl(txn, ws, id) 2984 if err != nil { 2985 return nil, err 2986 } 2987 if alloc == nil { 2988 continue 2989 } 2990 plug.Allocations = append(plug.Allocations, alloc.Stub(nil)) 2991 } 2992 sort.Slice(plug.Allocations, func(i, j int) bool { 2993 return plug.Allocations[i].ModifyIndex > plug.Allocations[j].ModifyIndex 2994 }) 2995 2996 return plug, nil 2997 } 2998 2999 // UpsertCSIPlugin writes the plugin to the state store. Note: there 3000 // is currently no raft message for this, as it's intended to support 3001 // testing use cases. 3002 func (s *StateStore) UpsertCSIPlugin(index uint64, plug *structs.CSIPlugin) error { 3003 txn := s.db.WriteTxn(index) 3004 defer txn.Abort() 3005 3006 existing, err := txn.First("csi_plugins", "id", plug.ID) 3007 if err != nil { 3008 return fmt.Errorf("csi_plugin lookup error: %s %v", plug.ID, err) 3009 } 3010 3011 plug.ModifyIndex = index 3012 if existing != nil { 3013 plug.CreateIndex = existing.(*structs.CSIPlugin).CreateIndex 3014 } 3015 3016 err = txn.Insert("csi_plugins", plug) 3017 if err != nil { 3018 return fmt.Errorf("csi_plugins insert error: %v", err) 3019 } 3020 if err := txn.Insert("index", &IndexEntry{"csi_plugins", index}); err != nil { 3021 return fmt.Errorf("index update failed: %v", err) 3022 } 3023 return txn.Commit() 3024 } 3025 3026 // DeleteCSIPlugin deletes the plugin if it's not in use. 3027 func (s *StateStore) DeleteCSIPlugin(index uint64, id string) error { 3028 txn := s.db.WriteTxn(index) 3029 defer txn.Abort() 3030 3031 plug, err := s.CSIPluginByIDTxn(txn, nil, id) 3032 if err != nil { 3033 return err 3034 } 3035 3036 if plug == nil { 3037 return nil 3038 } 3039 3040 plug, err = s.CSIPluginDenormalizeTxn(txn, nil, plug.Copy()) 3041 if err != nil { 3042 return err 3043 } 3044 if !plug.IsEmpty() { 3045 return fmt.Errorf("plugin in use") 3046 } 3047 3048 err = txn.Delete("csi_plugins", plug) 3049 if err != nil { 3050 return fmt.Errorf("csi_plugins delete error: %v", err) 3051 } 3052 return txn.Commit() 3053 } 3054 3055 // UpsertPeriodicLaunch is used to register a launch or update it. 3056 func (s *StateStore) UpsertPeriodicLaunch(index uint64, launch *structs.PeriodicLaunch) error { 3057 txn := s.db.WriteTxn(index) 3058 defer txn.Abort() 3059 3060 // Check if the job already exists 3061 existing, err := txn.First("periodic_launch", "id", launch.Namespace, launch.ID) 3062 if err != nil { 3063 return fmt.Errorf("periodic launch lookup failed: %v", err) 3064 } 3065 3066 // Setup the indexes correctly 3067 if existing != nil { 3068 launch.CreateIndex = existing.(*structs.PeriodicLaunch).CreateIndex 3069 launch.ModifyIndex = index 3070 } else { 3071 launch.CreateIndex = index 3072 launch.ModifyIndex = index 3073 } 3074 3075 // Insert the job 3076 if err := txn.Insert("periodic_launch", launch); err != nil { 3077 return fmt.Errorf("launch insert failed: %v", err) 3078 } 3079 if err := txn.Insert("index", &IndexEntry{"periodic_launch", index}); err != nil { 3080 return fmt.Errorf("index update failed: %v", err) 3081 } 3082 3083 return txn.Commit() 3084 } 3085 3086 // DeletePeriodicLaunch is used to delete the periodic launch 3087 func (s *StateStore) DeletePeriodicLaunch(index uint64, namespace, jobID string) error { 3088 txn := s.db.WriteTxn(index) 3089 defer txn.Abort() 3090 3091 err := s.DeletePeriodicLaunchTxn(index, namespace, jobID, txn) 3092 if err == nil { 3093 return txn.Commit() 3094 } 3095 return err 3096 } 3097 3098 // DeletePeriodicLaunchTxn is used to delete the periodic launch, like DeletePeriodicLaunch 3099 // but in a transaction. Useful for when making multiple modifications atomically 3100 func (s *StateStore) DeletePeriodicLaunchTxn(index uint64, namespace, jobID string, txn Txn) error { 3101 // Lookup the launch 3102 existing, err := txn.First("periodic_launch", "id", namespace, jobID) 3103 if err != nil { 3104 return fmt.Errorf("launch lookup failed: %v", err) 3105 } 3106 if existing == nil { 3107 return fmt.Errorf("launch not found") 3108 } 3109 3110 // Delete the launch 3111 if err := txn.Delete("periodic_launch", existing); err != nil { 3112 return fmt.Errorf("launch delete failed: %v", err) 3113 } 3114 if err := txn.Insert("index", &IndexEntry{"periodic_launch", index}); err != nil { 3115 return fmt.Errorf("index update failed: %v", err) 3116 } 3117 3118 return nil 3119 } 3120 3121 // PeriodicLaunchByID is used to lookup a periodic launch by the periodic job 3122 // ID. 3123 func (s *StateStore) PeriodicLaunchByID(ws memdb.WatchSet, namespace, id string) (*structs.PeriodicLaunch, error) { 3124 txn := s.db.ReadTxn() 3125 3126 watchCh, existing, err := txn.FirstWatch("periodic_launch", "id", namespace, id) 3127 if err != nil { 3128 return nil, fmt.Errorf("periodic launch lookup failed: %v", err) 3129 } 3130 3131 ws.Add(watchCh) 3132 3133 if existing != nil { 3134 return existing.(*structs.PeriodicLaunch), nil 3135 } 3136 return nil, nil 3137 } 3138 3139 // PeriodicLaunches returns an iterator over all the periodic launches 3140 func (s *StateStore) PeriodicLaunches(ws memdb.WatchSet) (memdb.ResultIterator, error) { 3141 txn := s.db.ReadTxn() 3142 3143 // Walk the entire table 3144 iter, err := txn.Get("periodic_launch", "id") 3145 if err != nil { 3146 return nil, err 3147 } 3148 3149 ws.Add(iter.WatchCh()) 3150 3151 return iter, nil 3152 } 3153 3154 // UpsertEvals is used to upsert a set of evaluations 3155 func (s *StateStore) UpsertEvals(msgType structs.MessageType, index uint64, evals []*structs.Evaluation) error { 3156 txn := s.db.WriteTxnMsgT(msgType, index) 3157 defer txn.Abort() 3158 3159 err := s.UpsertEvalsTxn(index, evals, txn) 3160 if err == nil { 3161 return txn.Commit() 3162 } 3163 return err 3164 } 3165 3166 // UpsertEvalsTxn is used to upsert a set of evaluations, like UpsertEvals but 3167 // in a transaction. Useful for when making multiple modifications atomically. 3168 func (s *StateStore) UpsertEvalsTxn(index uint64, evals []*structs.Evaluation, txn Txn) error { 3169 // Do a nested upsert 3170 jobs := make(map[structs.NamespacedID]string, len(evals)) 3171 for _, eval := range evals { 3172 if err := s.nestedUpsertEval(txn, index, eval); err != nil { 3173 return err 3174 } 3175 3176 tuple := structs.NamespacedID{ 3177 ID: eval.JobID, 3178 Namespace: eval.Namespace, 3179 } 3180 jobs[tuple] = "" 3181 } 3182 3183 // Set the job's status 3184 if err := s.setJobStatuses(index, txn, jobs, false); err != nil { 3185 return fmt.Errorf("setting job status failed: %v", err) 3186 } 3187 3188 return nil 3189 } 3190 3191 // nestedUpsertEvaluation is used to nest an evaluation upsert within a transaction 3192 func (s *StateStore) nestedUpsertEval(txn *txn, index uint64, eval *structs.Evaluation) error { 3193 // Lookup the evaluation 3194 existing, err := txn.First("evals", "id", eval.ID) 3195 if err != nil { 3196 return fmt.Errorf("eval lookup failed: %v", err) 3197 } 3198 3199 // Update the indexes 3200 if existing != nil { 3201 eval.CreateIndex = existing.(*structs.Evaluation).CreateIndex 3202 eval.ModifyIndex = index 3203 } else { 3204 eval.CreateIndex = index 3205 eval.ModifyIndex = index 3206 } 3207 3208 // Update the job summary 3209 summaryRaw, err := txn.First("job_summary", "id", eval.Namespace, eval.JobID) 3210 if err != nil { 3211 return fmt.Errorf("job summary lookup failed: %v", err) 3212 } 3213 if summaryRaw != nil { 3214 js := summaryRaw.(*structs.JobSummary).Copy() 3215 hasSummaryChanged := false 3216 for tg, num := range eval.QueuedAllocations { 3217 if summary, ok := js.Summary[tg]; ok { 3218 if summary.Queued != num { 3219 summary.Queued = num 3220 js.Summary[tg] = summary 3221 hasSummaryChanged = true 3222 } 3223 } else { 3224 s.logger.Error("unable to update queued for job and task group", "job_id", eval.JobID, "task_group", tg, "namespace", eval.Namespace) 3225 } 3226 } 3227 3228 // Insert the job summary 3229 if hasSummaryChanged { 3230 js.ModifyIndex = index 3231 if err := txn.Insert("job_summary", js); err != nil { 3232 return fmt.Errorf("job summary insert failed: %v", err) 3233 } 3234 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 3235 return fmt.Errorf("index update failed: %v", err) 3236 } 3237 } 3238 } 3239 3240 // Check if the job has any blocked evaluations and cancel them 3241 if eval.Status == structs.EvalStatusComplete && len(eval.FailedTGAllocs) == 0 { 3242 // Get the blocked evaluation for a job if it exists 3243 iter, err := txn.Get("evals", "job", eval.Namespace, eval.JobID, structs.EvalStatusBlocked) 3244 if err != nil { 3245 return fmt.Errorf("failed to get blocked evals for job %q in namespace %q: %v", eval.JobID, eval.Namespace, err) 3246 } 3247 3248 var blocked []*structs.Evaluation 3249 for { 3250 raw := iter.Next() 3251 if raw == nil { 3252 break 3253 } 3254 blocked = append(blocked, raw.(*structs.Evaluation)) 3255 } 3256 3257 // Go through and update the evals 3258 for _, blockedEval := range blocked { 3259 newEval := blockedEval.Copy() 3260 newEval.Status = structs.EvalStatusCancelled 3261 newEval.StatusDescription = fmt.Sprintf("evaluation %q successful", eval.ID) 3262 newEval.ModifyIndex = index 3263 newEval.ModifyTime = eval.ModifyTime 3264 3265 if err := txn.Insert("evals", newEval); err != nil { 3266 return fmt.Errorf("eval insert failed: %v", err) 3267 } 3268 } 3269 } 3270 3271 // Insert the eval 3272 if err := txn.Insert("evals", eval); err != nil { 3273 return fmt.Errorf("eval insert failed: %v", err) 3274 } 3275 if err := txn.Insert("index", &IndexEntry{"evals", index}); err != nil { 3276 return fmt.Errorf("index update failed: %v", err) 3277 } 3278 return nil 3279 } 3280 3281 // updateEvalModifyIndex is used to update the modify index of an evaluation that has been 3282 // through a scheduler pass. This is done as part of plan apply. It ensures that when a subsequent 3283 // scheduler workers process a re-queued evaluation it sees any partial updates from the plan apply. 3284 func (s *StateStore) updateEvalModifyIndex(txn *txn, index uint64, evalID string) error { 3285 // Lookup the evaluation 3286 existing, err := txn.First("evals", "id", evalID) 3287 if err != nil { 3288 return fmt.Errorf("eval lookup failed: %v", err) 3289 } 3290 if existing == nil { 3291 s.logger.Error("unable to find eval", "eval_id", evalID) 3292 return fmt.Errorf("unable to find eval id %q", evalID) 3293 } 3294 eval := existing.(*structs.Evaluation).Copy() 3295 // Update the indexes 3296 eval.ModifyIndex = index 3297 3298 // Insert the eval 3299 if err := txn.Insert("evals", eval); err != nil { 3300 return fmt.Errorf("eval insert failed: %v", err) 3301 } 3302 if err := txn.Insert("index", &IndexEntry{"evals", index}); err != nil { 3303 return fmt.Errorf("index update failed: %v", err) 3304 } 3305 return nil 3306 } 3307 3308 // DeleteEvalsByFilter is used to delete all evals that are both safe to delete 3309 // and match a filter. 3310 func (s *StateStore) DeleteEvalsByFilter(index uint64, filterExpr string, pageToken string, perPage int32) error { 3311 txn := s.db.WriteTxn(index) 3312 defer txn.Abort() 3313 3314 // These are always user-initiated, so ensure the eval broker is paused. 3315 _, schedConfig, err := s.schedulerConfigTxn(txn) 3316 if err != nil { 3317 return err 3318 } 3319 if schedConfig == nil || !schedConfig.PauseEvalBroker { 3320 return errors.New("eval broker is enabled; eval broker must be paused to delete evals") 3321 } 3322 3323 filter, err := bexpr.CreateEvaluator(filterExpr) 3324 if err != nil { 3325 return err 3326 } 3327 3328 iter, err := s.Evals(nil, SortDefault) 3329 if err != nil { 3330 return fmt.Errorf("failed to lookup evals: %v", err) 3331 } 3332 3333 // Note: Paginator imports this package for testing so we can't just use 3334 // Paginator 3335 pageCount := int32(0) 3336 3337 for { 3338 if pageCount >= perPage { 3339 break 3340 } 3341 raw := iter.Next() 3342 if raw == nil { 3343 break 3344 } 3345 eval := raw.(*structs.Evaluation) 3346 if eval.ID < pageToken { 3347 continue 3348 } 3349 3350 deleteOk, err := s.EvalIsUserDeleteSafe(nil, eval) 3351 if !deleteOk || err != nil { 3352 continue 3353 } 3354 match, err := filter.Evaluate(eval) 3355 if !match || err != nil { 3356 continue 3357 } 3358 if err := txn.Delete("evals", eval); err != nil { 3359 return fmt.Errorf("eval delete failed: %v", err) 3360 } 3361 pageCount++ 3362 } 3363 3364 err = txn.Commit() 3365 return err 3366 } 3367 3368 // EvalIsUserDeleteSafe ensures an evaluation is safe to delete based on its 3369 // related allocation and job information. This follows similar, but different 3370 // rules to the eval reap checking, to ensure evaluations for running allocs or 3371 // allocs which need the evaluation detail are not deleted. 3372 // 3373 // Returns both a bool and an error so that error in querying the related 3374 // objects can be differentiated from reporting that the eval isn't safe to 3375 // delete. 3376 func (s *StateStore) EvalIsUserDeleteSafe(ws memdb.WatchSet, eval *structs.Evaluation) (bool, error) { 3377 3378 job, err := s.JobByID(ws, eval.Namespace, eval.JobID) 3379 if err != nil { 3380 return false, fmt.Errorf("failed to lookup job for eval: %v", err) 3381 } 3382 3383 allocs, err := s.AllocsByEval(ws, eval.ID) 3384 if err != nil { 3385 return false, fmt.Errorf("failed to lookup eval allocs: %v", err) 3386 } 3387 3388 return isEvalDeleteSafe(allocs, job), nil 3389 } 3390 3391 func isEvalDeleteSafe(allocs []*structs.Allocation, job *structs.Job) bool { 3392 3393 // If the job is deleted, stopped, or dead, all allocs are terminal and 3394 // the eval can be deleted. 3395 if job == nil || job.Stop || job.Status == structs.JobStatusDead { 3396 return true 3397 } 3398 3399 // Iterate the allocations associated to the eval, if any, and check 3400 // whether we can delete the eval. 3401 for _, alloc := range allocs { 3402 3403 // If the allocation is still classed as running on the client, or 3404 // might be, we can't delete. 3405 switch alloc.ClientStatus { 3406 case structs.AllocClientStatusRunning, structs.AllocClientStatusUnknown: 3407 return false 3408 } 3409 3410 // If the alloc hasn't failed then we don't need to consider it for 3411 // rescheduling. Rescheduling needs to copy over information from the 3412 // previous alloc so that it can enforce the reschedule policy. 3413 if alloc.ClientStatus != structs.AllocClientStatusFailed { 3414 continue 3415 } 3416 3417 var reschedulePolicy *structs.ReschedulePolicy 3418 tg := job.LookupTaskGroup(alloc.TaskGroup) 3419 3420 if tg != nil { 3421 reschedulePolicy = tg.ReschedulePolicy 3422 } 3423 3424 // No reschedule policy or rescheduling is disabled 3425 if reschedulePolicy == nil || (!reschedulePolicy.Unlimited && reschedulePolicy.Attempts == 0) { 3426 continue 3427 } 3428 3429 // The restart tracking information has not been carried forward. 3430 if alloc.NextAllocation == "" { 3431 return false 3432 } 3433 3434 // This task has unlimited rescheduling and the alloc has not been 3435 // replaced, so we can't delete the eval yet. 3436 if reschedulePolicy.Unlimited { 3437 return false 3438 } 3439 3440 // No restarts have been attempted yet. 3441 if alloc.RescheduleTracker == nil || len(alloc.RescheduleTracker.Events) == 0 { 3442 return false 3443 } 3444 } 3445 3446 return true 3447 } 3448 3449 // DeleteEval is used to delete an evaluation 3450 func (s *StateStore) DeleteEval(index uint64, evals, allocs []string, userInitiated bool) error { 3451 txn := s.db.WriteTxn(index) 3452 defer txn.Abort() 3453 3454 // If this deletion has been initiated by an operator, ensure the eval 3455 // broker is paused. 3456 if userInitiated { 3457 _, schedConfig, err := s.schedulerConfigTxn(txn) 3458 if err != nil { 3459 return err 3460 } 3461 if schedConfig == nil || !schedConfig.PauseEvalBroker { 3462 return errors.New("eval broker is enabled; eval broker must be paused to delete evals") 3463 } 3464 } 3465 3466 jobs := make(map[structs.NamespacedID]string, len(evals)) 3467 3468 // evalsTableUpdated and allocsTableUpdated allow us to track whether each 3469 // table has been modified. This allows us to skip updating the index table 3470 // entries if we do not need to. 3471 var evalsTableUpdated, allocsTableUpdated bool 3472 3473 for _, eval := range evals { 3474 existing, err := txn.First("evals", "id", eval) 3475 if err != nil { 3476 return fmt.Errorf("eval lookup failed: %v", err) 3477 } 3478 if existing == nil { 3479 continue 3480 } 3481 if err := txn.Delete("evals", existing); err != nil { 3482 return fmt.Errorf("eval delete failed: %v", err) 3483 } 3484 3485 // Mark that we have made a successful modification to the evals 3486 // table. 3487 evalsTableUpdated = true 3488 3489 eval := existing.(*structs.Evaluation) 3490 3491 tuple := structs.NamespacedID{ 3492 ID: eval.JobID, 3493 Namespace: eval.Namespace, 3494 } 3495 jobs[tuple] = "" 3496 } 3497 3498 for _, alloc := range allocs { 3499 raw, err := txn.First("allocs", "id", alloc) 3500 if err != nil { 3501 return fmt.Errorf("alloc lookup failed: %v", err) 3502 } 3503 if raw == nil { 3504 continue 3505 } 3506 if err := txn.Delete("allocs", raw); err != nil { 3507 return fmt.Errorf("alloc delete failed: %v", err) 3508 } 3509 3510 // Mark that we have made a successful modification to the allocs 3511 // table. 3512 allocsTableUpdated = true 3513 } 3514 3515 // Update the indexes 3516 if evalsTableUpdated { 3517 if err := txn.Insert("index", &IndexEntry{"evals", index}); err != nil { 3518 return fmt.Errorf("index update failed: %v", err) 3519 } 3520 } 3521 if allocsTableUpdated { 3522 if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { 3523 return fmt.Errorf("index update failed: %v", err) 3524 } 3525 } 3526 3527 // Set the job's status 3528 if err := s.setJobStatuses(index, txn, jobs, true); err != nil { 3529 return fmt.Errorf("setting job status failed: %v", err) 3530 } 3531 3532 return txn.Commit() 3533 } 3534 3535 // EvalByID is used to lookup an eval by its ID 3536 func (s *StateStore) EvalByID(ws memdb.WatchSet, id string) (*structs.Evaluation, error) { 3537 txn := s.db.ReadTxn() 3538 3539 watchCh, existing, err := txn.FirstWatch("evals", "id", id) 3540 if err != nil { 3541 return nil, fmt.Errorf("eval lookup failed: %v", err) 3542 } 3543 3544 ws.Add(watchCh) 3545 3546 if existing != nil { 3547 return existing.(*structs.Evaluation), nil 3548 } 3549 return nil, nil 3550 } 3551 3552 // EvalsRelatedToID is used to retrieve the evals that are related (next, 3553 // previous, or blocked) to the provided eval ID. 3554 func (s *StateStore) EvalsRelatedToID(ws memdb.WatchSet, id string) ([]*structs.EvaluationStub, error) { 3555 txn := s.db.ReadTxn() 3556 3557 raw, err := txn.First("evals", "id", id) 3558 if err != nil { 3559 return nil, fmt.Errorf("eval lookup failed: %v", err) 3560 } 3561 if raw == nil { 3562 return nil, nil 3563 } 3564 eval := raw.(*structs.Evaluation) 3565 3566 relatedEvals := []*structs.EvaluationStub{} 3567 todo := eval.RelatedIDs() 3568 done := map[string]bool{ 3569 eval.ID: true, // don't place the requested eval in the related list. 3570 } 3571 3572 for len(todo) > 0 { 3573 // Pop the first value from the todo list. 3574 current := todo[0] 3575 todo = todo[1:] 3576 if current == "" { 3577 continue 3578 } 3579 3580 // Skip value if we already have it in the results. 3581 if done[current] { 3582 continue 3583 } 3584 3585 eval, err := s.EvalByID(ws, current) 3586 if err != nil { 3587 return nil, err 3588 } 3589 if eval == nil { 3590 continue 3591 } 3592 3593 todo = append(todo, eval.RelatedIDs()...) 3594 relatedEvals = append(relatedEvals, eval.Stub()) 3595 done[eval.ID] = true 3596 } 3597 3598 return relatedEvals, nil 3599 } 3600 3601 // EvalsByIDPrefix is used to lookup evaluations by prefix in a particular 3602 // namespace 3603 func (s *StateStore) EvalsByIDPrefix(ws memdb.WatchSet, namespace, id string, sort SortOption) (memdb.ResultIterator, error) { 3604 txn := s.db.ReadTxn() 3605 3606 var iter memdb.ResultIterator 3607 var err error 3608 3609 // Get an iterator over all evals by the id prefix 3610 switch sort { 3611 case SortReverse: 3612 iter, err = txn.GetReverse("evals", "id_prefix", id) 3613 default: 3614 iter, err = txn.Get("evals", "id_prefix", id) 3615 } 3616 if err != nil { 3617 return nil, fmt.Errorf("eval lookup failed: %v", err) 3618 } 3619 3620 ws.Add(iter.WatchCh()) 3621 3622 // Wrap the iterator in a filter 3623 wrap := memdb.NewFilterIterator(iter, evalNamespaceFilter(namespace)) 3624 return wrap, nil 3625 } 3626 3627 // evalNamespaceFilter returns a filter function that filters all evaluations 3628 // not in the given namespace. 3629 func evalNamespaceFilter(namespace string) func(interface{}) bool { 3630 return func(raw interface{}) bool { 3631 eval, ok := raw.(*structs.Evaluation) 3632 if !ok { 3633 return true 3634 } 3635 3636 return namespace != structs.AllNamespacesSentinel && 3637 eval.Namespace != namespace 3638 } 3639 } 3640 3641 // EvalsByJob returns all the evaluations by job id 3642 func (s *StateStore) EvalsByJob(ws memdb.WatchSet, namespace, jobID string) ([]*structs.Evaluation, error) { 3643 txn := s.db.ReadTxn() 3644 3645 // Get an iterator over the node allocations 3646 iter, err := txn.Get("evals", "job_prefix", namespace, jobID) 3647 if err != nil { 3648 return nil, err 3649 } 3650 3651 ws.Add(iter.WatchCh()) 3652 3653 var out []*structs.Evaluation 3654 for { 3655 raw := iter.Next() 3656 if raw == nil { 3657 break 3658 } 3659 3660 e := raw.(*structs.Evaluation) 3661 3662 // Filter non-exact matches 3663 if e.JobID != jobID { 3664 continue 3665 } 3666 3667 out = append(out, e) 3668 } 3669 return out, nil 3670 } 3671 3672 // Evals returns an iterator over all the evaluations in ascending or descending 3673 // order of CreationIndex as determined by the reverse parameter. 3674 func (s *StateStore) Evals(ws memdb.WatchSet, sort SortOption) (memdb.ResultIterator, error) { 3675 txn := s.db.ReadTxn() 3676 3677 var it memdb.ResultIterator 3678 var err error 3679 3680 switch sort { 3681 case SortReverse: 3682 it, err = txn.GetReverse("evals", "create") 3683 default: 3684 it, err = txn.Get("evals", "create") 3685 } 3686 3687 if err != nil { 3688 return nil, err 3689 } 3690 3691 ws.Add(it.WatchCh()) 3692 3693 return it, nil 3694 } 3695 3696 // EvalsByNamespace returns an iterator over all evaluations in no particular 3697 // order. 3698 // 3699 // todo(shoenig): can this be removed? 3700 func (s *StateStore) EvalsByNamespace(ws memdb.WatchSet, namespace string) (memdb.ResultIterator, error) { 3701 txn := s.db.ReadTxn() 3702 3703 it, err := txn.Get("evals", "namespace", namespace) 3704 if err != nil { 3705 return nil, err 3706 } 3707 3708 ws.Add(it.WatchCh()) 3709 3710 return it, nil 3711 } 3712 3713 func (s *StateStore) EvalsByNamespaceOrdered(ws memdb.WatchSet, namespace string, sort SortOption) (memdb.ResultIterator, error) { 3714 txn := s.db.ReadTxn() 3715 3716 var ( 3717 it memdb.ResultIterator 3718 err error 3719 exact = terminate(namespace) 3720 ) 3721 3722 switch sort { 3723 case SortReverse: 3724 it, err = txn.GetReverse("evals", "namespace_create_prefix", exact) 3725 default: 3726 it, err = txn.Get("evals", "namespace_create_prefix", exact) 3727 } 3728 3729 if err != nil { 3730 return nil, err 3731 } 3732 3733 ws.Add(it.WatchCh()) 3734 3735 return it, nil 3736 } 3737 3738 // UpdateAllocsFromClient is used to update an allocation based on input 3739 // from a client. While the schedulers are the authority on the allocation for 3740 // most things, some updates are authoritative from the client. Specifically, 3741 // the desired state comes from the schedulers, while the actual state comes 3742 // from clients. 3743 func (s *StateStore) UpdateAllocsFromClient(msgType structs.MessageType, index uint64, allocs []*structs.Allocation) error { 3744 txn := s.db.WriteTxnMsgT(msgType, index) 3745 defer txn.Abort() 3746 3747 // Capture all nodes being affected. Alloc updates from clients are batched 3748 // so this request may include allocs from several nodes. 3749 nodeIDs := set.New[string](1) 3750 3751 // Handle each of the updated allocations 3752 for _, alloc := range allocs { 3753 nodeIDs.Insert(alloc.NodeID) 3754 if err := s.nestedUpdateAllocFromClient(txn, index, alloc); err != nil { 3755 return err 3756 } 3757 } 3758 3759 // Update the indexes 3760 if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { 3761 return fmt.Errorf("index update failed: %v", err) 3762 } 3763 3764 // Update the index of when nodes last updated their allocs. 3765 for _, nodeID := range nodeIDs.List() { 3766 if err := s.updateClientAllocUpdateIndex(txn, index, nodeID); err != nil { 3767 return fmt.Errorf("node update failed: %v", err) 3768 } 3769 } 3770 3771 return txn.Commit() 3772 } 3773 3774 // nestedUpdateAllocFromClient is used to nest an update of an allocation with client status 3775 func (s *StateStore) nestedUpdateAllocFromClient(txn *txn, index uint64, alloc *structs.Allocation) error { 3776 // Look for existing alloc 3777 existing, err := txn.First("allocs", "id", alloc.ID) 3778 if err != nil { 3779 return fmt.Errorf("alloc lookup failed: %v", err) 3780 } 3781 3782 // Nothing to do if this does not exist 3783 if existing == nil { 3784 return nil 3785 } 3786 exist := existing.(*structs.Allocation) 3787 3788 // Copy everything from the existing allocation 3789 copyAlloc := exist.Copy() 3790 3791 // Pull in anything the client is the authority on 3792 copyAlloc.ClientStatus = alloc.ClientStatus 3793 copyAlloc.ClientDescription = alloc.ClientDescription 3794 copyAlloc.TaskStates = alloc.TaskStates 3795 copyAlloc.NetworkStatus = alloc.NetworkStatus 3796 3797 // The client can only set its deployment health and timestamp, so just take 3798 // those 3799 if copyAlloc.DeploymentStatus != nil && alloc.DeploymentStatus != nil { 3800 oldHasHealthy := copyAlloc.DeploymentStatus.HasHealth() 3801 newHasHealthy := alloc.DeploymentStatus.HasHealth() 3802 3803 // We got new health information from the client 3804 if newHasHealthy && (!oldHasHealthy || *copyAlloc.DeploymentStatus.Healthy != *alloc.DeploymentStatus.Healthy) { 3805 // Updated deployment health and timestamp 3806 copyAlloc.DeploymentStatus.Healthy = pointer.Of(*alloc.DeploymentStatus.Healthy) 3807 copyAlloc.DeploymentStatus.Timestamp = alloc.DeploymentStatus.Timestamp 3808 copyAlloc.DeploymentStatus.ModifyIndex = index 3809 } 3810 } else if alloc.DeploymentStatus != nil { 3811 // First time getting a deployment status so copy everything and just 3812 // set the index 3813 copyAlloc.DeploymentStatus = alloc.DeploymentStatus.Copy() 3814 copyAlloc.DeploymentStatus.ModifyIndex = index 3815 } 3816 3817 // Update the modify index 3818 copyAlloc.ModifyIndex = index 3819 3820 // Update the modify time 3821 copyAlloc.ModifyTime = alloc.ModifyTime 3822 3823 if err := s.updateDeploymentWithAlloc(index, copyAlloc, exist, txn); err != nil { 3824 return fmt.Errorf("error updating deployment: %v", err) 3825 } 3826 3827 if err := s.updateSummaryWithAlloc(index, copyAlloc, exist, txn); err != nil { 3828 return fmt.Errorf("error updating job summary: %v", err) 3829 } 3830 3831 if err := s.updateEntWithAlloc(index, copyAlloc, exist, txn); err != nil { 3832 return err 3833 } 3834 3835 if err := s.updatePluginForTerminalAlloc(index, copyAlloc, txn); err != nil { 3836 return err 3837 } 3838 3839 // Update the allocation 3840 if err := txn.Insert("allocs", copyAlloc); err != nil { 3841 return fmt.Errorf("alloc insert failed: %v", err) 3842 } 3843 3844 // Set the job's status 3845 forceStatus := "" 3846 if !copyAlloc.TerminalStatus() { 3847 forceStatus = structs.JobStatusRunning 3848 } 3849 3850 tuple := structs.NamespacedID{ 3851 ID: exist.JobID, 3852 Namespace: exist.Namespace, 3853 } 3854 jobs := map[structs.NamespacedID]string{tuple: forceStatus} 3855 3856 if err := s.setJobStatuses(index, txn, jobs, false); err != nil { 3857 return fmt.Errorf("setting job status failed: %v", err) 3858 } 3859 return nil 3860 } 3861 3862 func (s *StateStore) updateClientAllocUpdateIndex(txn *txn, index uint64, nodeID string) error { 3863 existing, err := txn.First("nodes", "id", nodeID) 3864 if err != nil { 3865 return fmt.Errorf("node lookup failed: %v", err) 3866 } 3867 if existing == nil { 3868 return nil 3869 } 3870 3871 node := existing.(*structs.Node) 3872 copyNode := node.Copy() 3873 copyNode.LastAllocUpdateIndex = index 3874 3875 if err := txn.Insert("nodes", copyNode); err != nil { 3876 return fmt.Errorf("node update failed: %v", err) 3877 } 3878 if err := txn.Insert("index", &IndexEntry{"nodes", txn.Index}); err != nil { 3879 return fmt.Errorf("index update failed: %v", err) 3880 } 3881 return nil 3882 } 3883 3884 // UpsertAllocs is used to evict a set of allocations and allocate new ones at 3885 // the same time. 3886 func (s *StateStore) UpsertAllocs(msgType structs.MessageType, index uint64, allocs []*structs.Allocation) error { 3887 txn := s.db.WriteTxn(index) 3888 defer txn.Abort() 3889 if err := s.upsertAllocsImpl(index, allocs, txn); err != nil { 3890 return err 3891 } 3892 return txn.Commit() 3893 } 3894 3895 // upsertAllocs is the actual implementation of UpsertAllocs so that it may be 3896 // used with an existing transaction. 3897 func (s *StateStore) upsertAllocsImpl(index uint64, allocs []*structs.Allocation, txn *txn) error { 3898 // Handle the allocations 3899 jobs := make(map[structs.NamespacedID]string, 1) 3900 for _, alloc := range allocs { 3901 existing, err := txn.First("allocs", "id", alloc.ID) 3902 if err != nil { 3903 return fmt.Errorf("alloc lookup failed: %v", err) 3904 } 3905 exist, _ := existing.(*structs.Allocation) 3906 3907 if exist == nil { 3908 alloc.CreateIndex = index 3909 alloc.ModifyIndex = index 3910 alloc.AllocModifyIndex = index 3911 if alloc.DeploymentStatus != nil { 3912 alloc.DeploymentStatus.ModifyIndex = index 3913 } 3914 3915 // Issue https://github.com/hernad/nomad/issues/2583 uncovered 3916 // the a race between a forced garbage collection and the scheduler 3917 // marking an allocation as terminal. The issue is that the 3918 // allocation from the scheduler has its job normalized and the FSM 3919 // will only denormalize if the allocation is not terminal. However 3920 // if the allocation is garbage collected, that will result in a 3921 // allocation being upserted for the first time without a job 3922 // attached. By returning an error here, it will cause the FSM to 3923 // error, causing the plan_apply to error and thus causing the 3924 // evaluation to be failed. This will force an index refresh that 3925 // should solve this issue. 3926 if alloc.Job == nil { 3927 return fmt.Errorf("attempting to upsert allocation %q without a job", alloc.ID) 3928 } 3929 } else { 3930 alloc.CreateIndex = exist.CreateIndex 3931 alloc.ModifyIndex = index 3932 alloc.AllocModifyIndex = index 3933 3934 // Keep the clients task states 3935 alloc.TaskStates = exist.TaskStates 3936 3937 // If the scheduler is marking this allocation as lost or unknown we do not 3938 // want to reuse the status of the existing allocation. 3939 if alloc.ClientStatus != structs.AllocClientStatusLost && 3940 alloc.ClientStatus != structs.AllocClientStatusUnknown { 3941 alloc.ClientStatus = exist.ClientStatus 3942 alloc.ClientDescription = exist.ClientDescription 3943 } 3944 3945 // The job has been denormalized so re-attach the original job 3946 if alloc.Job == nil { 3947 alloc.Job = exist.Job 3948 } 3949 } 3950 3951 // OPTIMIZATION: 3952 // These should be given a map of new to old allocation and the updates 3953 // should be one on all changes. The current implementation causes O(n) 3954 // lookups/copies/insertions rather than O(1) 3955 if err := s.updateDeploymentWithAlloc(index, alloc, exist, txn); err != nil { 3956 return fmt.Errorf("error updating deployment: %v", err) 3957 } 3958 3959 if err := s.updateSummaryWithAlloc(index, alloc, exist, txn); err != nil { 3960 return fmt.Errorf("error updating job summary: %v", err) 3961 } 3962 3963 if err := s.updateEntWithAlloc(index, alloc, exist, txn); err != nil { 3964 return err 3965 } 3966 3967 if err := s.updatePluginForTerminalAlloc(index, alloc, txn); err != nil { 3968 return err 3969 } 3970 3971 if err := txn.Insert("allocs", alloc); err != nil { 3972 return fmt.Errorf("alloc insert failed: %v", err) 3973 } 3974 3975 if alloc.PreviousAllocation != "" { 3976 prevAlloc, err := txn.First("allocs", "id", alloc.PreviousAllocation) 3977 if err != nil { 3978 return fmt.Errorf("alloc lookup failed: %v", err) 3979 } 3980 existingPrevAlloc, _ := prevAlloc.(*structs.Allocation) 3981 if existingPrevAlloc != nil { 3982 prevAllocCopy := existingPrevAlloc.Copy() 3983 prevAllocCopy.NextAllocation = alloc.ID 3984 prevAllocCopy.ModifyIndex = index 3985 if err := txn.Insert("allocs", prevAllocCopy); err != nil { 3986 return fmt.Errorf("alloc insert failed: %v", err) 3987 } 3988 } 3989 } 3990 3991 // If the allocation is running, force the job to running status. 3992 forceStatus := "" 3993 if !alloc.TerminalStatus() { 3994 forceStatus = structs.JobStatusRunning 3995 } 3996 3997 tuple := structs.NamespacedID{ 3998 ID: alloc.JobID, 3999 Namespace: alloc.Namespace, 4000 } 4001 jobs[tuple] = forceStatus 4002 } 4003 4004 // Update the indexes 4005 if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { 4006 return fmt.Errorf("index update failed: %v", err) 4007 } 4008 4009 // Set the job's status 4010 if err := s.setJobStatuses(index, txn, jobs, false); err != nil { 4011 return fmt.Errorf("setting job status failed: %v", err) 4012 } 4013 4014 return nil 4015 } 4016 4017 // UpdateAllocsDesiredTransitions is used to update a set of allocations 4018 // desired transitions. 4019 func (s *StateStore) UpdateAllocsDesiredTransitions(msgType structs.MessageType, index uint64, allocs map[string]*structs.DesiredTransition, 4020 evals []*structs.Evaluation) error { 4021 4022 txn := s.db.WriteTxnMsgT(msgType, index) 4023 defer txn.Abort() 4024 4025 // Handle each of the updated allocations 4026 for id, transition := range allocs { 4027 if err := s.UpdateAllocDesiredTransitionTxn(txn, index, id, transition); err != nil { 4028 return err 4029 } 4030 } 4031 4032 for _, eval := range evals { 4033 if err := s.nestedUpsertEval(txn, index, eval); err != nil { 4034 return err 4035 } 4036 } 4037 4038 // Update the indexes 4039 if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { 4040 return fmt.Errorf("index update failed: %v", err) 4041 } 4042 4043 return txn.Commit() 4044 } 4045 4046 // UpdateAllocDesiredTransitionTxn is used to nest an update of an 4047 // allocations desired transition 4048 func (s *StateStore) UpdateAllocDesiredTransitionTxn( 4049 txn *txn, index uint64, allocID string, 4050 transition *structs.DesiredTransition) error { 4051 4052 // Look for existing alloc 4053 existing, err := txn.First("allocs", "id", allocID) 4054 if err != nil { 4055 return fmt.Errorf("alloc lookup failed: %v", err) 4056 } 4057 4058 // Nothing to do if this does not exist 4059 if existing == nil { 4060 return nil 4061 } 4062 exist := existing.(*structs.Allocation) 4063 4064 // Copy everything from the existing allocation 4065 copyAlloc := exist.Copy() 4066 4067 // Merge the desired transitions 4068 copyAlloc.DesiredTransition.Merge(transition) 4069 4070 // Update the modify indexes 4071 copyAlloc.ModifyIndex = index 4072 copyAlloc.AllocModifyIndex = index 4073 4074 // Update the allocation 4075 if err := txn.Insert("allocs", copyAlloc); err != nil { 4076 return fmt.Errorf("alloc insert failed: %v", err) 4077 } 4078 4079 return nil 4080 } 4081 4082 // AllocByID is used to lookup an allocation by its ID 4083 func (s *StateStore) AllocByID(ws memdb.WatchSet, id string) (*structs.Allocation, error) { 4084 txn := s.db.ReadTxn() 4085 return s.allocByIDImpl(txn, ws, id) 4086 } 4087 4088 // allocByIDImpl retrives an allocation and is called under and existing 4089 // transaction. An optional watch set can be passed to add allocations to the 4090 // watch set 4091 func (s *StateStore) allocByIDImpl(txn Txn, ws memdb.WatchSet, id string) (*structs.Allocation, error) { 4092 watchCh, raw, err := txn.FirstWatch("allocs", "id", id) 4093 if err != nil { 4094 return nil, fmt.Errorf("alloc lookup failed: %v", err) 4095 } 4096 4097 ws.Add(watchCh) 4098 4099 if raw == nil { 4100 return nil, nil 4101 } 4102 alloc := raw.(*structs.Allocation) 4103 return alloc, nil 4104 } 4105 4106 // AllocsByIDPrefix is used to lookup allocs by prefix 4107 func (s *StateStore) AllocsByIDPrefix(ws memdb.WatchSet, namespace, id string, sort SortOption) (memdb.ResultIterator, error) { 4108 txn := s.db.ReadTxn() 4109 4110 var iter memdb.ResultIterator 4111 var err error 4112 4113 switch sort { 4114 case SortReverse: 4115 iter, err = txn.GetReverse("allocs", "id_prefix", id) 4116 default: 4117 iter, err = txn.Get("allocs", "id_prefix", id) 4118 } 4119 if err != nil { 4120 return nil, fmt.Errorf("alloc lookup failed: %v", err) 4121 } 4122 4123 ws.Add(iter.WatchCh()) 4124 4125 // Wrap the iterator in a filter 4126 wrap := memdb.NewFilterIterator(iter, allocNamespaceFilter(namespace)) 4127 return wrap, nil 4128 } 4129 4130 // allocNamespaceFilter returns a filter function that filters all allocations 4131 // not in the given namespace. 4132 func allocNamespaceFilter(namespace string) func(interface{}) bool { 4133 return func(raw interface{}) bool { 4134 alloc, ok := raw.(*structs.Allocation) 4135 if !ok { 4136 return true 4137 } 4138 4139 if namespace == structs.AllNamespacesSentinel { 4140 return false 4141 } 4142 4143 return alloc.Namespace != namespace 4144 } 4145 } 4146 4147 // AllocsByIDPrefixAllNSs is used to lookup allocs by prefix. 4148 func (s *StateStore) AllocsByIDPrefixAllNSs(ws memdb.WatchSet, prefix string) (memdb.ResultIterator, error) { 4149 txn := s.db.ReadTxn() 4150 4151 iter, err := txn.Get("allocs", "id_prefix", prefix) 4152 if err != nil { 4153 return nil, fmt.Errorf("alloc lookup failed: %v", err) 4154 } 4155 4156 ws.Add(iter.WatchCh()) 4157 4158 return iter, nil 4159 } 4160 4161 // AllocsByNode returns all the allocations by node 4162 func (s *StateStore) AllocsByNode(ws memdb.WatchSet, node string) ([]*structs.Allocation, error) { 4163 txn := s.db.ReadTxn() 4164 4165 return allocsByNodeTxn(txn, ws, node) 4166 } 4167 4168 func allocsByNodeTxn(txn ReadTxn, ws memdb.WatchSet, node string) ([]*structs.Allocation, error) { 4169 // Get an iterator over the node allocations, using only the 4170 // node prefix which ignores the terminal status 4171 iter, err := txn.Get("allocs", "node_prefix", node) 4172 if err != nil { 4173 return nil, err 4174 } 4175 4176 ws.Add(iter.WatchCh()) 4177 4178 var out []*structs.Allocation 4179 for { 4180 raw := iter.Next() 4181 if raw == nil { 4182 break 4183 } 4184 out = append(out, raw.(*structs.Allocation)) 4185 } 4186 return out, nil 4187 } 4188 4189 // AllocsByNodeTerminal returns all the allocations by node and terminal 4190 // status. 4191 func (s *StateStore) AllocsByNodeTerminal(ws memdb.WatchSet, node string, terminal bool) ([]*structs.Allocation, error) { 4192 txn := s.db.ReadTxn() 4193 4194 // Get an iterator over the node allocations 4195 iter, err := txn.Get("allocs", "node", node, terminal) 4196 if err != nil { 4197 return nil, err 4198 } 4199 4200 ws.Add(iter.WatchCh()) 4201 4202 var out []*structs.Allocation 4203 for { 4204 raw := iter.Next() 4205 if raw == nil { 4206 break 4207 } 4208 out = append(out, raw.(*structs.Allocation)) 4209 } 4210 return out, nil 4211 } 4212 4213 // AllocsByJob returns allocations by job id 4214 func (s *StateStore) AllocsByJob(ws memdb.WatchSet, namespace, jobID string, anyCreateIndex bool) ([]*structs.Allocation, error) { 4215 txn := s.db.ReadTxn() 4216 4217 // Get the job 4218 var job *structs.Job 4219 rawJob, err := txn.First("jobs", "id", namespace, jobID) 4220 if err != nil { 4221 return nil, err 4222 } 4223 if rawJob != nil { 4224 job = rawJob.(*structs.Job) 4225 } 4226 4227 // Get an iterator over the node allocations 4228 iter, err := txn.Get("allocs", "job", namespace, jobID) 4229 if err != nil { 4230 return nil, err 4231 } 4232 4233 ws.Add(iter.WatchCh()) 4234 4235 var out []*structs.Allocation 4236 for { 4237 raw := iter.Next() 4238 if raw == nil { 4239 break 4240 } 4241 4242 alloc := raw.(*structs.Allocation) 4243 // If the allocation belongs to a job with the same ID but a different 4244 // create index and we are not getting all the allocations whose Jobs 4245 // matches the same Job ID then we skip it 4246 if !anyCreateIndex && job != nil && alloc.Job.CreateIndex != job.CreateIndex { 4247 continue 4248 } 4249 out = append(out, raw.(*structs.Allocation)) 4250 } 4251 return out, nil 4252 } 4253 4254 // AllocsByEval returns all the allocations by eval id 4255 func (s *StateStore) AllocsByEval(ws memdb.WatchSet, evalID string) ([]*structs.Allocation, error) { 4256 txn := s.db.ReadTxn() 4257 4258 // Get an iterator over the eval allocations 4259 iter, err := txn.Get("allocs", "eval", evalID) 4260 if err != nil { 4261 return nil, err 4262 } 4263 4264 ws.Add(iter.WatchCh()) 4265 4266 var out []*structs.Allocation 4267 for { 4268 raw := iter.Next() 4269 if raw == nil { 4270 break 4271 } 4272 out = append(out, raw.(*structs.Allocation)) 4273 } 4274 return out, nil 4275 } 4276 4277 // AllocsByDeployment returns all the allocations by deployment id 4278 func (s *StateStore) AllocsByDeployment(ws memdb.WatchSet, deploymentID string) ([]*structs.Allocation, error) { 4279 txn := s.db.ReadTxn() 4280 4281 // Get an iterator over the deployments allocations 4282 iter, err := txn.Get("allocs", "deployment", deploymentID) 4283 if err != nil { 4284 return nil, err 4285 } 4286 4287 ws.Add(iter.WatchCh()) 4288 4289 var out []*structs.Allocation 4290 for { 4291 raw := iter.Next() 4292 if raw == nil { 4293 break 4294 } 4295 out = append(out, raw.(*structs.Allocation)) 4296 } 4297 return out, nil 4298 } 4299 4300 // Allocs returns an iterator over all the evaluations. 4301 func (s *StateStore) Allocs(ws memdb.WatchSet, sort SortOption) (memdb.ResultIterator, error) { 4302 txn := s.db.ReadTxn() 4303 4304 var it memdb.ResultIterator 4305 var err error 4306 4307 switch sort { 4308 case SortReverse: 4309 it, err = txn.GetReverse("allocs", "create") 4310 default: 4311 it, err = txn.Get("allocs", "create") 4312 } 4313 4314 if err != nil { 4315 return nil, err 4316 } 4317 4318 ws.Add(it.WatchCh()) 4319 4320 return it, nil 4321 } 4322 4323 func (s *StateStore) AllocsByNamespaceOrdered(ws memdb.WatchSet, namespace string, sort SortOption) (memdb.ResultIterator, error) { 4324 txn := s.db.ReadTxn() 4325 4326 var ( 4327 it memdb.ResultIterator 4328 err error 4329 exact = terminate(namespace) 4330 ) 4331 4332 switch sort { 4333 case SortReverse: 4334 it, err = txn.GetReverse("allocs", "namespace_create_prefix", exact) 4335 default: 4336 it, err = txn.Get("allocs", "namespace_create_prefix", exact) 4337 } 4338 4339 if err != nil { 4340 return nil, err 4341 } 4342 4343 ws.Add(it.WatchCh()) 4344 4345 return it, nil 4346 } 4347 4348 // AllocsByNamespace returns an iterator over all the allocations in the 4349 // namespace 4350 func (s *StateStore) AllocsByNamespace(ws memdb.WatchSet, namespace string) (memdb.ResultIterator, error) { 4351 txn := s.db.ReadTxn() 4352 return s.allocsByNamespaceImpl(ws, txn, namespace) 4353 } 4354 4355 // allocsByNamespaceImpl returns an iterator over all the allocations in the 4356 // namespace 4357 func (s *StateStore) allocsByNamespaceImpl(ws memdb.WatchSet, txn *txn, namespace string) (memdb.ResultIterator, error) { 4358 // Walk the entire table 4359 iter, err := txn.Get("allocs", "namespace", namespace) 4360 if err != nil { 4361 return nil, err 4362 } 4363 4364 ws.Add(iter.WatchCh()) 4365 4366 return iter, nil 4367 } 4368 4369 // UpsertVaultAccessor is used to register a set of Vault Accessors. 4370 func (s *StateStore) UpsertVaultAccessor(index uint64, accessors []*structs.VaultAccessor) error { 4371 txn := s.db.WriteTxn(index) 4372 defer txn.Abort() 4373 4374 for _, accessor := range accessors { 4375 // Set the create index 4376 accessor.CreateIndex = index 4377 4378 // Insert the accessor 4379 if err := txn.Insert("vault_accessors", accessor); err != nil { 4380 return fmt.Errorf("accessor insert failed: %v", err) 4381 } 4382 } 4383 4384 if err := txn.Insert("index", &IndexEntry{"vault_accessors", index}); err != nil { 4385 return fmt.Errorf("index update failed: %v", err) 4386 } 4387 4388 return txn.Commit() 4389 } 4390 4391 // DeleteVaultAccessors is used to delete a set of Vault Accessors 4392 func (s *StateStore) DeleteVaultAccessors(index uint64, accessors []*structs.VaultAccessor) error { 4393 txn := s.db.WriteTxn(index) 4394 defer txn.Abort() 4395 4396 // Lookup the accessor 4397 for _, accessor := range accessors { 4398 // Delete the accessor 4399 if err := txn.Delete("vault_accessors", accessor); err != nil { 4400 return fmt.Errorf("accessor delete failed: %v", err) 4401 } 4402 } 4403 4404 if err := txn.Insert("index", &IndexEntry{"vault_accessors", index}); err != nil { 4405 return fmt.Errorf("index update failed: %v", err) 4406 } 4407 4408 return txn.Commit() 4409 } 4410 4411 // VaultAccessor returns the given Vault accessor 4412 func (s *StateStore) VaultAccessor(ws memdb.WatchSet, accessor string) (*structs.VaultAccessor, error) { 4413 txn := s.db.ReadTxn() 4414 4415 watchCh, existing, err := txn.FirstWatch("vault_accessors", "id", accessor) 4416 if err != nil { 4417 return nil, fmt.Errorf("accessor lookup failed: %v", err) 4418 } 4419 4420 ws.Add(watchCh) 4421 4422 if existing != nil { 4423 return existing.(*structs.VaultAccessor), nil 4424 } 4425 4426 return nil, nil 4427 } 4428 4429 // VaultAccessors returns an iterator of Vault accessors. 4430 func (s *StateStore) VaultAccessors(ws memdb.WatchSet) (memdb.ResultIterator, error) { 4431 txn := s.db.ReadTxn() 4432 4433 iter, err := txn.Get("vault_accessors", "id") 4434 if err != nil { 4435 return nil, err 4436 } 4437 4438 ws.Add(iter.WatchCh()) 4439 4440 return iter, nil 4441 } 4442 4443 // VaultAccessorsByAlloc returns all the Vault accessors by alloc id 4444 func (s *StateStore) VaultAccessorsByAlloc(ws memdb.WatchSet, allocID string) ([]*structs.VaultAccessor, error) { 4445 txn := s.db.ReadTxn() 4446 4447 // Get an iterator over the accessors 4448 iter, err := txn.Get("vault_accessors", "alloc_id", allocID) 4449 if err != nil { 4450 return nil, err 4451 } 4452 4453 ws.Add(iter.WatchCh()) 4454 4455 var out []*structs.VaultAccessor 4456 for { 4457 raw := iter.Next() 4458 if raw == nil { 4459 break 4460 } 4461 out = append(out, raw.(*structs.VaultAccessor)) 4462 } 4463 return out, nil 4464 } 4465 4466 // VaultAccessorsByNode returns all the Vault accessors by node id 4467 func (s *StateStore) VaultAccessorsByNode(ws memdb.WatchSet, nodeID string) ([]*structs.VaultAccessor, error) { 4468 txn := s.db.ReadTxn() 4469 4470 // Get an iterator over the accessors 4471 iter, err := txn.Get("vault_accessors", "node_id", nodeID) 4472 if err != nil { 4473 return nil, err 4474 } 4475 4476 ws.Add(iter.WatchCh()) 4477 4478 var out []*structs.VaultAccessor 4479 for { 4480 raw := iter.Next() 4481 if raw == nil { 4482 break 4483 } 4484 out = append(out, raw.(*structs.VaultAccessor)) 4485 } 4486 return out, nil 4487 } 4488 4489 func indexEntry(table string, index uint64) *IndexEntry { 4490 return &IndexEntry{ 4491 Key: table, 4492 Value: index, 4493 } 4494 } 4495 4496 const siTokenAccessorTable = "si_token_accessors" 4497 4498 // UpsertSITokenAccessors is used to register a set of Service Identity token accessors. 4499 func (s *StateStore) UpsertSITokenAccessors(index uint64, accessors []*structs.SITokenAccessor) error { 4500 txn := s.db.WriteTxn(index) 4501 defer txn.Abort() 4502 4503 for _, accessor := range accessors { 4504 // set the create index 4505 accessor.CreateIndex = index 4506 4507 // insert the accessor 4508 if err := txn.Insert(siTokenAccessorTable, accessor); err != nil { 4509 return fmt.Errorf("accessor insert failed: %w", err) 4510 } 4511 } 4512 4513 // update the index for this table 4514 if err := txn.Insert("index", indexEntry(siTokenAccessorTable, index)); err != nil { 4515 return fmt.Errorf("index update failed: %w", err) 4516 } 4517 4518 return txn.Commit() 4519 } 4520 4521 // DeleteSITokenAccessors is used to delete a set of Service Identity token accessors. 4522 func (s *StateStore) DeleteSITokenAccessors(index uint64, accessors []*structs.SITokenAccessor) error { 4523 txn := s.db.WriteTxn(index) 4524 defer txn.Abort() 4525 4526 // Lookup each accessor 4527 for _, accessor := range accessors { 4528 // Delete the accessor 4529 if err := txn.Delete(siTokenAccessorTable, accessor); err != nil { 4530 return fmt.Errorf("accessor delete failed: %w", err) 4531 } 4532 } 4533 4534 // update the index for this table 4535 if err := txn.Insert("index", indexEntry(siTokenAccessorTable, index)); err != nil { 4536 return fmt.Errorf("index update failed: %w", err) 4537 } 4538 4539 return txn.Commit() 4540 } 4541 4542 // SITokenAccessor returns the given Service Identity token accessor. 4543 func (s *StateStore) SITokenAccessor(ws memdb.WatchSet, accessorID string) (*structs.SITokenAccessor, error) { 4544 txn := s.db.ReadTxn() 4545 defer txn.Abort() 4546 4547 watchCh, existing, err := txn.FirstWatch(siTokenAccessorTable, "id", accessorID) 4548 if err != nil { 4549 return nil, fmt.Errorf("accessor lookup failed: %w", err) 4550 } 4551 4552 ws.Add(watchCh) 4553 4554 if existing != nil { 4555 return existing.(*structs.SITokenAccessor), nil 4556 } 4557 4558 return nil, nil 4559 } 4560 4561 // SITokenAccessors returns an iterator of Service Identity token accessors. 4562 func (s *StateStore) SITokenAccessors(ws memdb.WatchSet) (memdb.ResultIterator, error) { 4563 txn := s.db.ReadTxn() 4564 defer txn.Abort() 4565 4566 iter, err := txn.Get(siTokenAccessorTable, "id") 4567 if err != nil { 4568 return nil, err 4569 } 4570 4571 ws.Add(iter.WatchCh()) 4572 4573 return iter, nil 4574 } 4575 4576 // SITokenAccessorsByAlloc returns all the Service Identity token accessors by alloc ID. 4577 func (s *StateStore) SITokenAccessorsByAlloc(ws memdb.WatchSet, allocID string) ([]*structs.SITokenAccessor, error) { 4578 txn := s.db.ReadTxn() 4579 defer txn.Abort() 4580 4581 // Get an iterator over the accessors 4582 iter, err := txn.Get(siTokenAccessorTable, "alloc_id", allocID) 4583 if err != nil { 4584 return nil, err 4585 } 4586 4587 ws.Add(iter.WatchCh()) 4588 4589 var result []*structs.SITokenAccessor 4590 for raw := iter.Next(); raw != nil; raw = iter.Next() { 4591 result = append(result, raw.(*structs.SITokenAccessor)) 4592 } 4593 4594 return result, nil 4595 } 4596 4597 // SITokenAccessorsByNode returns all the Service Identity token accessors by node ID. 4598 func (s *StateStore) SITokenAccessorsByNode(ws memdb.WatchSet, nodeID string) ([]*structs.SITokenAccessor, error) { 4599 txn := s.db.ReadTxn() 4600 defer txn.Abort() 4601 4602 // Get an iterator over the accessors 4603 iter, err := txn.Get(siTokenAccessorTable, "node_id", nodeID) 4604 if err != nil { 4605 return nil, err 4606 } 4607 4608 ws.Add(iter.WatchCh()) 4609 4610 var result []*structs.SITokenAccessor 4611 for raw := iter.Next(); raw != nil; raw = iter.Next() { 4612 result = append(result, raw.(*structs.SITokenAccessor)) 4613 } 4614 4615 return result, nil 4616 } 4617 4618 // UpdateDeploymentStatus is used to make deployment status updates and 4619 // potentially make a evaluation 4620 func (s *StateStore) UpdateDeploymentStatus(msgType structs.MessageType, index uint64, req *structs.DeploymentStatusUpdateRequest) error { 4621 txn := s.db.WriteTxnMsgT(msgType, index) 4622 defer txn.Abort() 4623 4624 if err := s.updateDeploymentStatusImpl(index, req.DeploymentUpdate, txn); err != nil { 4625 return err 4626 } 4627 4628 // Upsert the job if necessary 4629 if req.Job != nil { 4630 if err := s.upsertJobImpl(index, nil, req.Job, false, txn); err != nil { 4631 return err 4632 } 4633 } 4634 4635 // Upsert the optional eval 4636 if req.Eval != nil { 4637 if err := s.nestedUpsertEval(txn, index, req.Eval); err != nil { 4638 return err 4639 } 4640 } 4641 4642 return txn.Commit() 4643 } 4644 4645 // updateDeploymentStatusImpl is used to make deployment status updates 4646 func (s *StateStore) updateDeploymentStatusImpl(index uint64, u *structs.DeploymentStatusUpdate, txn *txn) error { 4647 // Retrieve deployment 4648 ws := memdb.NewWatchSet() 4649 deployment, err := s.deploymentByIDImpl(ws, u.DeploymentID, txn) 4650 if err != nil { 4651 return err 4652 } else if deployment == nil { 4653 return fmt.Errorf("Deployment ID %q couldn't be updated as it does not exist", u.DeploymentID) 4654 } else if !deployment.Active() { 4655 return fmt.Errorf("Deployment %q has terminal status %q:", deployment.ID, deployment.Status) 4656 } 4657 4658 // Apply the new status 4659 copy := deployment.Copy() 4660 copy.Status = u.Status 4661 copy.StatusDescription = u.StatusDescription 4662 copy.ModifyIndex = index 4663 4664 // Insert the deployment 4665 if err := txn.Insert("deployment", copy); err != nil { 4666 return err 4667 } 4668 4669 // Update the index 4670 if err := txn.Insert("index", &IndexEntry{"deployment", index}); err != nil { 4671 return fmt.Errorf("index update failed: %v", err) 4672 } 4673 4674 // If the deployment is being marked as complete, set the job to stable. 4675 if copy.Status == structs.DeploymentStatusSuccessful { 4676 if err := s.updateJobStabilityImpl(index, copy.Namespace, copy.JobID, copy.JobVersion, true, txn); err != nil { 4677 return fmt.Errorf("failed to update job stability: %v", err) 4678 } 4679 } 4680 4681 return nil 4682 } 4683 4684 // UpdateJobStability updates the stability of the given job and version to the 4685 // desired status. 4686 func (s *StateStore) UpdateJobStability(index uint64, namespace, jobID string, jobVersion uint64, stable bool) error { 4687 txn := s.db.WriteTxn(index) 4688 defer txn.Abort() 4689 4690 if err := s.updateJobStabilityImpl(index, namespace, jobID, jobVersion, stable, txn); err != nil { 4691 return err 4692 } 4693 4694 return txn.Commit() 4695 } 4696 4697 // updateJobStabilityImpl updates the stability of the given job and version 4698 func (s *StateStore) updateJobStabilityImpl(index uint64, namespace, jobID string, jobVersion uint64, stable bool, txn *txn) error { 4699 // Get the job that is referenced 4700 job, err := s.jobByIDAndVersionImpl(nil, namespace, jobID, jobVersion, txn) 4701 if err != nil { 4702 return err 4703 } 4704 4705 // Has already been cleared, nothing to do 4706 if job == nil { 4707 return nil 4708 } 4709 4710 // If the job already has the desired stability, nothing to do 4711 if job.Stable == stable { 4712 return nil 4713 } 4714 4715 copy := job.Copy() 4716 copy.Stable = stable 4717 return s.upsertJobImpl(index, nil, copy, true, txn) 4718 } 4719 4720 // UpdateDeploymentPromotion is used to promote canaries in a deployment and 4721 // potentially make a evaluation 4722 func (s *StateStore) UpdateDeploymentPromotion(msgType structs.MessageType, index uint64, req *structs.ApplyDeploymentPromoteRequest) error { 4723 txn := s.db.WriteTxnMsgT(msgType, index) 4724 defer txn.Abort() 4725 4726 // Retrieve deployment and ensure it is not terminal and is active 4727 ws := memdb.NewWatchSet() 4728 deployment, err := s.deploymentByIDImpl(ws, req.DeploymentID, txn) 4729 if err != nil { 4730 return err 4731 } else if deployment == nil { 4732 return fmt.Errorf("Deployment ID %q couldn't be updated as it does not exist", req.DeploymentID) 4733 } else if !deployment.Active() { 4734 return fmt.Errorf("Deployment %q has terminal status %q:", deployment.ID, deployment.Status) 4735 } 4736 4737 // Retrieve effected allocations 4738 iter, err := txn.Get("allocs", "deployment", req.DeploymentID) 4739 if err != nil { 4740 return err 4741 } 4742 4743 // groupIndex is a map of groups being promoted 4744 groupIndex := make(map[string]struct{}, len(req.Groups)) 4745 for _, g := range req.Groups { 4746 groupIndex[g] = struct{}{} 4747 } 4748 4749 // canaryIndex is the set of placed canaries in the deployment 4750 canaryIndex := make(map[string]struct{}, len(deployment.TaskGroups)) 4751 for _, dstate := range deployment.TaskGroups { 4752 for _, c := range dstate.PlacedCanaries { 4753 canaryIndex[c] = struct{}{} 4754 } 4755 } 4756 4757 // healthyCounts is a mapping of group to the number of healthy canaries 4758 healthyCounts := make(map[string]int, len(deployment.TaskGroups)) 4759 4760 // promotable is the set of allocations that we can move from canary to 4761 // non-canary 4762 var promotable []*structs.Allocation 4763 4764 for { 4765 raw := iter.Next() 4766 if raw == nil { 4767 break 4768 } 4769 4770 alloc := raw.(*structs.Allocation) 4771 4772 // Check that the alloc is a canary 4773 if _, ok := canaryIndex[alloc.ID]; !ok { 4774 continue 4775 } 4776 4777 // Check that the canary is part of a group being promoted 4778 if _, ok := groupIndex[alloc.TaskGroup]; !req.All && !ok { 4779 continue 4780 } 4781 4782 // Ensure the canaries are healthy 4783 if alloc.TerminalStatus() || !alloc.DeploymentStatus.IsHealthy() { 4784 continue 4785 } 4786 4787 healthyCounts[alloc.TaskGroup]++ 4788 promotable = append(promotable, alloc) 4789 } 4790 4791 // Determine if we have enough healthy allocations 4792 var unhealthyErr multierror.Error 4793 for tg, dstate := range deployment.TaskGroups { 4794 if _, ok := groupIndex[tg]; !req.All && !ok { 4795 continue 4796 } 4797 4798 need := dstate.DesiredCanaries 4799 if need == 0 { 4800 continue 4801 } 4802 4803 if have := healthyCounts[tg]; have < need { 4804 multierror.Append(&unhealthyErr, fmt.Errorf("Task group %q has %d/%d healthy allocations", tg, have, need)) 4805 } 4806 } 4807 4808 if err := unhealthyErr.ErrorOrNil(); err != nil { 4809 return err 4810 } 4811 4812 // Update deployment 4813 copy := deployment.Copy() 4814 copy.ModifyIndex = index 4815 for tg, status := range copy.TaskGroups { 4816 _, ok := groupIndex[tg] 4817 if !req.All && !ok { 4818 continue 4819 } 4820 4821 // reset the progress deadline 4822 if status.ProgressDeadline > 0 && !status.RequireProgressBy.IsZero() { 4823 status.RequireProgressBy = time.Now().Add(status.ProgressDeadline) 4824 } 4825 status.Promoted = true 4826 } 4827 4828 // If the deployment no longer needs promotion, update its status 4829 if !copy.RequiresPromotion() && copy.Status == structs.DeploymentStatusRunning { 4830 copy.StatusDescription = structs.DeploymentStatusDescriptionRunning 4831 } 4832 4833 // Insert the deployment 4834 if err := s.upsertDeploymentImpl(index, copy, txn); err != nil { 4835 return err 4836 } 4837 4838 // Upsert the optional eval 4839 if req.Eval != nil { 4840 if err := s.nestedUpsertEval(txn, index, req.Eval); err != nil { 4841 return err 4842 } 4843 } 4844 4845 // For each promotable allocation remove the canary field 4846 for _, alloc := range promotable { 4847 promoted := alloc.Copy() 4848 promoted.DeploymentStatus.Canary = false 4849 promoted.DeploymentStatus.ModifyIndex = index 4850 promoted.ModifyIndex = index 4851 promoted.AllocModifyIndex = index 4852 4853 if err := txn.Insert("allocs", promoted); err != nil { 4854 return fmt.Errorf("alloc insert failed: %v", err) 4855 } 4856 } 4857 4858 // Update the alloc index 4859 if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { 4860 return fmt.Errorf("index update failed: %v", err) 4861 } 4862 4863 return txn.Commit() 4864 } 4865 4866 // UpdateDeploymentAllocHealth is used to update the health of allocations as 4867 // part of the deployment and potentially make a evaluation 4868 func (s *StateStore) UpdateDeploymentAllocHealth(msgType structs.MessageType, index uint64, req *structs.ApplyDeploymentAllocHealthRequest) error { 4869 txn := s.db.WriteTxnMsgT(msgType, index) 4870 defer txn.Abort() 4871 4872 // Retrieve deployment and ensure it is not terminal and is active 4873 ws := memdb.NewWatchSet() 4874 deployment, err := s.deploymentByIDImpl(ws, req.DeploymentID, txn) 4875 if err != nil { 4876 return err 4877 } else if deployment == nil { 4878 return fmt.Errorf("Deployment ID %q couldn't be updated as it does not exist", req.DeploymentID) 4879 } else if !deployment.Active() { 4880 return fmt.Errorf("Deployment %q has terminal status %q:", deployment.ID, deployment.Status) 4881 } 4882 4883 // Update the health status of each allocation 4884 if total := len(req.HealthyAllocationIDs) + len(req.UnhealthyAllocationIDs); total != 0 { 4885 setAllocHealth := func(id string, healthy bool, ts time.Time) error { 4886 existing, err := txn.First("allocs", "id", id) 4887 if err != nil { 4888 return fmt.Errorf("alloc %q lookup failed: %v", id, err) 4889 } 4890 if existing == nil { 4891 return fmt.Errorf("unknown alloc %q", id) 4892 } 4893 4894 old := existing.(*structs.Allocation) 4895 if old.DeploymentID != req.DeploymentID { 4896 return fmt.Errorf("alloc %q is not part of deployment %q", id, req.DeploymentID) 4897 } 4898 4899 // Set the health 4900 copy := old.Copy() 4901 if copy.DeploymentStatus == nil { 4902 copy.DeploymentStatus = &structs.AllocDeploymentStatus{} 4903 } 4904 copy.DeploymentStatus.Healthy = pointer.Of(healthy) 4905 copy.DeploymentStatus.Timestamp = ts 4906 copy.DeploymentStatus.ModifyIndex = index 4907 copy.ModifyIndex = index 4908 4909 if err := s.updateDeploymentWithAlloc(index, copy, old, txn); err != nil { 4910 return fmt.Errorf("error updating deployment: %v", err) 4911 } 4912 4913 if err := txn.Insert("allocs", copy); err != nil { 4914 return fmt.Errorf("alloc insert failed: %v", err) 4915 } 4916 4917 return nil 4918 } 4919 4920 for _, id := range req.HealthyAllocationIDs { 4921 if err := setAllocHealth(id, true, req.Timestamp); err != nil { 4922 return err 4923 } 4924 } 4925 for _, id := range req.UnhealthyAllocationIDs { 4926 if err := setAllocHealth(id, false, req.Timestamp); err != nil { 4927 return err 4928 } 4929 } 4930 4931 // Update the indexes 4932 if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { 4933 return fmt.Errorf("index update failed: %v", err) 4934 } 4935 } 4936 4937 // Update the deployment status as needed. 4938 if req.DeploymentUpdate != nil { 4939 if err := s.updateDeploymentStatusImpl(index, req.DeploymentUpdate, txn); err != nil { 4940 return err 4941 } 4942 } 4943 4944 // Upsert the job if necessary 4945 if req.Job != nil { 4946 if err := s.upsertJobImpl(index, nil, req.Job, false, txn); err != nil { 4947 return err 4948 } 4949 } 4950 4951 // Upsert the optional eval 4952 if req.Eval != nil { 4953 if err := s.nestedUpsertEval(txn, index, req.Eval); err != nil { 4954 return err 4955 } 4956 } 4957 4958 return txn.Commit() 4959 } 4960 4961 // LatestIndex returns the greatest index value for all indexes. 4962 func (s *StateStore) LatestIndex() (uint64, error) { 4963 indexes, err := s.Indexes() 4964 if err != nil { 4965 return 0, err 4966 } 4967 4968 var max uint64 = 0 4969 for { 4970 raw := indexes.Next() 4971 if raw == nil { 4972 break 4973 } 4974 4975 // Prepare the request struct 4976 idx := raw.(*IndexEntry) 4977 4978 // Determine the max 4979 if idx.Value > max { 4980 max = idx.Value 4981 } 4982 } 4983 4984 return max, nil 4985 } 4986 4987 // Index finds the matching index value 4988 func (s *StateStore) Index(name string) (uint64, error) { 4989 txn := s.db.ReadTxn() 4990 4991 // Lookup the first matching index 4992 out, err := txn.First("index", "id", name) 4993 if err != nil { 4994 return 0, err 4995 } 4996 if out == nil { 4997 return 0, nil 4998 } 4999 return out.(*IndexEntry).Value, nil 5000 } 5001 5002 // Indexes returns an iterator over all the indexes 5003 func (s *StateStore) Indexes() (memdb.ResultIterator, error) { 5004 txn := s.db.ReadTxn() 5005 5006 // Walk the entire nodes table 5007 iter, err := txn.Get("index", "id") 5008 if err != nil { 5009 return nil, err 5010 } 5011 return iter, nil 5012 } 5013 5014 // ReconcileJobSummaries re-creates summaries for all jobs present in the state 5015 // store 5016 func (s *StateStore) ReconcileJobSummaries(index uint64) error { 5017 txn := s.db.WriteTxn(index) 5018 defer txn.Abort() 5019 5020 // Get all the jobs 5021 iter, err := txn.Get("jobs", "id") 5022 if err != nil { 5023 return err 5024 } 5025 // COMPAT: Remove after 0.11 5026 // Iterate over jobs to build a list of parent jobs and their children 5027 parentMap := make(map[string][]*structs.Job) 5028 for { 5029 rawJob := iter.Next() 5030 if rawJob == nil { 5031 break 5032 } 5033 job := rawJob.(*structs.Job) 5034 if job.ParentID != "" { 5035 children := parentMap[job.ParentID] 5036 children = append(children, job) 5037 parentMap[job.ParentID] = children 5038 } 5039 } 5040 5041 // Get all the jobs again 5042 iter, err = txn.Get("jobs", "id") 5043 if err != nil { 5044 return err 5045 } 5046 5047 for { 5048 rawJob := iter.Next() 5049 if rawJob == nil { 5050 break 5051 } 5052 job := rawJob.(*structs.Job) 5053 5054 if job.IsParameterized() || job.IsPeriodic() { 5055 // COMPAT: Remove after 0.11 5056 5057 // The following block of code fixes incorrect child summaries due to a bug 5058 // See https://github.com/hernad/nomad/issues/3886 for details 5059 rawSummary, err := txn.First("job_summary", "id", job.Namespace, job.ID) 5060 if err != nil { 5061 return err 5062 } 5063 if rawSummary == nil { 5064 continue 5065 } 5066 5067 oldSummary := rawSummary.(*structs.JobSummary) 5068 5069 // Create an empty summary 5070 summary := &structs.JobSummary{ 5071 JobID: job.ID, 5072 Namespace: job.Namespace, 5073 Summary: make(map[string]structs.TaskGroupSummary), 5074 Children: &structs.JobChildrenSummary{}, 5075 } 5076 5077 // Iterate over children of this job if any to fix summary counts 5078 children := parentMap[job.ID] 5079 for _, childJob := range children { 5080 switch childJob.Status { 5081 case structs.JobStatusPending: 5082 summary.Children.Pending++ 5083 case structs.JobStatusDead: 5084 summary.Children.Dead++ 5085 case structs.JobStatusRunning: 5086 summary.Children.Running++ 5087 } 5088 } 5089 5090 // Insert the job summary if its different 5091 if !reflect.DeepEqual(summary, oldSummary) { 5092 // Set the create index of the summary same as the job's create index 5093 // and the modify index to the current index 5094 summary.CreateIndex = job.CreateIndex 5095 summary.ModifyIndex = index 5096 5097 if err := txn.Insert("job_summary", summary); err != nil { 5098 return fmt.Errorf("error inserting job summary: %v", err) 5099 } 5100 } 5101 5102 // Done with handling a parent job, continue to next 5103 continue 5104 } 5105 5106 // Create a job summary for the job 5107 summary := &structs.JobSummary{ 5108 JobID: job.ID, 5109 Namespace: job.Namespace, 5110 Summary: make(map[string]structs.TaskGroupSummary), 5111 } 5112 for _, tg := range job.TaskGroups { 5113 summary.Summary[tg.Name] = structs.TaskGroupSummary{} 5114 } 5115 5116 // Find all the allocations for the jobs 5117 iterAllocs, err := txn.Get("allocs", "job", job.Namespace, job.ID) 5118 if err != nil { 5119 return err 5120 } 5121 5122 // Calculate the summary for the job 5123 for { 5124 rawAlloc := iterAllocs.Next() 5125 if rawAlloc == nil { 5126 break 5127 } 5128 alloc := rawAlloc.(*structs.Allocation) 5129 5130 // Ignore the allocation if it doesn't belong to the currently 5131 // registered job. The allocation is checked because of issue #2304 5132 if alloc.Job == nil || alloc.Job.CreateIndex != job.CreateIndex { 5133 continue 5134 } 5135 5136 tg := summary.Summary[alloc.TaskGroup] 5137 switch alloc.ClientStatus { 5138 case structs.AllocClientStatusFailed: 5139 tg.Failed += 1 5140 case structs.AllocClientStatusLost: 5141 tg.Lost += 1 5142 case structs.AllocClientStatusUnknown: 5143 tg.Unknown += 1 5144 case structs.AllocClientStatusComplete: 5145 tg.Complete += 1 5146 case structs.AllocClientStatusRunning: 5147 tg.Running += 1 5148 case structs.AllocClientStatusPending: 5149 tg.Starting += 1 5150 default: 5151 s.logger.Error("invalid client status set on allocation", "client_status", alloc.ClientStatus, "alloc_id", alloc.ID) 5152 } 5153 summary.Summary[alloc.TaskGroup] = tg 5154 } 5155 5156 // Set the create index of the summary same as the job's create index 5157 // and the modify index to the current index 5158 summary.CreateIndex = job.CreateIndex 5159 summary.ModifyIndex = index 5160 5161 // Insert the job summary 5162 if err := txn.Insert("job_summary", summary); err != nil { 5163 return fmt.Errorf("error inserting job summary: %v", err) 5164 } 5165 } 5166 5167 // Update the indexes table for job summary 5168 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 5169 return fmt.Errorf("index update failed: %v", err) 5170 } 5171 return txn.Commit() 5172 } 5173 5174 // setJobStatuses is a helper for calling setJobStatus on multiple jobs by ID. 5175 // It takes a map of job IDs to an optional forceStatus string. It returns an 5176 // error if the job doesn't exist or setJobStatus fails. 5177 func (s *StateStore) setJobStatuses(index uint64, txn *txn, 5178 jobs map[structs.NamespacedID]string, evalDelete bool) error { 5179 for tuple, forceStatus := range jobs { 5180 5181 existing, err := txn.First("jobs", "id", tuple.Namespace, tuple.ID) 5182 if err != nil { 5183 return fmt.Errorf("job lookup failed: %v", err) 5184 } 5185 5186 if existing == nil { 5187 continue 5188 } 5189 5190 if err := s.setJobStatus(index, txn, existing.(*structs.Job), evalDelete, forceStatus); err != nil { 5191 return err 5192 } 5193 5194 } 5195 5196 return nil 5197 } 5198 5199 // setJobStatus sets the status of the job by looking up associated evaluations 5200 // and allocations. evalDelete should be set to true if setJobStatus is being 5201 // called because an evaluation is being deleted (potentially because of garbage 5202 // collection). If forceStatus is non-empty, the job's status will be set to the 5203 // passed status. 5204 func (s *StateStore) setJobStatus(index uint64, txn *txn, 5205 job *structs.Job, evalDelete bool, forceStatus string) error { 5206 5207 // Capture the current status so we can check if there is a change 5208 oldStatus := job.Status 5209 newStatus := forceStatus 5210 5211 // If forceStatus is not set, compute the jobs status. 5212 if forceStatus == "" { 5213 var err error 5214 newStatus, err = s.getJobStatus(txn, job, evalDelete) 5215 if err != nil { 5216 return err 5217 } 5218 } 5219 5220 // Fast-path if the job has not changed. 5221 if oldStatus == newStatus { 5222 return nil 5223 } 5224 5225 // Copy and update the existing job 5226 updated := job.Copy() 5227 updated.Status = newStatus 5228 updated.ModifyIndex = index 5229 5230 // Insert the job 5231 if err := txn.Insert("jobs", updated); err != nil { 5232 return fmt.Errorf("job insert failed: %v", err) 5233 } 5234 if err := txn.Insert("index", &IndexEntry{"jobs", index}); err != nil { 5235 return fmt.Errorf("index update failed: %v", err) 5236 } 5237 5238 // Update the children summary 5239 if err := s.setJobSummary(txn, updated, index, oldStatus, newStatus); err != nil { 5240 return fmt.Errorf("job summary update failed %w", err) 5241 } 5242 return nil 5243 } 5244 5245 func (s *StateStore) setJobSummary(txn *txn, updated *structs.Job, index uint64, oldStatus, newStatus string) error { 5246 if updated.ParentID == "" { 5247 return nil 5248 } 5249 5250 // Try to update the summary of the parent job summary 5251 summaryRaw, err := txn.First("job_summary", "id", updated.Namespace, updated.ParentID) 5252 if err != nil { 5253 return fmt.Errorf("unable to retrieve summary for parent job: %v", err) 5254 } 5255 5256 // Only continue if the summary exists. It could not exist if the parent 5257 // job was removed 5258 if summaryRaw != nil { 5259 existing := summaryRaw.(*structs.JobSummary) 5260 pSummary := existing.Copy() 5261 if pSummary.Children == nil { 5262 pSummary.Children = new(structs.JobChildrenSummary) 5263 } 5264 5265 // Determine the transition and update the correct fields 5266 children := pSummary.Children 5267 5268 // Decrement old status 5269 if oldStatus != "" { 5270 switch oldStatus { 5271 case structs.JobStatusPending: 5272 children.Pending-- 5273 case structs.JobStatusRunning: 5274 children.Running-- 5275 case structs.JobStatusDead: 5276 children.Dead-- 5277 default: 5278 return fmt.Errorf("unknown old job status %q", oldStatus) 5279 } 5280 } 5281 5282 // Increment new status 5283 switch newStatus { 5284 case structs.JobStatusPending: 5285 children.Pending++ 5286 case structs.JobStatusRunning: 5287 children.Running++ 5288 case structs.JobStatusDead: 5289 children.Dead++ 5290 default: 5291 return fmt.Errorf("unknown new job status %q", newStatus) 5292 } 5293 5294 // Update the index 5295 pSummary.ModifyIndex = index 5296 5297 // Insert the summary 5298 if err := txn.Insert("job_summary", pSummary); err != nil { 5299 return fmt.Errorf("job summary insert failed: %v", err) 5300 } 5301 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 5302 return fmt.Errorf("index update failed: %v", err) 5303 } 5304 } 5305 return nil 5306 } 5307 5308 func (s *StateStore) getJobStatus(txn *txn, job *structs.Job, evalDelete bool) (string, error) { 5309 // System, Periodic and Parameterized jobs are running until explicitly 5310 // stopped. 5311 if job.Type == structs.JobTypeSystem || 5312 job.IsParameterized() || 5313 job.IsPeriodic() { 5314 if job.Stop { 5315 return structs.JobStatusDead, nil 5316 } 5317 return structs.JobStatusRunning, nil 5318 } 5319 5320 allocs, err := txn.Get("allocs", "job", job.Namespace, job.ID) 5321 if err != nil { 5322 return "", err 5323 } 5324 5325 // If there is a non-terminal allocation, the job is running. 5326 hasAlloc := false 5327 for alloc := allocs.Next(); alloc != nil; alloc = allocs.Next() { 5328 hasAlloc = true 5329 if !alloc.(*structs.Allocation).TerminalStatus() { 5330 return structs.JobStatusRunning, nil 5331 } 5332 } 5333 5334 evals, err := txn.Get("evals", "job_prefix", job.Namespace, job.ID) 5335 if err != nil { 5336 return "", err 5337 } 5338 5339 hasEval := false 5340 for raw := evals.Next(); raw != nil; raw = evals.Next() { 5341 e := raw.(*structs.Evaluation) 5342 5343 // Filter non-exact matches 5344 if e.JobID != job.ID { 5345 continue 5346 } 5347 5348 hasEval = true 5349 if !e.TerminalStatus() { 5350 return structs.JobStatusPending, nil 5351 } 5352 } 5353 5354 // The job is dead if all the allocations and evals are terminal or if there 5355 // are no evals because of garbage collection. 5356 if evalDelete || hasEval || hasAlloc { 5357 return structs.JobStatusDead, nil 5358 } 5359 5360 return structs.JobStatusPending, nil 5361 } 5362 5363 // updateSummaryWithJob creates or updates job summaries when new jobs are 5364 // upserted or existing ones are updated 5365 func (s *StateStore) updateSummaryWithJob(index uint64, job *structs.Job, 5366 txn *txn) error { 5367 5368 // Update the job summary 5369 summaryRaw, err := txn.First("job_summary", "id", job.Namespace, job.ID) 5370 if err != nil { 5371 return fmt.Errorf("job summary lookup failed: %v", err) 5372 } 5373 5374 // Get the summary or create if necessary 5375 var summary *structs.JobSummary 5376 hasSummaryChanged := false 5377 if summaryRaw != nil { 5378 summary = summaryRaw.(*structs.JobSummary).Copy() 5379 } else { 5380 summary = &structs.JobSummary{ 5381 JobID: job.ID, 5382 Namespace: job.Namespace, 5383 Summary: make(map[string]structs.TaskGroupSummary), 5384 Children: new(structs.JobChildrenSummary), 5385 CreateIndex: index, 5386 } 5387 hasSummaryChanged = true 5388 } 5389 5390 for _, tg := range job.TaskGroups { 5391 if _, ok := summary.Summary[tg.Name]; !ok { 5392 newSummary := structs.TaskGroupSummary{ 5393 Complete: 0, 5394 Failed: 0, 5395 Running: 0, 5396 Starting: 0, 5397 } 5398 summary.Summary[tg.Name] = newSummary 5399 hasSummaryChanged = true 5400 } 5401 } 5402 5403 // The job summary has changed, so update the modify index. 5404 if hasSummaryChanged { 5405 summary.ModifyIndex = index 5406 5407 // Update the indexes table for job summary 5408 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 5409 return fmt.Errorf("index update failed: %v", err) 5410 } 5411 if err := txn.Insert("job_summary", summary); err != nil { 5412 return err 5413 } 5414 } 5415 5416 return nil 5417 } 5418 5419 // updateJobScalingPolicies upserts any scaling policies contained in the job and removes 5420 // any previous scaling policies that were removed from the job 5421 func (s *StateStore) updateJobScalingPolicies(index uint64, job *structs.Job, txn *txn) error { 5422 5423 ws := memdb.NewWatchSet() 5424 5425 scalingPolicies := job.GetScalingPolicies() 5426 newTargets := map[string]bool{} 5427 for _, p := range scalingPolicies { 5428 newTargets[p.JobKey()] = true 5429 } 5430 // find existing policies that need to be deleted 5431 deletedPolicies := []string{} 5432 iter, err := s.ScalingPoliciesByJobTxn(ws, job.Namespace, job.ID, txn) 5433 if err != nil { 5434 return fmt.Errorf("ScalingPoliciesByJob lookup failed: %v", err) 5435 } 5436 for raw := iter.Next(); raw != nil; raw = iter.Next() { 5437 oldPolicy := raw.(*structs.ScalingPolicy) 5438 if !newTargets[oldPolicy.JobKey()] { 5439 deletedPolicies = append(deletedPolicies, oldPolicy.ID) 5440 } 5441 } 5442 err = s.DeleteScalingPoliciesTxn(index, deletedPolicies, txn) 5443 if err != nil { 5444 return fmt.Errorf("DeleteScalingPolicies of removed policies failed: %v", err) 5445 } 5446 5447 err = s.UpsertScalingPoliciesTxn(index, scalingPolicies, txn) 5448 if err != nil { 5449 return fmt.Errorf("UpsertScalingPolicies of policies failed: %v", err) 5450 } 5451 5452 return nil 5453 } 5454 5455 // updateJobSubmission stores the original job source and variables associated that the 5456 // job structure originates from. It is up to the job submitter to include the source 5457 // material, and as such sub may be nil, in which case nothing is stored. 5458 func (s *StateStore) updateJobSubmission(index uint64, sub *structs.JobSubmission, namespace, jobID string, version uint64, txn *txn) error { 5459 // critical that we operate on a copy; the original must not be modified 5460 // e.g. in the case of job gc and its last second version bump 5461 sub = sub.Copy() 5462 5463 switch { 5464 case sub == nil: 5465 return nil 5466 case namespace == "": 5467 return errors.New("job_submission requires a namespace") 5468 case jobID == "": 5469 return errors.New("job_submission requires a jobID") 5470 default: 5471 sub.Namespace = namespace 5472 sub.JobID = jobID 5473 sub.JobModifyIndex = index 5474 sub.Version = version 5475 } 5476 5477 // check if we already have a submission for this (namespace, jobID, version) 5478 obj, err := txn.First("job_submission", "id", namespace, jobID, version) 5479 if err != nil { 5480 return err 5481 } 5482 if obj != nil { 5483 // if we already have a submission for this (namespace, jobID, version) 5484 // then there is nothing to do; manually avoid potential for duplicates 5485 return nil 5486 } 5487 5488 // insert the job submission for this (namespace, jobID, version) 5489 if err := txn.Insert("job_submission", sub); err != nil { 5490 return err 5491 } 5492 5493 // prune old job submissions 5494 return s.pruneJobSubmissions(namespace, jobID, txn) 5495 } 5496 5497 func (s *StateStore) pruneJobSubmissions(namespace, jobID string, txn *txn) error { 5498 // although the number of tracked submissions is the same as the number of 5499 // tracked job versions, do not assume a 1:1 correlation, as there could be 5500 // holes in the submissions (or none at all) 5501 limit := structs.JobTrackedVersions 5502 5503 // iterate through all stored submissions 5504 iter, err := txn.Get("job_submission", "id_prefix", namespace, jobID) 5505 if err != nil { 5506 return err 5507 } 5508 5509 stored := make([]lang.Pair[uint64, uint64], 0, limit+1) 5510 for next := iter.Next(); next != nil; next = iter.Next() { 5511 sub := next.(*structs.JobSubmission) 5512 // scanning by prefix; make sure we collect exact matches only 5513 if sub.Namespace == namespace && sub.JobID == jobID { 5514 stored = append(stored, lang.Pair[uint64, uint64]{First: sub.JobModifyIndex, Second: sub.Version}) 5515 } 5516 } 5517 5518 // if we are still below the limit, nothing to do 5519 if len(stored) <= limit { 5520 return nil 5521 } 5522 5523 // sort by job modify index descending so we can just keep the first N 5524 slices.SortFunc(stored, func(a, b lang.Pair[uint64, uint64]) bool { 5525 return a.First > b.First 5526 }) 5527 5528 // remove the outdated submission versions 5529 for _, sub := range stored[limit:] { 5530 if err = txn.Delete("job_submission", &structs.JobSubmission{ 5531 Namespace: namespace, 5532 JobID: jobID, 5533 Version: sub.Second, 5534 }); err != nil { 5535 return err 5536 } 5537 } 5538 return nil 5539 } 5540 5541 // updateJobCSIPlugins runs on job update, and indexes the job in the plugin 5542 func (s *StateStore) updateJobCSIPlugins(index uint64, job, prev *structs.Job, txn *txn) error { 5543 plugIns := make(map[string]*structs.CSIPlugin) 5544 5545 upsertFn := func(job *structs.Job, delete bool) error { 5546 for _, tg := range job.TaskGroups { 5547 for _, t := range tg.Tasks { 5548 if t.CSIPluginConfig == nil { 5549 continue 5550 } 5551 5552 plugIn, ok := plugIns[t.CSIPluginConfig.ID] 5553 if !ok { 5554 p, err := s.CSIPluginByIDTxn(txn, nil, t.CSIPluginConfig.ID) 5555 if err != nil { 5556 return err 5557 } 5558 if p == nil { 5559 plugIn = structs.NewCSIPlugin(t.CSIPluginConfig.ID, index) 5560 } else { 5561 plugIn = p.Copy() 5562 plugIn.ModifyIndex = index 5563 } 5564 plugIns[plugIn.ID] = plugIn 5565 } 5566 5567 if delete { 5568 plugIn.DeleteJob(job, nil) 5569 } else { 5570 plugIn.AddJob(job, nil) 5571 } 5572 } 5573 } 5574 5575 return nil 5576 } 5577 5578 if prev != nil { 5579 err := upsertFn(prev, true) 5580 if err != nil { 5581 return err 5582 } 5583 } 5584 5585 err := upsertFn(job, false) 5586 if err != nil { 5587 return err 5588 } 5589 5590 for _, plugIn := range plugIns { 5591 err = txn.Insert("csi_plugins", plugIn) 5592 if err != nil { 5593 return fmt.Errorf("csi_plugins insert error: %v", err) 5594 } 5595 } 5596 5597 if err := txn.Insert("index", &IndexEntry{"csi_plugins", index}); err != nil { 5598 return fmt.Errorf("index update failed: %v", err) 5599 } 5600 5601 return nil 5602 } 5603 5604 // updateDeploymentWithAlloc is used to update the deployment state associated 5605 // with the given allocation. The passed alloc may be updated if the deployment 5606 // status has changed to capture the modify index at which it has changed. 5607 func (s *StateStore) updateDeploymentWithAlloc(index uint64, alloc, existing *structs.Allocation, txn *txn) error { 5608 // Nothing to do if the allocation is not associated with a deployment 5609 if alloc.DeploymentID == "" { 5610 return nil 5611 } 5612 5613 // Get the deployment 5614 ws := memdb.NewWatchSet() 5615 deployment, err := s.deploymentByIDImpl(ws, alloc.DeploymentID, txn) 5616 if err != nil { 5617 return err 5618 } 5619 if deployment == nil { 5620 return nil 5621 } 5622 5623 // Retrieve the deployment state object 5624 _, ok := deployment.TaskGroups[alloc.TaskGroup] 5625 if !ok { 5626 // If the task group isn't part of the deployment, the task group wasn't 5627 // part of a rolling update so nothing to do 5628 return nil 5629 } 5630 5631 // Do not modify in-place. Instead keep track of what must be done 5632 placed := 0 5633 healthy := 0 5634 unhealthy := 0 5635 5636 // If there was no existing allocation, this is a placement and we increment 5637 // the placement 5638 existingHealthSet := existing != nil && existing.DeploymentStatus.HasHealth() 5639 allocHealthSet := alloc.DeploymentStatus.HasHealth() 5640 if existing == nil || existing.DeploymentID != alloc.DeploymentID { 5641 placed++ 5642 } else if !existingHealthSet && allocHealthSet { 5643 if *alloc.DeploymentStatus.Healthy { 5644 healthy++ 5645 } else { 5646 unhealthy++ 5647 } 5648 } else if existingHealthSet && allocHealthSet { 5649 // See if it has gone from healthy to unhealthy 5650 if *existing.DeploymentStatus.Healthy && !*alloc.DeploymentStatus.Healthy { 5651 healthy-- 5652 unhealthy++ 5653 } 5654 } 5655 5656 // Nothing to do 5657 if placed == 0 && healthy == 0 && unhealthy == 0 { 5658 return nil 5659 } 5660 5661 // Update the allocation's deployment status modify index 5662 if alloc.DeploymentStatus != nil && healthy+unhealthy != 0 { 5663 alloc.DeploymentStatus.ModifyIndex = index 5664 } 5665 5666 // Create a copy of the deployment object 5667 deploymentCopy := deployment.Copy() 5668 deploymentCopy.ModifyIndex = index 5669 5670 dstate := deploymentCopy.TaskGroups[alloc.TaskGroup] 5671 dstate.PlacedAllocs += placed 5672 dstate.HealthyAllocs += healthy 5673 dstate.UnhealthyAllocs += unhealthy 5674 5675 // Ensure PlacedCanaries accurately reflects the alloc canary status 5676 if alloc.DeploymentStatus != nil && alloc.DeploymentStatus.Canary { 5677 found := false 5678 for _, canary := range dstate.PlacedCanaries { 5679 if alloc.ID == canary { 5680 found = true 5681 break 5682 } 5683 } 5684 if !found { 5685 dstate.PlacedCanaries = append(dstate.PlacedCanaries, alloc.ID) 5686 } 5687 } 5688 5689 // Update the progress deadline 5690 if pd := dstate.ProgressDeadline; pd != 0 { 5691 // If we are the first placed allocation for the deployment start the progress deadline. 5692 if placed != 0 && dstate.RequireProgressBy.IsZero() { 5693 // Use modify time instead of create time because we may in-place 5694 // update the allocation to be part of a new deployment. 5695 dstate.RequireProgressBy = time.Unix(0, alloc.ModifyTime).Add(pd) 5696 } else if healthy != 0 { 5697 if d := alloc.DeploymentStatus.Timestamp.Add(pd); d.After(dstate.RequireProgressBy) { 5698 dstate.RequireProgressBy = d 5699 } 5700 } 5701 } 5702 5703 // Upsert the deployment 5704 if err := s.upsertDeploymentImpl(index, deploymentCopy, txn); err != nil { 5705 return err 5706 } 5707 5708 return nil 5709 } 5710 5711 // updateSummaryWithAlloc updates the job summary when allocations are updated 5712 // or inserted 5713 func (s *StateStore) updateSummaryWithAlloc(index uint64, alloc *structs.Allocation, 5714 existingAlloc *structs.Allocation, txn *txn) error { 5715 5716 // We don't have to update the summary if the job is missing 5717 if alloc.Job == nil { 5718 return nil 5719 } 5720 5721 summaryRaw, err := txn.First("job_summary", "id", alloc.Namespace, alloc.JobID) 5722 if err != nil { 5723 return fmt.Errorf("unable to lookup job summary for job id %q in namespace %q: %v", alloc.JobID, alloc.Namespace, err) 5724 } 5725 5726 if summaryRaw == nil { 5727 // Check if the job is de-registered 5728 rawJob, err := txn.First("jobs", "id", alloc.Namespace, alloc.JobID) 5729 if err != nil { 5730 return fmt.Errorf("unable to query job: %v", err) 5731 } 5732 5733 // If the job is de-registered then we skip updating it's summary 5734 if rawJob == nil { 5735 return nil 5736 } 5737 5738 return fmt.Errorf("job summary for job %q in namespace %q is not present", alloc.JobID, alloc.Namespace) 5739 } 5740 5741 // Get a copy of the existing summary 5742 jobSummary := summaryRaw.(*structs.JobSummary).Copy() 5743 5744 // Not updating the job summary because the allocation doesn't belong to the 5745 // currently registered job 5746 if jobSummary.CreateIndex != alloc.Job.CreateIndex { 5747 return nil 5748 } 5749 5750 tgSummary, ok := jobSummary.Summary[alloc.TaskGroup] 5751 if !ok { 5752 return fmt.Errorf("unable to find task group in the job summary: %v", alloc.TaskGroup) 5753 } 5754 5755 summaryChanged := false 5756 if existingAlloc == nil { 5757 switch alloc.DesiredStatus { 5758 case structs.AllocDesiredStatusStop, structs.AllocDesiredStatusEvict: 5759 s.logger.Error("new allocation inserted into state store with bad desired status", 5760 "alloc_id", alloc.ID, "desired_status", alloc.DesiredStatus) 5761 } 5762 switch alloc.ClientStatus { 5763 case structs.AllocClientStatusPending: 5764 tgSummary.Starting += 1 5765 if tgSummary.Queued > 0 { 5766 tgSummary.Queued -= 1 5767 } 5768 summaryChanged = true 5769 case structs.AllocClientStatusRunning, structs.AllocClientStatusFailed, 5770 structs.AllocClientStatusComplete: 5771 s.logger.Error("new allocation inserted into state store with bad client status", 5772 "alloc_id", alloc.ID, "client_status", alloc.ClientStatus) 5773 } 5774 } else if existingAlloc.ClientStatus != alloc.ClientStatus { 5775 // Incrementing the client of the bin of the current state 5776 switch alloc.ClientStatus { 5777 case structs.AllocClientStatusRunning: 5778 tgSummary.Running += 1 5779 case structs.AllocClientStatusFailed: 5780 tgSummary.Failed += 1 5781 case structs.AllocClientStatusPending: 5782 tgSummary.Starting += 1 5783 case structs.AllocClientStatusComplete: 5784 tgSummary.Complete += 1 5785 case structs.AllocClientStatusLost: 5786 tgSummary.Lost += 1 5787 case structs.AllocClientStatusUnknown: 5788 tgSummary.Unknown += 1 5789 } 5790 5791 // Decrementing the count of the bin of the last state 5792 switch existingAlloc.ClientStatus { 5793 case structs.AllocClientStatusRunning: 5794 if tgSummary.Running > 0 { 5795 tgSummary.Running -= 1 5796 } 5797 case structs.AllocClientStatusPending: 5798 if tgSummary.Starting > 0 { 5799 tgSummary.Starting -= 1 5800 } 5801 case structs.AllocClientStatusLost: 5802 if tgSummary.Lost > 0 { 5803 tgSummary.Lost -= 1 5804 } 5805 case structs.AllocClientStatusUnknown: 5806 if tgSummary.Unknown > 0 { 5807 tgSummary.Unknown -= 1 5808 } 5809 case structs.AllocClientStatusFailed, structs.AllocClientStatusComplete: 5810 default: 5811 s.logger.Error("invalid old client status for allocation", 5812 "alloc_id", existingAlloc.ID, "client_status", existingAlloc.ClientStatus) 5813 } 5814 summaryChanged = true 5815 } 5816 jobSummary.Summary[alloc.TaskGroup] = tgSummary 5817 5818 if summaryChanged { 5819 jobSummary.ModifyIndex = index 5820 5821 s.updatePluginWithJobSummary(index, jobSummary, alloc, txn) 5822 5823 // Update the indexes table for job summary 5824 if err := txn.Insert("index", &IndexEntry{"job_summary", index}); err != nil { 5825 return fmt.Errorf("index update failed: %v", err) 5826 } 5827 5828 if err := txn.Insert("job_summary", jobSummary); err != nil { 5829 return fmt.Errorf("updating job summary failed: %v", err) 5830 } 5831 } 5832 5833 return nil 5834 } 5835 5836 // updatePluginForTerminalAlloc updates the CSI plugins for an alloc when the 5837 // allocation is updated or inserted with a terminal server status. 5838 func (s *StateStore) updatePluginForTerminalAlloc(index uint64, alloc *structs.Allocation, 5839 txn *txn) error { 5840 5841 if !alloc.ServerTerminalStatus() { 5842 return nil 5843 } 5844 5845 tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup) 5846 for _, t := range tg.Tasks { 5847 if t.CSIPluginConfig != nil { 5848 pluginID := t.CSIPluginConfig.ID 5849 plug, err := s.CSIPluginByIDTxn(txn, nil, pluginID) 5850 if err != nil { 5851 return err 5852 } 5853 if plug == nil { 5854 // plugin may not have been created because it never 5855 // became healthy, just move on 5856 return nil 5857 } 5858 plug = plug.Copy() 5859 err = plug.DeleteAlloc(alloc.ID, alloc.NodeID) 5860 if err != nil { 5861 return err 5862 } 5863 err = updateOrGCPlugin(index, txn, plug) 5864 if err != nil { 5865 return err 5866 } 5867 } 5868 } 5869 5870 return nil 5871 } 5872 5873 // updatePluginWithJobSummary updates the CSI plugins for a job when the 5874 // job summary is updated by an alloc 5875 func (s *StateStore) updatePluginWithJobSummary(index uint64, summary *structs.JobSummary, alloc *structs.Allocation, 5876 txn *txn) error { 5877 5878 tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup) 5879 if tg == nil { 5880 return nil 5881 } 5882 5883 for _, t := range tg.Tasks { 5884 if t.CSIPluginConfig != nil { 5885 pluginID := t.CSIPluginConfig.ID 5886 plug, err := s.CSIPluginByIDTxn(txn, nil, pluginID) 5887 if err != nil { 5888 return err 5889 } 5890 if plug == nil { 5891 plug = structs.NewCSIPlugin(pluginID, index) 5892 } else { 5893 plug = plug.Copy() 5894 } 5895 5896 plug.UpdateExpectedWithJob(alloc.Job, summary, 5897 alloc.Job.Status == structs.JobStatusDead) 5898 5899 err = updateOrGCPlugin(index, txn, plug) 5900 if err != nil { 5901 return err 5902 } 5903 } 5904 } 5905 5906 return nil 5907 } 5908 5909 // UpsertACLPolicies is used to create or update a set of ACL policies 5910 func (s *StateStore) UpsertACLPolicies(msgType structs.MessageType, index uint64, policies []*structs.ACLPolicy) error { 5911 txn := s.db.WriteTxnMsgT(msgType, index) 5912 defer txn.Abort() 5913 5914 for _, policy := range policies { 5915 // Ensure the policy hash is non-nil. This should be done outside the state store 5916 // for performance reasons, but we check here for defense in depth. 5917 if len(policy.Hash) == 0 { 5918 policy.SetHash() 5919 } 5920 5921 // Check if the policy already exists 5922 existing, err := txn.First("acl_policy", "id", policy.Name) 5923 if err != nil { 5924 return fmt.Errorf("policy lookup failed: %v", err) 5925 } 5926 5927 // Update all the indexes 5928 if existing != nil { 5929 policy.CreateIndex = existing.(*structs.ACLPolicy).CreateIndex 5930 policy.ModifyIndex = index 5931 } else { 5932 policy.CreateIndex = index 5933 policy.ModifyIndex = index 5934 } 5935 5936 // Update the policy 5937 if err := txn.Insert("acl_policy", policy); err != nil { 5938 return fmt.Errorf("upserting policy failed: %v", err) 5939 } 5940 } 5941 5942 // Update the indexes tabl 5943 if err := txn.Insert("index", &IndexEntry{"acl_policy", index}); err != nil { 5944 return fmt.Errorf("index update failed: %v", err) 5945 } 5946 5947 return txn.Commit() 5948 } 5949 5950 // DeleteACLPolicies deletes the policies with the given names 5951 func (s *StateStore) DeleteACLPolicies(msgType structs.MessageType, index uint64, names []string) error { 5952 txn := s.db.WriteTxnMsgT(msgType, index) 5953 defer txn.Abort() 5954 5955 // Delete the policy 5956 for _, name := range names { 5957 if _, err := txn.DeleteAll("acl_policy", "id", name); err != nil { 5958 return fmt.Errorf("deleting acl policy failed: %v", err) 5959 } 5960 } 5961 if err := txn.Insert("index", &IndexEntry{"acl_policy", index}); err != nil { 5962 return fmt.Errorf("index update failed: %v", err) 5963 } 5964 return txn.Commit() 5965 } 5966 5967 // ACLPolicyByName is used to lookup a policy by name 5968 func (s *StateStore) ACLPolicyByName(ws memdb.WatchSet, name string) (*structs.ACLPolicy, error) { 5969 txn := s.db.ReadTxn() 5970 5971 watchCh, existing, err := txn.FirstWatch("acl_policy", "id", name) 5972 if err != nil { 5973 return nil, fmt.Errorf("acl policy lookup failed: %v", err) 5974 } 5975 ws.Add(watchCh) 5976 5977 if existing != nil { 5978 return existing.(*structs.ACLPolicy), nil 5979 } 5980 return nil, nil 5981 } 5982 5983 // ACLPolicyByNamePrefix is used to lookup policies by prefix 5984 func (s *StateStore) ACLPolicyByNamePrefix(ws memdb.WatchSet, prefix string) (memdb.ResultIterator, error) { 5985 txn := s.db.ReadTxn() 5986 5987 iter, err := txn.Get("acl_policy", "id_prefix", prefix) 5988 if err != nil { 5989 return nil, fmt.Errorf("acl policy lookup failed: %v", err) 5990 } 5991 ws.Add(iter.WatchCh()) 5992 5993 return iter, nil 5994 } 5995 5996 // ACLPolicyByJob is used to lookup policies that have been attached to a 5997 // specific job 5998 func (s *StateStore) ACLPolicyByJob(ws memdb.WatchSet, ns, jobID string) (memdb.ResultIterator, error) { 5999 txn := s.db.ReadTxn() 6000 6001 iter, err := txn.Get("acl_policy", "job_prefix", ns, jobID) 6002 if err != nil { 6003 return nil, fmt.Errorf("acl policy lookup failed: %v", err) 6004 } 6005 ws.Add(iter.WatchCh()) 6006 6007 return iter, nil 6008 } 6009 6010 // ACLPolicies returns an iterator over all the acl policies 6011 func (s *StateStore) ACLPolicies(ws memdb.WatchSet) (memdb.ResultIterator, error) { 6012 txn := s.db.ReadTxn() 6013 6014 // Walk the entire table 6015 iter, err := txn.Get("acl_policy", "id") 6016 if err != nil { 6017 return nil, err 6018 } 6019 ws.Add(iter.WatchCh()) 6020 return iter, nil 6021 } 6022 6023 // UpsertACLTokens is used to create or update a set of ACL tokens 6024 func (s *StateStore) UpsertACLTokens(msgType structs.MessageType, index uint64, tokens []*structs.ACLToken) error { 6025 txn := s.db.WriteTxnMsgT(msgType, index) 6026 defer txn.Abort() 6027 6028 for _, token := range tokens { 6029 // Ensure the policy hash is non-nil. This should be done outside the state store 6030 // for performance reasons, but we check here for defense in depth. 6031 if len(token.Hash) == 0 { 6032 token.SetHash() 6033 } 6034 6035 // Check if the token already exists 6036 existing, err := txn.First("acl_token", "id", token.AccessorID) 6037 if err != nil { 6038 return fmt.Errorf("token lookup failed: %v", err) 6039 } 6040 6041 // Update all the indexes 6042 if existing != nil { 6043 existTK := existing.(*structs.ACLToken) 6044 token.CreateIndex = existTK.CreateIndex 6045 token.ModifyIndex = index 6046 6047 // Do not allow SecretID or create time to change 6048 token.SecretID = existTK.SecretID 6049 token.CreateTime = existTK.CreateTime 6050 6051 } else { 6052 token.CreateIndex = index 6053 token.ModifyIndex = index 6054 } 6055 6056 // Update the token 6057 if err := txn.Insert("acl_token", token); err != nil { 6058 return fmt.Errorf("upserting token failed: %v", err) 6059 } 6060 } 6061 6062 // Update the indexes table 6063 if err := txn.Insert("index", &IndexEntry{"acl_token", index}); err != nil { 6064 return fmt.Errorf("index update failed: %v", err) 6065 } 6066 return txn.Commit() 6067 } 6068 6069 // DeleteACLTokens deletes the tokens with the given accessor ids 6070 func (s *StateStore) DeleteACLTokens(msgType structs.MessageType, index uint64, ids []string) error { 6071 txn := s.db.WriteTxnMsgT(msgType, index) 6072 defer txn.Abort() 6073 6074 // Delete the tokens 6075 for _, id := range ids { 6076 if _, err := txn.DeleteAll("acl_token", "id", id); err != nil { 6077 return fmt.Errorf("deleting acl token failed: %v", err) 6078 } 6079 } 6080 if err := txn.Insert("index", &IndexEntry{"acl_token", index}); err != nil { 6081 return fmt.Errorf("index update failed: %v", err) 6082 } 6083 return txn.Commit() 6084 } 6085 6086 // ACLTokenByAccessorID is used to lookup a token by accessor ID 6087 func (s *StateStore) ACLTokenByAccessorID(ws memdb.WatchSet, id string) (*structs.ACLToken, error) { 6088 if id == "" { 6089 return nil, fmt.Errorf("acl token lookup failed: missing accessor id") 6090 } 6091 6092 txn := s.db.ReadTxn() 6093 6094 watchCh, existing, err := txn.FirstWatch("acl_token", "id", id) 6095 if err != nil { 6096 return nil, fmt.Errorf("acl token lookup failed: %v", err) 6097 } 6098 ws.Add(watchCh) 6099 6100 // If the existing token is nil, this indicates it does not exist in state. 6101 if existing == nil { 6102 return nil, nil 6103 } 6104 6105 // Assert the token type which allows us to perform additional work on the 6106 // token that is needed before returning the call. 6107 token := existing.(*structs.ACLToken) 6108 6109 // Handle potential staleness of ACL role links. 6110 if token, err = s.fixTokenRoleLinks(txn, token); err != nil { 6111 return nil, err 6112 } 6113 return token, nil 6114 } 6115 6116 // ACLTokenBySecretID is used to lookup a token by secret ID 6117 func (s *StateStore) ACLTokenBySecretID(ws memdb.WatchSet, secretID string) (*structs.ACLToken, error) { 6118 if secretID == "" { 6119 return nil, fmt.Errorf("acl token lookup failed: missing secret id") 6120 } 6121 6122 txn := s.db.ReadTxn() 6123 6124 watchCh, existing, err := txn.FirstWatch("acl_token", "secret", secretID) 6125 if err != nil { 6126 return nil, fmt.Errorf("acl token lookup failed: %v", err) 6127 } 6128 ws.Add(watchCh) 6129 6130 // If the existing token is nil, this indicates it does not exist in state. 6131 if existing == nil { 6132 return nil, nil 6133 } 6134 6135 // Assert the token type which allows us to perform additional work on the 6136 // token that is needed before returning the call. 6137 token := existing.(*structs.ACLToken) 6138 6139 // Handle potential staleness of ACL role links. 6140 if token, err = s.fixTokenRoleLinks(txn, token); err != nil { 6141 return nil, err 6142 } 6143 return token, nil 6144 } 6145 6146 // ACLTokenByAccessorIDPrefix is used to lookup tokens by prefix 6147 func (s *StateStore) ACLTokenByAccessorIDPrefix(ws memdb.WatchSet, prefix string, sort SortOption) (memdb.ResultIterator, error) { 6148 txn := s.db.ReadTxn() 6149 6150 var iter memdb.ResultIterator 6151 var err error 6152 6153 switch sort { 6154 case SortReverse: 6155 iter, err = txn.GetReverse("acl_token", "id_prefix", prefix) 6156 default: 6157 iter, err = txn.Get("acl_token", "id_prefix", prefix) 6158 } 6159 if err != nil { 6160 return nil, fmt.Errorf("acl token lookup failed: %v", err) 6161 } 6162 6163 ws.Add(iter.WatchCh()) 6164 return iter, nil 6165 } 6166 6167 // ACLTokens returns an iterator over all the tokens 6168 func (s *StateStore) ACLTokens(ws memdb.WatchSet, sort SortOption) (memdb.ResultIterator, error) { 6169 txn := s.db.ReadTxn() 6170 6171 var iter memdb.ResultIterator 6172 var err error 6173 6174 switch sort { 6175 case SortReverse: 6176 iter, err = txn.GetReverse("acl_token", "create") 6177 default: 6178 iter, err = txn.Get("acl_token", "create") 6179 } 6180 if err != nil { 6181 return nil, err 6182 } 6183 6184 ws.Add(iter.WatchCh()) 6185 return iter, nil 6186 } 6187 6188 // ACLTokensByGlobal returns an iterator over all the tokens filtered by global value 6189 func (s *StateStore) ACLTokensByGlobal(ws memdb.WatchSet, globalVal bool, sort SortOption) (memdb.ResultIterator, error) { 6190 txn := s.db.ReadTxn() 6191 6192 var iter memdb.ResultIterator 6193 var err error 6194 6195 // Walk the entire table 6196 switch sort { 6197 case SortReverse: 6198 iter, err = txn.GetReverse("acl_token", "global", globalVal) 6199 default: 6200 iter, err = txn.Get("acl_token", "global", globalVal) 6201 } 6202 if err != nil { 6203 return nil, err 6204 } 6205 6206 ws.Add(iter.WatchCh()) 6207 return iter, nil 6208 } 6209 6210 // CanBootstrapACLToken checks if bootstrapping is possible and returns the reset index 6211 func (s *StateStore) CanBootstrapACLToken() (bool, uint64, error) { 6212 txn := s.db.ReadTxn() 6213 6214 // Lookup the bootstrap sentinel 6215 out, err := txn.First("index", "id", "acl_token_bootstrap") 6216 if err != nil { 6217 return false, 0, err 6218 } 6219 6220 // No entry, we haven't bootstrapped yet 6221 if out == nil { 6222 return true, 0, nil 6223 } 6224 6225 // Return the reset index if we've already bootstrapped 6226 return false, out.(*IndexEntry).Value, nil 6227 } 6228 6229 // BootstrapACLTokens is used to create an initial ACL token. 6230 func (s *StateStore) BootstrapACLTokens(msgType structs.MessageType, index uint64, resetIndex uint64, token *structs.ACLToken) error { 6231 txn := s.db.WriteTxnMsgT(msgType, index) 6232 defer txn.Abort() 6233 6234 // Check if we have already done a bootstrap 6235 existing, err := txn.First("index", "id", "acl_token_bootstrap") 6236 if err != nil { 6237 return fmt.Errorf("bootstrap check failed: %v", err) 6238 } 6239 if existing != nil { 6240 if resetIndex == 0 { 6241 return fmt.Errorf("ACL bootstrap already done") 6242 } else if resetIndex != existing.(*IndexEntry).Value { 6243 return fmt.Errorf("Invalid reset index for ACL bootstrap") 6244 } 6245 } 6246 6247 // Update the Create/Modify time 6248 token.CreateIndex = index 6249 token.ModifyIndex = index 6250 6251 // Insert the token 6252 if err := txn.Insert("acl_token", token); err != nil { 6253 return fmt.Errorf("upserting token failed: %v", err) 6254 } 6255 6256 // Update the indexes table, prevents future bootstrap until reset 6257 if err := txn.Insert("index", &IndexEntry{"acl_token", index}); err != nil { 6258 return fmt.Errorf("index update failed: %v", err) 6259 } 6260 if err := txn.Insert("index", &IndexEntry{"acl_token_bootstrap", index}); err != nil { 6261 return fmt.Errorf("index update failed: %v", err) 6262 } 6263 return txn.Commit() 6264 } 6265 6266 // UpsertOneTimeToken is used to create or update a set of ACL 6267 // tokens. Validating that we're not upserting an already-expired token is 6268 // made the responsibility of the caller to facilitate testing. 6269 func (s *StateStore) UpsertOneTimeToken(msgType structs.MessageType, index uint64, token *structs.OneTimeToken) error { 6270 txn := s.db.WriteTxnMsgT(msgType, index) 6271 defer txn.Abort() 6272 6273 // we expect the RPC call to set the ExpiresAt 6274 if token.ExpiresAt.IsZero() { 6275 return fmt.Errorf("one-time token must have an ExpiresAt time") 6276 } 6277 6278 // Update all the indexes 6279 token.CreateIndex = index 6280 token.ModifyIndex = index 6281 6282 // Create the token 6283 if err := txn.Insert("one_time_token", token); err != nil { 6284 return fmt.Errorf("upserting one-time token failed: %v", err) 6285 } 6286 6287 // Update the indexes table 6288 if err := txn.Insert("index", &IndexEntry{"one_time_token", index}); err != nil { 6289 return fmt.Errorf("index update failed: %v", err) 6290 } 6291 return txn.Commit() 6292 } 6293 6294 // DeleteOneTimeTokens deletes the tokens with the given ACLToken Accessor IDs 6295 func (s *StateStore) DeleteOneTimeTokens(msgType structs.MessageType, index uint64, ids []string) error { 6296 txn := s.db.WriteTxnMsgT(msgType, index) 6297 defer txn.Abort() 6298 6299 var deleted int 6300 for _, id := range ids { 6301 d, err := txn.DeleteAll("one_time_token", "id", id) 6302 if err != nil { 6303 return fmt.Errorf("deleting one-time token failed: %v", err) 6304 } 6305 deleted += d 6306 } 6307 6308 if deleted > 0 { 6309 if err := txn.Insert("index", &IndexEntry{"one_time_token", index}); err != nil { 6310 return fmt.Errorf("index update failed: %v", err) 6311 } 6312 } 6313 return txn.Commit() 6314 } 6315 6316 // ExpireOneTimeTokens deletes tokens that have expired 6317 func (s *StateStore) ExpireOneTimeTokens(msgType structs.MessageType, index uint64, timestamp time.Time) error { 6318 txn := s.db.WriteTxnMsgT(msgType, index) 6319 defer txn.Abort() 6320 6321 iter, err := s.oneTimeTokensExpiredTxn(txn, nil, timestamp) 6322 if err != nil { 6323 return err 6324 } 6325 6326 var deleted int 6327 for { 6328 raw := iter.Next() 6329 if raw == nil { 6330 break 6331 } 6332 ott, ok := raw.(*structs.OneTimeToken) 6333 if !ok || ott == nil { 6334 return fmt.Errorf("could not decode one-time token") 6335 } 6336 d, err := txn.DeleteAll("one_time_token", "secret", ott.OneTimeSecretID) 6337 if err != nil { 6338 return fmt.Errorf("deleting one-time token failed: %v", err) 6339 } 6340 deleted += d 6341 } 6342 6343 if deleted > 0 { 6344 if err := txn.Insert("index", &IndexEntry{"one_time_token", index}); err != nil { 6345 return fmt.Errorf("index update failed: %v", err) 6346 } 6347 } 6348 return txn.Commit() 6349 } 6350 6351 // oneTimeTokensExpiredTxn returns an iterator over all expired one-time tokens 6352 func (s *StateStore) oneTimeTokensExpiredTxn(txn *txn, ws memdb.WatchSet, timestamp time.Time) (memdb.ResultIterator, error) { 6353 iter, err := txn.Get("one_time_token", "id") 6354 if err != nil { 6355 return nil, fmt.Errorf("one-time token lookup failed: %v", err) 6356 } 6357 6358 ws.Add(iter.WatchCh()) 6359 iter = memdb.NewFilterIterator(iter, expiredOneTimeTokenFilter(timestamp)) 6360 return iter, nil 6361 } 6362 6363 // OneTimeTokenBySecret is used to lookup a token by secret 6364 func (s *StateStore) OneTimeTokenBySecret(ws memdb.WatchSet, secret string) (*structs.OneTimeToken, error) { 6365 if secret == "" { 6366 return nil, fmt.Errorf("one-time token lookup failed: missing secret") 6367 } 6368 6369 txn := s.db.ReadTxn() 6370 6371 watchCh, existing, err := txn.FirstWatch("one_time_token", "secret", secret) 6372 if err != nil { 6373 return nil, fmt.Errorf("one-time token lookup failed: %v", err) 6374 } 6375 ws.Add(watchCh) 6376 6377 if existing != nil { 6378 return existing.(*structs.OneTimeToken), nil 6379 } 6380 return nil, nil 6381 } 6382 6383 // expiredOneTimeTokenFilter returns a filter function that returns only 6384 // expired one-time tokens 6385 func expiredOneTimeTokenFilter(now time.Time) func(interface{}) bool { 6386 return func(raw interface{}) bool { 6387 ott, ok := raw.(*structs.OneTimeToken) 6388 if !ok { 6389 return true 6390 } 6391 6392 return ott.ExpiresAt.After(now) 6393 } 6394 } 6395 6396 // SchedulerConfig is used to get the current Scheduler configuration. 6397 func (s *StateStore) SchedulerConfig() (uint64, *structs.SchedulerConfiguration, error) { 6398 tx := s.db.ReadTxn() 6399 defer tx.Abort() 6400 return s.schedulerConfigTxn(tx) 6401 } 6402 6403 func (s *StateStore) schedulerConfigTxn(txn *txn) (uint64, *structs.SchedulerConfiguration, error) { 6404 6405 // Get the scheduler config 6406 c, err := txn.First("scheduler_config", "id") 6407 if err != nil { 6408 return 0, nil, fmt.Errorf("failed scheduler config lookup: %s", err) 6409 } 6410 6411 config, ok := c.(*structs.SchedulerConfiguration) 6412 if !ok { 6413 return 0, nil, nil 6414 } 6415 6416 return config.ModifyIndex, config, nil 6417 } 6418 6419 // SchedulerSetConfig is used to set the current Scheduler configuration. 6420 func (s *StateStore) SchedulerSetConfig(index uint64, config *structs.SchedulerConfiguration) error { 6421 tx := s.db.WriteTxn(index) 6422 defer tx.Abort() 6423 6424 s.schedulerSetConfigTxn(index, tx, config) 6425 6426 return tx.Commit() 6427 } 6428 6429 func (s *StateStore) ClusterMetadata(ws memdb.WatchSet) (*structs.ClusterMetadata, error) { 6430 txn := s.db.ReadTxn() 6431 defer txn.Abort() 6432 6433 // Get the cluster metadata 6434 watchCh, m, err := txn.FirstWatch("cluster_meta", "id") 6435 if err != nil { 6436 return nil, fmt.Errorf("failed cluster metadata lookup: %w", err) 6437 } 6438 ws.Add(watchCh) 6439 6440 if m != nil { 6441 return m.(*structs.ClusterMetadata), nil 6442 } 6443 6444 return nil, nil 6445 } 6446 6447 func (s *StateStore) ClusterSetMetadata(index uint64, meta *structs.ClusterMetadata) error { 6448 txn := s.db.WriteTxn(index) 6449 defer txn.Abort() 6450 6451 if err := s.setClusterMetadata(txn, meta); err != nil { 6452 return fmt.Errorf("set cluster metadata failed: %w", err) 6453 } 6454 6455 return txn.Commit() 6456 } 6457 6458 // WithWriteTransaction executes the passed function within a write transaction, 6459 // and returns its result. If the invocation returns no error, the transaction 6460 // is committed; otherwise, it's aborted. 6461 func (s *StateStore) WithWriteTransaction(msgType structs.MessageType, index uint64, fn func(Txn) error) error { 6462 tx := s.db.WriteTxnMsgT(msgType, index) 6463 defer tx.Abort() 6464 6465 err := fn(tx) 6466 if err == nil { 6467 return tx.Commit() 6468 } 6469 return err 6470 } 6471 6472 // SchedulerCASConfig is used to update the scheduler configuration with a 6473 // given Raft index. If the CAS index specified is not equal to the last observed index 6474 // for the config, then the call is a noop. 6475 func (s *StateStore) SchedulerCASConfig(index, cidx uint64, config *structs.SchedulerConfiguration) (bool, error) { 6476 tx := s.db.WriteTxn(index) 6477 defer tx.Abort() 6478 6479 // Check for an existing config 6480 existing, err := tx.First("scheduler_config", "id") 6481 if err != nil { 6482 return false, fmt.Errorf("failed scheduler config lookup: %s", err) 6483 } 6484 6485 // If the existing index does not match the provided CAS 6486 // index arg, then we shouldn't update anything and can safely 6487 // return early here. 6488 e, ok := existing.(*structs.SchedulerConfiguration) 6489 if !ok || (e != nil && e.ModifyIndex != cidx) { 6490 return false, nil 6491 } 6492 6493 s.schedulerSetConfigTxn(index, tx, config) 6494 6495 if err := tx.Commit(); err != nil { 6496 return false, err 6497 } 6498 return true, nil 6499 } 6500 6501 func (s *StateStore) schedulerSetConfigTxn(idx uint64, tx *txn, config *structs.SchedulerConfiguration) error { 6502 // Check for an existing config 6503 existing, err := tx.First("scheduler_config", "id") 6504 if err != nil { 6505 return fmt.Errorf("failed scheduler config lookup: %s", err) 6506 } 6507 6508 // Set the indexes. 6509 if existing != nil { 6510 config.CreateIndex = existing.(*structs.SchedulerConfiguration).CreateIndex 6511 } else { 6512 config.CreateIndex = idx 6513 } 6514 config.ModifyIndex = idx 6515 6516 if err := tx.Insert("scheduler_config", config); err != nil { 6517 return fmt.Errorf("failed updating scheduler config: %s", err) 6518 } 6519 return nil 6520 } 6521 6522 func (s *StateStore) setClusterMetadata(txn *txn, meta *structs.ClusterMetadata) error { 6523 // Check for an existing config, if it exists, verify that the cluster ID matches 6524 existing, err := txn.First("cluster_meta", "id") 6525 if err != nil { 6526 return fmt.Errorf("failed cluster meta lookup: %v", err) 6527 } 6528 6529 if existing != nil { 6530 existingClusterID := existing.(*structs.ClusterMetadata).ClusterID 6531 if meta.ClusterID != existingClusterID && existingClusterID != "" { 6532 // there is a bug in cluster ID detection 6533 return fmt.Errorf("refusing to set new cluster id, previous: %s, new: %s", existingClusterID, meta.ClusterID) 6534 } 6535 } 6536 6537 // update is technically a noop, unless someday we add more / mutable fields 6538 if err := txn.Insert("cluster_meta", meta); err != nil { 6539 return fmt.Errorf("set cluster metadata failed: %v", err) 6540 } 6541 6542 return nil 6543 } 6544 6545 // UpsertScalingPolicies is used to insert a new scaling policy. 6546 func (s *StateStore) UpsertScalingPolicies(index uint64, scalingPolicies []*structs.ScalingPolicy) error { 6547 txn := s.db.WriteTxn(index) 6548 defer txn.Abort() 6549 6550 if err := s.UpsertScalingPoliciesTxn(index, scalingPolicies, txn); err != nil { 6551 return err 6552 } 6553 6554 return txn.Commit() 6555 } 6556 6557 // UpsertScalingPoliciesTxn is used to insert a new scaling policy. 6558 func (s *StateStore) UpsertScalingPoliciesTxn(index uint64, scalingPolicies []*structs.ScalingPolicy, 6559 txn *txn) error { 6560 6561 hadUpdates := false 6562 6563 for _, policy := range scalingPolicies { 6564 // Check if the scaling policy already exists 6565 // Policy uniqueness is based on target and type 6566 it, err := txn.Get("scaling_policy", "target", 6567 policy.Target[structs.ScalingTargetNamespace], 6568 policy.Target[structs.ScalingTargetJob], 6569 policy.Target[structs.ScalingTargetGroup], 6570 policy.Target[structs.ScalingTargetTask], 6571 ) 6572 if err != nil { 6573 return fmt.Errorf("scaling policy lookup failed: %v", err) 6574 } 6575 6576 // Check if type matches 6577 var existing *structs.ScalingPolicy 6578 for raw := it.Next(); raw != nil; raw = it.Next() { 6579 p := raw.(*structs.ScalingPolicy) 6580 if p.Type == policy.Type { 6581 existing = p 6582 break 6583 } 6584 } 6585 6586 // Setup the indexes correctly 6587 if existing != nil { 6588 if !existing.Diff(policy) { 6589 continue 6590 } 6591 policy.ID = existing.ID 6592 policy.CreateIndex = existing.CreateIndex 6593 } else { 6594 // policy.ID must have been set already in Job.Register before log apply 6595 policy.CreateIndex = index 6596 } 6597 policy.ModifyIndex = index 6598 6599 // Insert the scaling policy 6600 hadUpdates = true 6601 if err := txn.Insert("scaling_policy", policy); err != nil { 6602 return err 6603 } 6604 } 6605 6606 // Update the indexes table for scaling policy if we updated any policies 6607 if hadUpdates { 6608 if err := txn.Insert("index", &IndexEntry{"scaling_policy", index}); err != nil { 6609 return fmt.Errorf("index update failed: %v", err) 6610 } 6611 } 6612 6613 return nil 6614 } 6615 6616 // NamespaceByName is used to lookup a namespace by name 6617 func (s *StateStore) NamespaceByName(ws memdb.WatchSet, name string) (*structs.Namespace, error) { 6618 txn := s.db.ReadTxn() 6619 return s.namespaceByNameImpl(ws, txn, name) 6620 } 6621 6622 // namespaceByNameImpl is used to lookup a namespace by name 6623 func (s *StateStore) namespaceByNameImpl(ws memdb.WatchSet, txn *txn, name string) (*structs.Namespace, error) { 6624 watchCh, existing, err := txn.FirstWatch(TableNamespaces, "id", name) 6625 if err != nil { 6626 return nil, fmt.Errorf("namespace lookup failed: %v", err) 6627 } 6628 ws.Add(watchCh) 6629 6630 if existing != nil { 6631 return existing.(*structs.Namespace), nil 6632 } 6633 return nil, nil 6634 } 6635 6636 // namespaceExists returns whether a namespace exists 6637 func (s *StateStore) namespaceExists(txn *txn, namespace string) (bool, error) { 6638 if namespace == structs.DefaultNamespace { 6639 return true, nil 6640 } 6641 6642 existing, err := txn.First(TableNamespaces, "id", namespace) 6643 if err != nil { 6644 return false, fmt.Errorf("namespace lookup failed: %v", err) 6645 } 6646 6647 return existing != nil, nil 6648 } 6649 6650 // NamespacesByNamePrefix is used to lookup namespaces by prefix 6651 func (s *StateStore) NamespacesByNamePrefix(ws memdb.WatchSet, namePrefix string) (memdb.ResultIterator, error) { 6652 txn := s.db.ReadTxn() 6653 6654 iter, err := txn.Get(TableNamespaces, "id_prefix", namePrefix) 6655 if err != nil { 6656 return nil, fmt.Errorf("namespaces lookup failed: %v", err) 6657 } 6658 ws.Add(iter.WatchCh()) 6659 6660 return iter, nil 6661 } 6662 6663 // Namespaces returns an iterator over all the namespaces 6664 func (s *StateStore) Namespaces(ws memdb.WatchSet) (memdb.ResultIterator, error) { 6665 txn := s.db.ReadTxn() 6666 6667 // Walk the entire namespace table 6668 iter, err := txn.Get(TableNamespaces, "id") 6669 if err != nil { 6670 return nil, err 6671 } 6672 ws.Add(iter.WatchCh()) 6673 return iter, nil 6674 } 6675 6676 func (s *StateStore) NamespaceNames() ([]string, error) { 6677 it, err := s.Namespaces(nil) 6678 if err != nil { 6679 return nil, err 6680 } 6681 6682 nses := []string{} 6683 for { 6684 next := it.Next() 6685 if next == nil { 6686 break 6687 } 6688 ns := next.(*structs.Namespace) 6689 nses = append(nses, ns.Name) 6690 } 6691 6692 return nses, nil 6693 } 6694 6695 // UpsertNamespaces is used to register or update a set of namespaces. 6696 func (s *StateStore) UpsertNamespaces(index uint64, namespaces []*structs.Namespace) error { 6697 txn := s.db.WriteTxn(index) 6698 defer txn.Abort() 6699 6700 for _, ns := range namespaces { 6701 // Handle upgrade path. 6702 ns.Canonicalize() 6703 if err := s.upsertNamespaceImpl(index, txn, ns); err != nil { 6704 return err 6705 } 6706 } 6707 6708 if err := txn.Insert("index", &IndexEntry{TableNamespaces, index}); err != nil { 6709 return fmt.Errorf("index update failed: %v", err) 6710 } 6711 6712 return txn.Commit() 6713 } 6714 6715 // upsertNamespaceImpl is used to upsert a namespace 6716 func (s *StateStore) upsertNamespaceImpl(index uint64, txn *txn, namespace *structs.Namespace) error { 6717 // Ensure the namespace hash is non-nil. This should be done outside the state store 6718 // for performance reasons, but we check here for defense in depth. 6719 ns := namespace 6720 if len(ns.Hash) == 0 { 6721 ns.SetHash() 6722 } 6723 6724 // Check if the namespace already exists 6725 existing, err := txn.First(TableNamespaces, "id", ns.Name) 6726 if err != nil { 6727 return fmt.Errorf("namespace lookup failed: %v", err) 6728 } 6729 6730 // Setup the indexes correctly and determine which quotas need to be 6731 // reconciled 6732 var oldQuota string 6733 if existing != nil { 6734 exist := existing.(*structs.Namespace) 6735 ns.CreateIndex = exist.CreateIndex 6736 ns.ModifyIndex = index 6737 6738 // Grab the old quota on the namespace 6739 oldQuota = exist.Quota 6740 } else { 6741 ns.CreateIndex = index 6742 ns.ModifyIndex = index 6743 } 6744 6745 // Validate that the quota on the new namespace exists 6746 if ns.Quota != "" { 6747 exists, err := s.quotaSpecExists(txn, ns.Quota) 6748 if err != nil { 6749 return fmt.Errorf("looking up namespace quota %q failed: %v", ns.Quota, err) 6750 } else if !exists { 6751 return fmt.Errorf("namespace %q using non-existent quota %q", ns.Name, ns.Quota) 6752 } 6753 } 6754 6755 // Insert the namespace 6756 if err := txn.Insert(TableNamespaces, ns); err != nil { 6757 return fmt.Errorf("namespace insert failed: %v", err) 6758 } 6759 6760 // Reconcile changed quotas 6761 return s.quotaReconcile(index, txn, ns.Quota, oldQuota) 6762 } 6763 6764 // DeleteNamespaces is used to remove a set of namespaces 6765 func (s *StateStore) DeleteNamespaces(index uint64, names []string) error { 6766 txn := s.db.WriteTxn(index) 6767 defer txn.Abort() 6768 6769 for _, name := range names { 6770 // Lookup the namespace 6771 existing, err := txn.First(TableNamespaces, "id", name) 6772 if err != nil { 6773 return fmt.Errorf("namespace lookup failed: %v", err) 6774 } 6775 if existing == nil { 6776 return fmt.Errorf("namespace not found") 6777 } 6778 6779 ns := existing.(*structs.Namespace) 6780 if ns.Name == structs.DefaultNamespace { 6781 return fmt.Errorf("default namespace can not be deleted") 6782 } 6783 6784 // Ensure that the namespace doesn't have any non-terminal jobs 6785 iter, err := s.jobsByNamespaceImpl(nil, name, txn) 6786 if err != nil { 6787 return err 6788 } 6789 6790 for { 6791 raw := iter.Next() 6792 if raw == nil { 6793 break 6794 } 6795 job := raw.(*structs.Job) 6796 6797 if job.Status != structs.JobStatusDead { 6798 return fmt.Errorf("namespace %q contains at least one non-terminal job %q. "+ 6799 "All jobs must be terminal in namespace before it can be deleted", name, job.ID) 6800 } 6801 } 6802 6803 vIter, err := s.csiVolumesByNamespaceImpl(txn, nil, name, "") 6804 if err != nil { 6805 return err 6806 } 6807 rawVol := vIter.Next() 6808 if rawVol != nil { 6809 vol := rawVol.(*structs.CSIVolume) 6810 return fmt.Errorf("namespace %q contains at least one CSI volume %q. "+ 6811 "All CSI volumes in namespace must be deleted before it can be deleted", name, vol.ID) 6812 } 6813 6814 varIter, err := s.getVariablesByNamespaceImpl(txn, nil, name) 6815 if err != nil { 6816 return err 6817 } 6818 if varIter.Next() != nil { 6819 // unlike job/volume, don't show the path here because the user may 6820 // not have List permissions on the vars in this namespace 6821 return fmt.Errorf("namespace %q contains at least one variable. "+ 6822 "All variables in namespace must be deleted before it can be deleted", name) 6823 } 6824 6825 // Delete the namespace 6826 if err := txn.Delete(TableNamespaces, existing); err != nil { 6827 return fmt.Errorf("namespace deletion failed: %v", err) 6828 } 6829 } 6830 6831 if err := txn.Insert("index", &IndexEntry{TableNamespaces, index}); err != nil { 6832 return fmt.Errorf("index update failed: %v", err) 6833 } 6834 6835 return txn.Commit() 6836 } 6837 6838 func (s *StateStore) DeleteScalingPolicies(index uint64, ids []string) error { 6839 txn := s.db.WriteTxn(index) 6840 defer txn.Abort() 6841 6842 err := s.DeleteScalingPoliciesTxn(index, ids, txn) 6843 if err == nil { 6844 return txn.Commit() 6845 } 6846 6847 return err 6848 } 6849 6850 // DeleteScalingPoliciesTxn is used to delete a set of scaling policies by ID. 6851 func (s *StateStore) DeleteScalingPoliciesTxn(index uint64, ids []string, txn *txn) error { 6852 if len(ids) == 0 { 6853 return nil 6854 } 6855 6856 for _, id := range ids { 6857 // Lookup the scaling policy 6858 existing, err := txn.First("scaling_policy", "id", id) 6859 if err != nil { 6860 return fmt.Errorf("scaling policy lookup failed: %v", err) 6861 } 6862 if existing == nil { 6863 return fmt.Errorf("scaling policy not found") 6864 } 6865 6866 // Delete the scaling policy 6867 if err := txn.Delete("scaling_policy", existing); err != nil { 6868 return fmt.Errorf("scaling policy delete failed: %v", err) 6869 } 6870 } 6871 6872 if err := txn.Insert("index", &IndexEntry{"scaling_policy", index}); err != nil { 6873 return fmt.Errorf("index update failed: %v", err) 6874 } 6875 6876 return nil 6877 } 6878 6879 // ScalingPolicies returns an iterator over all the scaling policies 6880 func (s *StateStore) ScalingPolicies(ws memdb.WatchSet) (memdb.ResultIterator, error) { 6881 txn := s.db.ReadTxn() 6882 6883 // Walk the entire scaling_policy table 6884 iter, err := txn.Get("scaling_policy", "id") 6885 if err != nil { 6886 return nil, err 6887 } 6888 6889 ws.Add(iter.WatchCh()) 6890 6891 return iter, nil 6892 } 6893 6894 // ScalingPoliciesByTypePrefix returns an iterator over scaling policies with a certain type prefix. 6895 func (s *StateStore) ScalingPoliciesByTypePrefix(ws memdb.WatchSet, t string) (memdb.ResultIterator, error) { 6896 txn := s.db.ReadTxn() 6897 6898 iter, err := txn.Get("scaling_policy", "type_prefix", t) 6899 if err != nil { 6900 return nil, err 6901 } 6902 6903 ws.Add(iter.WatchCh()) 6904 return iter, nil 6905 } 6906 6907 func (s *StateStore) ScalingPoliciesByNamespace(ws memdb.WatchSet, namespace, typ string) (memdb.ResultIterator, error) { 6908 txn := s.db.ReadTxn() 6909 6910 iter, err := txn.Get("scaling_policy", "target_prefix", namespace) 6911 if err != nil { 6912 return nil, err 6913 } 6914 6915 ws.Add(iter.WatchCh()) 6916 6917 // Wrap the iterator in a filter to exact match the namespace 6918 iter = memdb.NewFilterIterator(iter, scalingPolicyNamespaceFilter(namespace)) 6919 6920 // If policy type is specified as well, wrap again 6921 if typ != "" { 6922 iter = memdb.NewFilterIterator(iter, func(raw interface{}) bool { 6923 p, ok := raw.(*structs.ScalingPolicy) 6924 if !ok { 6925 return true 6926 } 6927 return !strings.HasPrefix(p.Type, typ) 6928 }) 6929 } 6930 6931 return iter, nil 6932 } 6933 6934 func (s *StateStore) ScalingPoliciesByJob(ws memdb.WatchSet, namespace, jobID, policyType string) (memdb.ResultIterator, 6935 error) { 6936 txn := s.db.ReadTxn() 6937 iter, err := s.ScalingPoliciesByJobTxn(ws, namespace, jobID, txn) 6938 if err != nil { 6939 return nil, err 6940 } 6941 6942 if policyType == "" { 6943 return iter, nil 6944 } 6945 6946 filter := func(raw interface{}) bool { 6947 p, ok := raw.(*structs.ScalingPolicy) 6948 if !ok { 6949 return true 6950 } 6951 return policyType != p.Type 6952 } 6953 6954 return memdb.NewFilterIterator(iter, filter), nil 6955 } 6956 6957 func (s *StateStore) ScalingPoliciesByJobTxn(ws memdb.WatchSet, namespace, jobID string, 6958 txn *txn) (memdb.ResultIterator, error) { 6959 6960 iter, err := txn.Get("scaling_policy", "target_prefix", namespace, jobID) 6961 if err != nil { 6962 return nil, err 6963 } 6964 6965 ws.Add(iter.WatchCh()) 6966 6967 filter := func(raw interface{}) bool { 6968 d, ok := raw.(*structs.ScalingPolicy) 6969 if !ok { 6970 return true 6971 } 6972 6973 return d.Target[structs.ScalingTargetJob] != jobID 6974 } 6975 6976 // Wrap the iterator in a filter 6977 wrap := memdb.NewFilterIterator(iter, filter) 6978 return wrap, nil 6979 } 6980 6981 func (s *StateStore) ScalingPolicyByID(ws memdb.WatchSet, id string) (*structs.ScalingPolicy, error) { 6982 txn := s.db.ReadTxn() 6983 6984 watchCh, existing, err := txn.FirstWatch("scaling_policy", "id", id) 6985 if err != nil { 6986 return nil, fmt.Errorf("scaling_policy lookup failed: %v", err) 6987 } 6988 ws.Add(watchCh) 6989 6990 if existing != nil { 6991 return existing.(*structs.ScalingPolicy), nil 6992 } 6993 6994 return nil, nil 6995 } 6996 6997 // ScalingPolicyByTargetAndType returns a fully-qualified policy against a target and policy type, 6998 // or nil if it does not exist. This method does not honor the watchset on the policy type, just the target. 6999 func (s *StateStore) ScalingPolicyByTargetAndType(ws memdb.WatchSet, target map[string]string, typ string) (*structs.ScalingPolicy, 7000 error) { 7001 txn := s.db.ReadTxn() 7002 7003 namespace := target[structs.ScalingTargetNamespace] 7004 job := target[structs.ScalingTargetJob] 7005 group := target[structs.ScalingTargetGroup] 7006 task := target[structs.ScalingTargetTask] 7007 7008 it, err := txn.Get("scaling_policy", "target", namespace, job, group, task) 7009 if err != nil { 7010 return nil, fmt.Errorf("scaling_policy lookup failed: %v", err) 7011 } 7012 7013 ws.Add(it.WatchCh()) 7014 7015 // Check for type 7016 var existing *structs.ScalingPolicy 7017 for raw := it.Next(); raw != nil; raw = it.Next() { 7018 p := raw.(*structs.ScalingPolicy) 7019 if p.Type == typ { 7020 existing = p 7021 break 7022 } 7023 } 7024 7025 if existing != nil { 7026 return existing, nil 7027 } 7028 7029 return nil, nil 7030 } 7031 7032 func (s *StateStore) ScalingPoliciesByIDPrefix(ws memdb.WatchSet, namespace string, prefix string) (memdb.ResultIterator, error) { 7033 txn := s.db.ReadTxn() 7034 7035 iter, err := txn.Get("scaling_policy", "id_prefix", prefix) 7036 if err != nil { 7037 return nil, fmt.Errorf("scaling policy lookup failed: %v", err) 7038 } 7039 7040 ws.Add(iter.WatchCh()) 7041 7042 iter = memdb.NewFilterIterator(iter, scalingPolicyNamespaceFilter(namespace)) 7043 7044 return iter, nil 7045 } 7046 7047 // scalingPolicyNamespaceFilter returns a filter function that filters all 7048 // scaling policies not targeting the given namespace. 7049 func scalingPolicyNamespaceFilter(namespace string) func(interface{}) bool { 7050 return func(raw interface{}) bool { 7051 p, ok := raw.(*structs.ScalingPolicy) 7052 if !ok { 7053 return true 7054 } 7055 7056 return p.Target[structs.ScalingTargetNamespace] != namespace 7057 } 7058 } 7059 7060 // StateSnapshot is used to provide a point-in-time snapshot 7061 type StateSnapshot struct { 7062 StateStore 7063 } 7064 7065 // DenormalizeAllocationsMap takes in a map of nodes to allocations, and queries the 7066 // Allocation for each of the Allocation diffs and merges the updated attributes with 7067 // the existing Allocation, and attaches the Job provided 7068 func (s *StateSnapshot) DenormalizeAllocationsMap(nodeAllocations map[string][]*structs.Allocation) error { 7069 for nodeID, allocs := range nodeAllocations { 7070 denormalizedAllocs, err := s.DenormalizeAllocationSlice(allocs) 7071 if err != nil { 7072 return err 7073 } 7074 7075 nodeAllocations[nodeID] = denormalizedAllocs 7076 } 7077 return nil 7078 } 7079 7080 // DenormalizeAllocationSlice queries the Allocation for each allocation diff 7081 // represented as an Allocation and merges the updated attributes with the existing 7082 // Allocation, and attaches the Job provided. 7083 // 7084 // This should only be called on terminal allocs, particularly stopped or preempted allocs 7085 func (s *StateSnapshot) DenormalizeAllocationSlice(allocs []*structs.Allocation) ([]*structs.Allocation, error) { 7086 allocDiffs := make([]*structs.AllocationDiff, len(allocs)) 7087 for i, alloc := range allocs { 7088 allocDiffs[i] = alloc.AllocationDiff() 7089 } 7090 7091 return s.DenormalizeAllocationDiffSlice(allocDiffs) 7092 } 7093 7094 // DenormalizeAllocationDiffSlice queries the Allocation for each AllocationDiff and merges 7095 // the updated attributes with the existing Allocation, and attaches the Job provided. 7096 // 7097 // This should only be called on terminal alloc, particularly stopped or preempted allocs 7098 func (s *StateSnapshot) DenormalizeAllocationDiffSlice(allocDiffs []*structs.AllocationDiff) ([]*structs.Allocation, error) { 7099 // Output index for denormalized Allocations 7100 j := 0 7101 7102 denormalizedAllocs := make([]*structs.Allocation, len(allocDiffs)) 7103 for _, allocDiff := range allocDiffs { 7104 alloc, err := s.AllocByID(nil, allocDiff.ID) 7105 if err != nil { 7106 return nil, fmt.Errorf("alloc lookup failed: %v", err) 7107 } 7108 if alloc == nil { 7109 return nil, fmt.Errorf("alloc %v doesn't exist", allocDiff.ID) 7110 } 7111 7112 // Merge the updates to the Allocation. Don't update alloc.Job for terminal allocs 7113 // so alloc refers to the latest Job view before destruction and to ease handler implementations 7114 allocCopy := alloc.Copy() 7115 7116 if allocDiff.PreemptedByAllocation != "" { 7117 allocCopy.PreemptedByAllocation = allocDiff.PreemptedByAllocation 7118 allocCopy.DesiredDescription = getPreemptedAllocDesiredDescription(allocDiff.PreemptedByAllocation) 7119 allocCopy.DesiredStatus = structs.AllocDesiredStatusEvict 7120 } else { 7121 // If alloc is a stopped alloc 7122 allocCopy.DesiredDescription = allocDiff.DesiredDescription 7123 allocCopy.DesiredStatus = structs.AllocDesiredStatusStop 7124 if allocDiff.ClientStatus != "" { 7125 allocCopy.ClientStatus = allocDiff.ClientStatus 7126 } 7127 if allocDiff.FollowupEvalID != "" { 7128 allocCopy.FollowupEvalID = allocDiff.FollowupEvalID 7129 } 7130 } 7131 if allocDiff.ModifyTime != 0 { 7132 allocCopy.ModifyTime = allocDiff.ModifyTime 7133 } 7134 7135 // Update the allocDiff in the slice to equal the denormalized alloc 7136 denormalizedAllocs[j] = allocCopy 7137 j++ 7138 } 7139 // Retain only the denormalized Allocations in the slice 7140 denormalizedAllocs = denormalizedAllocs[:j] 7141 return denormalizedAllocs, nil 7142 } 7143 7144 func getPreemptedAllocDesiredDescription(preemptedByAllocID string) string { 7145 return fmt.Sprintf("Preempted by alloc ID %v", preemptedByAllocID) 7146 } 7147 7148 // UpsertRootKeyMeta saves root key meta or updates it in-place. 7149 func (s *StateStore) UpsertRootKeyMeta(index uint64, rootKeyMeta *structs.RootKeyMeta, rekey bool) error { 7150 txn := s.db.WriteTxn(index) 7151 defer txn.Abort() 7152 7153 // get any existing key for updating 7154 raw, err := txn.First(TableRootKeyMeta, indexID, rootKeyMeta.KeyID) 7155 if err != nil { 7156 return fmt.Errorf("root key metadata lookup failed: %v", err) 7157 } 7158 7159 isRotation := false 7160 7161 if raw != nil { 7162 existing := raw.(*structs.RootKeyMeta) 7163 rootKeyMeta.CreateIndex = existing.CreateIndex 7164 rootKeyMeta.CreateTime = existing.CreateTime 7165 isRotation = !existing.Active() && rootKeyMeta.Active() 7166 } else { 7167 rootKeyMeta.CreateIndex = index 7168 isRotation = rootKeyMeta.Active() 7169 } 7170 rootKeyMeta.ModifyIndex = index 7171 7172 if rekey && !isRotation { 7173 return fmt.Errorf("cannot rekey without setting the new key active") 7174 } 7175 7176 // if the upsert is for a newly-active key, we need to set all the 7177 // other keys as inactive in the same transaction. 7178 if isRotation { 7179 iter, err := txn.Get(TableRootKeyMeta, indexID) 7180 if err != nil { 7181 return err 7182 } 7183 for { 7184 raw := iter.Next() 7185 if raw == nil { 7186 break 7187 } 7188 key := raw.(*structs.RootKeyMeta) 7189 modified := false 7190 7191 switch key.State { 7192 case structs.RootKeyStateInactive: 7193 if rekey { 7194 key.SetRekeying() 7195 modified = true 7196 } 7197 case structs.RootKeyStateActive: 7198 if rekey { 7199 key.SetRekeying() 7200 } else { 7201 key.SetInactive() 7202 } 7203 modified = true 7204 case structs.RootKeyStateRekeying, structs.RootKeyStateDeprecated: 7205 // nothing to do 7206 } 7207 7208 if modified { 7209 key.ModifyIndex = index 7210 if err := txn.Insert(TableRootKeyMeta, key); err != nil { 7211 return err 7212 } 7213 } 7214 7215 } 7216 } 7217 7218 if err := txn.Insert(TableRootKeyMeta, rootKeyMeta); err != nil { 7219 return err 7220 } 7221 7222 // update the indexes table 7223 if err := txn.Insert("index", &IndexEntry{TableRootKeyMeta, index}); err != nil { 7224 return fmt.Errorf("index update failed: %v", err) 7225 } 7226 return txn.Commit() 7227 } 7228 7229 // DeleteRootKeyMeta deletes a single root key, or returns an error if 7230 // it doesn't exist. 7231 func (s *StateStore) DeleteRootKeyMeta(index uint64, keyID string) error { 7232 txn := s.db.WriteTxn(index) 7233 defer txn.Abort() 7234 7235 // find the old key 7236 existing, err := txn.First(TableRootKeyMeta, indexID, keyID) 7237 if err != nil { 7238 return fmt.Errorf("root key metadata lookup failed: %v", err) 7239 } 7240 if existing == nil { 7241 return fmt.Errorf("root key metadata not found") 7242 } 7243 if err := txn.Delete(TableRootKeyMeta, existing); err != nil { 7244 return fmt.Errorf("root key metadata delete failed: %v", err) 7245 } 7246 7247 // update the indexes table 7248 if err := txn.Insert("index", &IndexEntry{TableRootKeyMeta, index}); err != nil { 7249 return fmt.Errorf("index update failed: %v", err) 7250 } 7251 7252 return txn.Commit() 7253 } 7254 7255 // RootKeyMetas returns an iterator over all root key metadata 7256 func (s *StateStore) RootKeyMetas(ws memdb.WatchSet) (memdb.ResultIterator, error) { 7257 txn := s.db.ReadTxn() 7258 7259 iter, err := txn.Get(TableRootKeyMeta, indexID) 7260 if err != nil { 7261 return nil, err 7262 } 7263 7264 ws.Add(iter.WatchCh()) 7265 return iter, nil 7266 } 7267 7268 // RootKeyMetaByID returns a specific root key meta 7269 func (s *StateStore) RootKeyMetaByID(ws memdb.WatchSet, id string) (*structs.RootKeyMeta, error) { 7270 txn := s.db.ReadTxn() 7271 7272 watchCh, raw, err := txn.FirstWatch(TableRootKeyMeta, indexID, id) 7273 if err != nil { 7274 return nil, fmt.Errorf("root key metadata lookup failed: %v", err) 7275 } 7276 ws.Add(watchCh) 7277 7278 if raw != nil { 7279 return raw.(*structs.RootKeyMeta), nil 7280 } 7281 return nil, nil 7282 } 7283 7284 // GetActiveRootKeyMeta returns the metadata for the currently active root key 7285 func (s *StateStore) GetActiveRootKeyMeta(ws memdb.WatchSet) (*structs.RootKeyMeta, error) { 7286 txn := s.db.ReadTxn() 7287 7288 iter, err := txn.Get(TableRootKeyMeta, indexID) 7289 if err != nil { 7290 return nil, err 7291 } 7292 ws.Add(iter.WatchCh()) 7293 7294 for { 7295 raw := iter.Next() 7296 if raw == nil { 7297 break 7298 } 7299 key := raw.(*structs.RootKeyMeta) 7300 if key.Active() { 7301 return key, nil 7302 } 7303 } 7304 return nil, nil 7305 } 7306 7307 // IsRootKeyMetaInUse determines whether a key has been used to sign a workload 7308 // identity for a live allocation or encrypt any variables 7309 func (s *StateStore) IsRootKeyMetaInUse(keyID string) (bool, error) { 7310 txn := s.db.ReadTxn() 7311 7312 iter, err := txn.Get(TableAllocs, indexSigningKey, keyID, true) 7313 if err != nil { 7314 return false, err 7315 } 7316 alloc := iter.Next() 7317 if alloc != nil { 7318 return true, nil 7319 } 7320 7321 iter, err = txn.Get(TableVariables, indexKeyID, keyID) 7322 if err != nil { 7323 return false, err 7324 } 7325 variable := iter.Next() 7326 if variable != nil { 7327 return true, nil 7328 } 7329 7330 return false, nil 7331 }